diff --git a/osaca/data/a64fx.yml b/osaca/data/a64fx.yml index a53a8f3..a39e374 100644 --- a/osaca/data/a64fx.yml +++ b/osaca/data/a64fx.yml @@ -1132,6 +1132,27 @@ instruction_forms: throughput: 2.0 latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses +- name: ld2d + operands: + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: p + predication: '*' + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 2.0 + latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D + port_pressure: [[2, '56'], [4, ['5D', '6D']]] - name: ldp operands: - class: register @@ -1980,6 +2001,27 @@ instruction_forms: throughput: 1.0 latency: 0 # 1*p5+1*p6+1*p0 port_pressure: [[1, '5'], [1, '6'], [1, '0']] +- name: st2d + operands: + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: p + predication: '*' + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 1*p5+1*p6+1*p0 + port_pressure: [[1, '5'], [1, '6'], [1, '0']] - name: sub operands: - class: register diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index 2b216fd..fd8a681 100755 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - - +from copy import deepcopy import pyparsing as pp from osaca.parser import AttrDict, BaseParser @@ -240,7 +239,7 @@ class ParserAArch64(BaseParser): # 1. Parse comment try: - result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict()) + result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())[0] result = AttrDict.convert_dict(result) instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID]) except pp.ParseException: @@ -249,7 +248,7 @@ class ParserAArch64(BaseParser): try: result = self.process_operand( self.llvm_markers.parseString(line, parseAll=True).asDict() - ) + )[0] result = AttrDict.convert_dict(result) instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID]) except pp.ParseException: @@ -257,7 +256,7 @@ class ParserAArch64(BaseParser): # 2. Parse label if result is None: try: - result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) + result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())[0] result = AttrDict.convert_dict(result) instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name if self.COMMENT_ID in result[self.LABEL_ID]: @@ -272,7 +271,7 @@ class ParserAArch64(BaseParser): try: result = self.process_operand( self.directive.parseString(line, parseAll=True).asDict() - ) + )[0] result = AttrDict.convert_dict(result) instruction_form[self.DIRECTIVE_ID] = AttrDict( { @@ -292,7 +291,6 @@ class ParserAArch64(BaseParser): try: result = self.parse_instruction(line) except (pp.ParseException, KeyError) as e: - raise e raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] @@ -313,19 +311,19 @@ class ParserAArch64(BaseParser): # Add operands to list # Check first operand if "operand1" in result: - operands.append(self.process_operand(result["operand1"])) + operands += self.process_operand(result["operand1"]) # Check second operand if "operand2" in result: - operands.append(self.process_operand(result["operand2"])) + operands += self.process_operand(result["operand2"]) # Check third operand if "operand3" in result: - operands.append(self.process_operand(result["operand3"])) + operands += self.process_operand(result["operand3"]) # Check fourth operand if "operand4" in result: - operands.append(self.process_operand(result["operand4"])) + operands += self.process_operand(result["operand4"]) # Check fifth operand if "operand5" in result: - operands.append(self.process_operand(result["operand5"])) + operands += self.process_operand(result["operand5"]) return_dict = AttrDict( { @@ -342,23 +340,23 @@ class ParserAArch64(BaseParser): """Post-process operand""" # structure memory addresses if self.MEMORY_ID in operand: - return self.process_memory_address(operand[self.MEMORY_ID]) + return [self.process_memory_address(operand[self.MEMORY_ID])] # structure register lists if self.REGISTER_ID in operand and ( "list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID] ): - # TODO: discuss if ranges should be converted to lists - return self.process_register_list(operand[self.REGISTER_ID]) + # resolve ranges and lists + return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID])) if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"] == "sp": - return self.process_sp_register(operand[self.REGISTER_ID]) + return [self.process_sp_register(operand[self.REGISTER_ID])] # add value attribute to floating point immediates without exponent if self.IMMEDIATE_ID in operand: - return self.process_immediate(operand[self.IMMEDIATE_ID]) + return [self.process_immediate(operand[self.IMMEDIATE_ID])] if self.LABEL_ID in operand: - return self.process_label(operand[self.LABEL_ID]) + return [self.process_label(operand[self.LABEL_ID])] if self.IDENTIFIER_ID in operand: - return self.process_identifier(operand[self.IDENTIFIER_ID]) - return operand + return [self.process_identifier(operand[self.IDENTIFIER_ID])] + return [operand] def process_memory_address(self, memory_address): """Post-process memory address operand""" @@ -391,6 +389,36 @@ class ParserAArch64(BaseParser): reg["prefix"] = "x" return AttrDict({self.REGISTER_ID: reg}) + def resolve_range_list(self, operand): + """ + Resolve range or list register operand to list of registers. + + Returns None if neither list nor range + """ + if 'register' in operand: + if 'list' in operand.register: + index = operand.register.get('index') + l = [] + for reg in operand.register.list: + reg = deepcopy(reg) + if index is not None: + reg.index = index + l.append(AttrDict({self.REGISTER_ID: reg})) + return l + elif 'range' in operand.register: + base_register = operand.register.range[0] + index = operand.register.get('index') + l = [] + start_name = base_register.name + end_name = operand.register.range[1].name + for name in range(int(start_name), int(end_name)+1): + reg = deepcopy(base_register) + if index is not None: + reg['index'] = operand.register.range.index + reg['name'] = str(name) + l.append(AttrDict({self.REGISTER_ID: reg})) + return l + def process_register_list(self, register_list): """Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})""" # Remove unnecessarily created dictionary entries during parsing diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index eca5869..b450c17 100755 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 from itertools import chain +from copy import deepcopy from osaca import utils from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT @@ -122,6 +123,7 @@ class ISASemantics(object): "pre_indexed": pre_indexed, "post_indexed": post_indexed}) ) + # store operand list in dict and reassign operand key/value pair instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict) # assign LD/ST flags @@ -130,6 +132,7 @@ class ISASemantics(object): instruction_form["flags"] += [INSTR_FLAGS.HAS_LD] if self._has_store(instruction_form): instruction_form["flags"] += [INSTR_FLAGS.HAS_ST] + def get_reg_changes(self, instruction_form, only_postindexed=False): """ diff --git a/tests/test_semantics.py b/tests/test_semantics.py index b322d16..d3f5ef1 100755 --- a/tests/test_semantics.py +++ b/tests/test_semantics.py @@ -34,6 +34,8 @@ class TestSemanticTools(unittest.TestCase): cls.code_aarch64_memdep = f.read() with open(cls._find_file("kernel_aarch64.s")) as f: cls.code_AArch64 = f.read() + with open(cls._find_file("kernel_aarch64_sve.s")) as f: + cls.code_AArch64_SVE = f.read() cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86") cls.kernel_x86_memdep = reduce_to_section( cls.parser_x86.parse_file(cls.code_x86_memdep), "x86") @@ -41,6 +43,8 @@ class TestSemanticTools(unittest.TestCase): cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64") cls.kernel_aarch64_memdep = reduce_to_section( cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64") + cls.kernel_aarch64_SVE = reduce_to_section( + cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64") # set up machine models cls.machine_model_csx = MachineModel( @@ -49,6 +53,9 @@ class TestSemanticTools(unittest.TestCase): cls.machine_model_tx2 = MachineModel( path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml") ) + cls.machine_model_a64fx = MachineModel( + path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml") + ) cls.semantics_x86 = ISASemantics("x86") cls.semantics_csx = ArchSemantics( cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml") @@ -58,6 +65,10 @@ class TestSemanticTools(unittest.TestCase): cls.machine_model_tx2, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), ) + cls.semantics_a64fx = ArchSemantics( + cls.machine_model_a64fx, + path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), + ) cls.machine_model_zen = MachineModel(arch="zen1") for i in range(len(cls.kernel_x86)): @@ -72,6 +83,9 @@ class TestSemanticTools(unittest.TestCase): for i in range(len(cls.kernel_aarch64_memdep)): cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i]) cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i]) + for i in range(len(cls.kernel_aarch64_SVE)): + cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i]) + cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i]) ########### # Tests @@ -320,6 +334,11 @@ class TestSemanticTools(unittest.TestCase): dg.get_dependent_instruction_forms() # test dot creation dg.export_graph(filepath="/dev/null") + + def test_kernelDG_SVE(self): + dg = KernelDG(self.kernel_aarch64_SVE, self.parser_AArch64, self.machine_model_a64fx, + self.semantics_a64fx) + # TODO check for correct analysis def test_hidden_load(self): machine_model_hld = MachineModel( @@ -421,6 +440,7 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1)) self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2)) self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm)) + def test_is_read_is_written_AArch64(self): # independent form HW model diff --git a/validation/Analysis.ipynb b/validation/Analysis.ipynb index 4a21cd7..57fd7f0 100644 --- a/validation/Analysis.ipynb +++ b/validation/Analysis.ipynb @@ -232,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -284,8 +284,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 27, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", @@ -296,8 +298,672 @@ "ZEN has 156 tests, compiled to 126 unique assembly representations.\n", "ZEN2 has 156 tests, compiled to 126 unique assembly representations.\n", "TX2 has 104 tests, compiled to 78 unique assembly representations.\n", - "A64FX has 104 tests, compiled to 81 unique assembly representations.\n" + "A64FX has 104 tests, compiled to 81 unique assembly representations.\n", + "High-level iterations in assembly block: 16\n", + "Measured: 1.1903856655856655\n", + "IACA Predicted: 1.96875 TP: 1.875 LCD: None CP: None\n", + "Ithemal Predicted: nan TP: None LCD: None CP: None\n", + "LLVM-MCA Predicted: 2.240625 TP: 1.948125 LCD: 2.240625 CP: 3.8125\n", + "OSACA Predicted: 1.875 TP: 1.875 LCD: 0.5 CP: 2.75\n" ] + }, + { + "data": { + "text/html": [ + "
Open Source Architecture Code Analyzer (OSACA) - 0.3.14\n",
+ "Analyzed file: build/SKX/icc/O3/pi.marked.s\n",
+ "Architecture: SKX\n",
+ "Timestamp: 2021-04-15 12:15:40\n",
+ "\n",
+ "\n",
+ " P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction\n",
+ " * - Instruction micro-ops not bound to a port\n",
+ " X - No throughput/latency information for this instruction in data file\n",
+ "\n",
+ "\n",
+ "Combined Analysis Report\n",
+ "------------------------\n",
+ " Port pressure in cycles \n",
+ " | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |\n",
+ "-------------------------------------------------------------------------------------------------\n",
+ " 62 | | | | | | | | || | | # pointer_increment=128 fa3c665ee18e1e5f704c8a6026891c36\n",
+ " 63 | | | | | | | | || | | ..B1.4: # Preds ..B1.4 ..B1.3\n",
+ " 64 | | | | | | | | || | | # Execution count [5.00e+00]\n",
+ " 65 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $32, %ecx #16.5\n",
+ " 66 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm5, %ymm9, %ymm14 #17.9\n",
+ " 67 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm9, %zmm8 #17.14\n",
+ " 68 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm8, %zmm1, %zmm10 #17.18\n",
+ " 69 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm2, %zmm11 #17.25\n",
+ " 70 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm11, %zmm11 #18.38\n",
+ " 71 | | | | | | | | || | | * vmovaps %zmm0, %zmm29 #18.38\n",
+ " 72 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm11, %zmm13 #18.38\n",
+ " 73 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11 #18.38\n",
+ " 74 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm13, %k0 #18.38\n",
+ " 75 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm11, %zmm11, %zmm12 #18.38\n",
+ " 76 | 1.00 | | | | | | | || | | knotw %k0, %k1 #18.38\n",
+ " 77 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm11, %zmm13{%k1} #18.38\n",
+ " 78 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm12, %zmm13{%k1} #18.38\n",
+ " 79 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38\n",
+ " 80 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm14, %ymm20 #17.9\n",
+ " 81 | 0.50 | | | | | 1.50 | | || 7.0 | | vcvtdq2pd %ymm14, %zmm15 #17.14\n",
+ " 82 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddpd %zmm15, %zmm1, %zmm16 #17.18\n",
+ " 83 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm16, %zmm2, %zmm17 #17.25\n",
+ " 84 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm0, %zmm17, %zmm17 #18.38\n",
+ " 85 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm17, %zmm19 #18.38\n",
+ " 86 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17 #18.38\n",
+ " 87 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm19, %k2 #18.38\n",
+ " 88 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm17, %zmm18 #18.38\n",
+ " 89 | 1.00 | | | | | | | || | | knotw %k2, %k3 #18.38\n",
+ " 90 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm19, %zmm17, %zmm19{%k3} #18.38\n",
+ " 91 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm19, %zmm18, %zmm19{%k3} #18.38\n",
+ " 92 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38\n",
+ " 93 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm20, %ymm26 #17.9\n",
+ " 94 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm20, %zmm21 #17.14\n",
+ " 95 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm21, %zmm1, %zmm22 #17.18\n",
+ " 96 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm22, %zmm2, %zmm23 #17.25\n",
+ " 97 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm23, %zmm23 #18.38\n",
+ " 98 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm23, %zmm25 #18.38\n",
+ " 99 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23 #18.38\n",
+ " 100 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm25, %k4 #18.38\n",
+ " 101 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm23, %zmm23, %zmm24 #18.38\n",
+ " 102 | 1.00 | | | | | | | || | | knotw %k4, %k5 #18.38\n",
+ " 103 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm23, %zmm25{%k5} #18.38\n",
+ " 104 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm24, %zmm25{%k5} #18.38\n",
+ " 105 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm25, %zmm6 #18.38\n",
+ " 106 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm26, %zmm27 #17.14\n",
+ " 107 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm26, %ymm9 #17.9\n",
+ " 108 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm27, %zmm1, %zmm28 #17.18\n",
+ " 109 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm28, %zmm2, %zmm8 #17.25\n",
+ " 110 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm8, %zmm8, %zmm29 #18.38\n",
+ " 111 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm29, %zmm31 #18.38\n",
+ " 112 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29 #18.38\n",
+ " 113 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm31, %k6 #18.38\n",
+ " 114 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm29, %zmm29, %zmm30 #18.38\n",
+ " 115 | 1.00 | | | | | | | || | | knotw %k6, %k7 #18.38\n",
+ " 116 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm29, %zmm31{%k7} #18.38\n",
+ " 117 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm30, %zmm31{%k7} #18.38\n",
+ " 118 | 0.00 | | | | | 1.00 | | || 0.0 | 4.0 | vfmadd231pd %zmm4, %zmm31, %zmm3 #18.38\n",
+ " 119 | 0.00 | 0.34 | | | | 0.00 | 0.66 | || | | cmpl %edx, %ecx #16.5\n",
+ " 120 | 0.00 | | | | | | 1.00 | || | | jb ..B1.4 # Prob 82% #16.5\n",
+ "\n",
+ " 30.0 4.34 2.00 2.00 2.00 2.00 30.0 2.66 44 8.0 \n",
+ "\n",
+ "\n",
+ "Loop-Carried Dependencies Analysis Report\n",
+ "-----------------------------------------\n",
+ " 92 | 8.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38| [92, 118]\n",
+ " 79 | 8.0 | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38| [79, 105]\n",
+ " 66 | 4.0 | vpaddd %ymm5, %ymm9, %ymm14 #17.9| [66, 80, 93, 107]\n",
+ " 65 | 1.0 | addl $32, %ecx #16.5| [65]\n",
+ ""
+ ],
+ "text/plain": [
+ "Iterations: 100\n",
+ "Instructions: 5600\n",
+ "Total Cycles: 3585\n",
+ "Total uOps: 7200\n",
+ "\n",
+ "Dispatch Width: 6\n",
+ "uOps Per Cycle: 2.01\n",
+ "IPC: 1.56\n",
+ "Block RThroughput: 18.0\n",
+ "\n",
+ "\n",
+ "Instruction Info:\n",
+ "[1]: #uOps\n",
+ "[2]: Latency\n",
+ "[3]: RThroughput\n",
+ "[4]: MayLoad\n",
+ "[5]: MayStore\n",
+ "[6]: HasSideEffects (U)\n",
+ "\n",
+ "[1] [2] [3] [4] [5] [6] Instructions:\n",
+ " 1 1 0.25 addl\t$32, %ecx\n",
+ " 1 1 0.33 vpaddd\t%ymm5, %ymm9, %ymm14\n",
+ " 2 7 1.00 vcvtdq2pd\t%ymm9, %zmm8\n",
+ " 1 4 0.50 vaddpd\t%zmm8, %zmm1, %zmm10\n",
+ " 1 4 0.50 vmulpd\t%zmm10, %zmm2, %zmm11\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+ " 1 1 0.33 vmovaps\t%zmm0, %zmm29\n",
+ " 3 4 2.00 vrcp14pd\t%zmm11, %zmm13\n",
+ " 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+ " 1 4 1.00 vfpclasspd\t$30, %zmm13, %k0\n",
+ " 1 4 0.50 vmulpd\t%zmm11, %zmm11, %zmm12\n",
+ " 1 1 1.00 knotw\t%k0, %k1\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+ " 1 4 0.50 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+ " 1 1 0.33 vpaddd\t%ymm5, %ymm14, %ymm20\n",
+ " 2 7 1.00 vcvtdq2pd\t%ymm14, %zmm15\n",
+ " 1 4 0.50 vaddpd\t%zmm15, %zmm1, %zmm16\n",
+ " 1 4 0.50 vmulpd\t%zmm16, %zmm2, %zmm17\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+ " 3 4 2.00 vrcp14pd\t%zmm17, %zmm19\n",
+ " 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+ " 1 4 1.00 vfpclasspd\t$30, %zmm19, %k2\n",
+ " 1 4 0.50 vmulpd\t%zmm17, %zmm17, %zmm18\n",
+ " 1 1 1.00 knotw\t%k2, %k3\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+ " 1 4 0.50 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+ " 1 1 0.33 vpaddd\t%ymm5, %ymm20, %ymm26\n",
+ " 2 7 1.00 vcvtdq2pd\t%ymm20, %zmm21\n",
+ " 1 4 0.50 vaddpd\t%zmm21, %zmm1, %zmm22\n",
+ " 1 4 0.50 vmulpd\t%zmm22, %zmm2, %zmm23\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+ " 3 4 2.00 vrcp14pd\t%zmm23, %zmm25\n",
+ " 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+ " 1 4 1.00 vfpclasspd\t$30, %zmm25, %k4\n",
+ " 1 4 0.50 vmulpd\t%zmm23, %zmm23, %zmm24\n",
+ " 1 1 1.00 knotw\t%k4, %k5\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+ " 1 4 0.50 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+ " 2 7 1.00 vcvtdq2pd\t%ymm26, %zmm27\n",
+ " 1 1 0.33 vpaddd\t%ymm5, %ymm26, %ymm9\n",
+ " 1 4 0.50 vaddpd\t%zmm27, %zmm1, %zmm28\n",
+ " 1 4 0.50 vmulpd\t%zmm28, %zmm2, %zmm8\n",
+ " 1 4 0.50 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+ " 3 4 2.00 vrcp14pd\t%zmm29, %zmm31\n",
+ " 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+ " 1 4 1.00 vfpclasspd\t$30, %zmm31, %k6\n",
+ " 1 4 0.50 vmulpd\t%zmm29, %zmm29, %zmm30\n",
+ " 1 1 1.00 knotw\t%k6, %k7\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+ " 1 4 0.50 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+ " 1 4 0.50 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+ " 1 1 0.25 cmpl\t%edx, %ecx\n",
+ " 1 1 0.50 jb\t..B1.4\n",
+ "\n",
+ "\n",
+ "Resources:\n",
+ "[0] - SKXDivider\n",
+ "[1] - SKXFPDivider\n",
+ "[2] - SKXPort0\n",
+ "[3] - SKXPort1\n",
+ "[4] - SKXPort2\n",
+ "[5] - SKXPort3\n",
+ "[6] - SKXPort4\n",
+ "[7] - SKXPort5\n",
+ "[8] - SKXPort6\n",
+ "[9] - SKXPort7\n",
+ "\n",
+ "\n",
+ "Resource pressure per iteration:\n",
+ "[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] \n",
+ " - - 31.17 5.72 2.00 2.00 - 29.10 2.01 - \n",
+ "\n",
+ "Resource pressure by instruction:\n",
+ "[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:\n",
+ " - - - 0.80 - - - 0.19 0.01 - addl\t$32, %ecx\n",
+ " - - 0.07 0.92 - - - 0.01 - - vpaddd\t%ymm5, %ymm9, %ymm14\n",
+ " - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm9, %zmm8\n",
+ " - - 0.42 - - - - 0.58 - - vaddpd\t%zmm8, %zmm1, %zmm10\n",
+ " - - 0.51 - - - - 0.49 - - vmulpd\t%zmm10, %zmm2, %zmm11\n",
+ " - - 0.45 - - - - 0.55 - - vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+ " - - - 1.00 - - - - - - vmovaps\t%zmm0, %zmm29\n",
+ " - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm11, %zmm13\n",
+ " - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+ " - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm13, %k0\n",
+ " - - 0.49 - - - - 0.51 - - vmulpd\t%zmm11, %zmm11, %zmm12\n",
+ " - - 1.00 - - - - - - - knotw\t%k0, %k1\n",
+ " - - 0.44 - - - - 0.56 - - vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+ " - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+ " - - 0.70 - - - - 0.30 - - vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+ " - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm14, %ymm20\n",
+ " - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm14, %zmm15\n",
+ " - - 0.48 - - - - 0.52 - - vaddpd\t%zmm15, %zmm1, %zmm16\n",
+ " - - 0.42 - - - - 0.58 - - vmulpd\t%zmm16, %zmm2, %zmm17\n",
+ " - - 0.32 - - - - 0.68 - - vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+ " - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm17, %zmm19\n",
+ " - - 0.32 - 1.00 - - 0.68 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+ " - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm19, %k2\n",
+ " - - 0.47 - - - - 0.53 - - vmulpd\t%zmm17, %zmm17, %zmm18\n",
+ " - - 1.00 - - - - - - - knotw\t%k2, %k3\n",
+ " - - 0.53 - - - - 0.47 - - vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+ " - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+ " - - 0.57 - - - - 0.43 - - vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+ " - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm20, %ymm26\n",
+ " - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm20, %zmm21\n",
+ " - - 0.52 - - - - 0.48 - - vaddpd\t%zmm21, %zmm1, %zmm22\n",
+ " - - 0.47 - - - - 0.53 - - vmulpd\t%zmm22, %zmm2, %zmm23\n",
+ " - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+ " - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm23, %zmm25\n",
+ " - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+ " - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm25, %k4\n",
+ " - - 0.53 - - - - 0.47 - - vmulpd\t%zmm23, %zmm23, %zmm24\n",
+ " - - 1.00 - - - - - - - knotw\t%k4, %k5\n",
+ " - - 0.42 - - - - 0.58 - - vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+ " - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+ " - - 0.60 - - - - 0.40 - - vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+ " - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm26, %zmm27\n",
+ " - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm26, %ymm9\n",
+ " - - 0.26 - - - - 0.74 - - vaddpd\t%zmm27, %zmm1, %zmm28\n",
+ " - - 0.47 - - - - 0.53 - - vmulpd\t%zmm28, %zmm2, %zmm8\n",
+ " - - 0.34 - - - - 0.66 - - vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+ " - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm29, %zmm31\n",
+ " - - 0.34 - 1.00 - - 0.66 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+ " - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm31, %k6\n",
+ " - - 0.52 - - - - 0.48 - - vmulpd\t%zmm29, %zmm29, %zmm30\n",
+ " - - 1.00 - - - - - - - knotw\t%k6, %k7\n",
+ " - - 0.47 - - - - 0.53 - - vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+ " - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+ " - - 0.66 - - - - 0.34 - - vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+ " - - - - - - - - 1.00 - cmpl\t%edx, %ecx\n",
+ " - - - - - - - - 1.00 - jb\t..B1.4\n",
+ "\n",
+ "\n",
+ "Timeline view:\n",
+ " 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 \n",
+ "Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678\n",
+ "\n",
+ "[0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
+ "[0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
+ "[0,2] D=eeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
+ "[0,3] D========eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
+ "[0,4] D============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
+ "[0,5] .D===============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+ "[0,6] .DeE------------------R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
+ "[0,7] .D===================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
+ "[0,8] . D======================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+ "[0,9] . D======================eeeeE-------R . . . . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
+ "[0,10] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
+ "[0,11] . D==========================eE----------R . . . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
+ "[0,12] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+ "[0,13] . D====================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+ "[0,14] . D========================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+ "[0,15] . DeE-------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
+ "[0,16] . DeeeeeeeE-------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
+ "[0,17] . D=======eeeeE---------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
+ "[0,18] . D==========eeeeE-----------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
+ "[0,19] . D==============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+ "[0,20] . D==================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
+ "[0,21] . D=====================eeeeeeeeeeeE----------R. . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+ "[0,22] . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
+ "[0,23] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
+ "[0,24] . D==========================eE---------------R. . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
+ "[0,25] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+ "[0,26] . .D===================================eeeeE--R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+ "[0,27] . .D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+ "[0,28] . .DeE------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
+ "[0,29] . .DeeeeeeeE------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
+ "[0,30] . .D=======eeeeE--------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
+ "[0,31] . . D==========eeeeE----------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
+ "[0,32] . . D==============eeeeE------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+ "[0,33] . . D==================eeeeE--------------------R . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
+ "[0,34] . . D=====================eeeeeeeeeeeE---------R . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+ "[0,35] . . D=====================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
+ "[0,36] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
+ "[0,37] . . D==========================eE--------------R . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
+ "[0,38] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+ "[0,39] . . D===================================eeeeE-R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+ "[0,40] . . D=======================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+ "[0,41] . . DeeeeeeeE------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
+ "[0,42] . . DeE------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
+ "[0,43] . . D=======eeeeE--------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
+ "[0,44] . . D=============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
+ "[0,45] . . D=================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+ "[0,46] . . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
+ "[0,47] . . .D=========================eeeeeeeeeeeE-----R. . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+ "[0,48] . . .D=========================eeeeE------------R. . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
+ "[0,49] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
+ "[0,50] . . .D==============================eE----------R. . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
+ "[0,51] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+ "[0,52] . . . D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+ "[0,53] . . . D===========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+ "[0,54] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
+ "[0,55] . . . D=eE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . jb\t..B1.4\n",
+ "[1,0] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
+ "[1,1] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
+ "[1,2] . . . D==eeeeeeeE-------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
+ "[1,3] . . . D===============eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
+ "[1,4] . . . D====================eeeeE----------------------R . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
+ "[1,5] . . . D=========================eeeeE-----------------R . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+ "[1,6] . . . DeE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
+ "[1,7] . . . D============================eeeeE-------------R . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
+ "[1,8] . . . D================================eeeeeeeeeeeE--R . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+ "[1,9] . . . D================================eeeeE---------R . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
+ "[1,10] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
+ "[1,11] . . . D====================================eE---------R . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
+ "[1,12] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+ "[1,13] . . . D==============================================eeeeER . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+ "[1,14] . . . D==================================================eeeeER . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+ "[1,15] . . . DeE-----------------------------------------------------R . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
+ "[1,16] . . . .D===eeeeeeeE-------------------------------------------R . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
+ "[1,17] . . . .D==============eeeeE-----------------------------------R . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
+ "[1,18] . . . .D==================eeeeE-------------------------------R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
+ "[1,19] . . . .D======================eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+ "[1,20] . . . . D================================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
+ "[1,21] . . . . D====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+ "[1,22] . . . . D=====================================eeeeE-----------R . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
+ "[1,23] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
+ "[1,24] . . . . D========================================eE----------R . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
+ "[1,25] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+ "[1,26] . . . . D==================================================eeeeER. . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+ "[1,27] . . . . D======================================================eeeeER . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+ "[1,28] . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
+ "[1,29] . . . . D=================================eeeeeeeE-----------------R . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
+ "[1,30] . . . . D========================================eeeeE-------------R . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
+ "[1,31] . . . . D===========================================eeeeE---------R . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
+ "[1,32] . . . . .D==============================================eeeeE-----R . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+ "[1,33] . . . . . D=================================================eeeeE-R . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
+ "[1,34] . . . . . D====================================================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+ "[1,35] . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
+ "[1,36] . . . . . D=============================================================eeeeER . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
+ "[1,37] . . . . . D======================================================eE----------R . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
+ "[1,38] . . . . . .D============================================================eeeeER . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+ "[1,39] . . . . . . D===============================================================eeeeER . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+ "[1,40] . . . . . . D==================================================================eeeeER . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+ "[1,41] . . . . . . D============================eeeeeeeE-----------------------------------R . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
+ "[1,42] . . . . . . DeE--------------------------------------------------------------------R . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
+ "[1,43] . . . . . . D==================================eeeeE-------------------------------R . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
+ "[1,44] . . . . . . D=====================================eeeeE---------------------------R . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
+ "[1,45] . . . . . . D===========================================eeeeE---------------------R . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+ "[1,46] . . . . . . D===============================================eeeeE-----------------R . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
+ "[1,47] . . . . . . .D==================================================eeeeeeeeeeeE------R . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+ "[1,48] . . . . . . . D=================================================eeeeE-------------R . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
+ "[1,49] . . . . . . . D===========================================================eeeeE--R . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
+ "[1,50] . . . . . . . D=====================================================eE----------R . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
+ "[1,51] . . . . . . . D==========================================================eeeeE-R . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+ "[1,52] . . . . . . . D==============================================================eeeeER . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+ "[1,53] . . . . . . . .D=================================================================eeeeER . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+ "[1,54] . . . . . . . .DeE--------------------------------------------------------------------R . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
+ "[1,55] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . jb\t..B1.4\n",
+ "[2,0] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . addl\t$32, %ecx\n",
+ "[2,1] . . . . . . . . D=eE------------------------------------------------------------------R . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
+ "[2,2] . . . . . . . . D======================eeeeeeeE--------------------------------------R . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
+ "[2,3] . . . . . . . . D==============================eeeeE---------------------------------R . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
+ "[2,4] . . . . . . . . D===================================eeeeE----------------------------R . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
+ "[2,5] . . . . . . . . D========================================eeeeE-----------------------R . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+ "[2,6] . . . . . . . . DeE-----------------------------------------------------------------R . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
+ "[2,7] . . . . . . . . D===========================================eeeeE-------------------R . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
+ "[2,8] . . . . . . . . D================================================eeeeeeeeeeeE-------R . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+ "[2,9] . . . . . . . . D================================================eeeeE-------------R . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
+ "[2,10] . . . . . . . . D==========================================================eeeeE---R . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
+ "[2,11] . . . . . . . . .D======================================================eE---------R . . . . . . . . . . . . . knotw\t%k0, %k1\n",
+ "[2,12] . . . . . . . . .D=========================================================eeeeE---R . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+ "[2,13] . . . . . . . . . D============================================================eeeeER . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+ "[2,14] . . . . . . . . . D================================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+ "[2,15] . . . . . . . . . DeE------------------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
+ "[2,16] . . . . . . . . . D==================eeeeeeeE-----------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
+ "[2,17] . . . . . . . . . D=========================eeeeE-------------------------------------R . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
+ "[2,18] . . . . . . . . . D=============================eeeeE--------------------------------R . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
+ "[2,19] . . . . . . . . . .D=================================eeeeE---------------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+ "[2,20] . . . . . . . . . . D=====================================eeeeE----------------------R . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
+ "[2,21] . . . . . . . . . . D=========================================eeeeeeeeeeeE-----------R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+ "[2,22] . . . . . . . . . . D=========================================eeeeE-----------------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
+ "[2,23] . . . . . . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
+ "[2,24] . . . . . . . . . . D===============================================eE-------------R . . . . . . . . . . . . knotw\t%k2, %k3\n",
+ "[2,25] . . . . . . . . . . D=================================================eeeeE-------R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+ "[2,26] . . . . . . . . . . . D===================================================eeeeE---R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+ "[2,27] . . . . . . . . . . . D=======================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+ "[2,28] . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
+ "[2,29] . . . . . . . . . . . D============eeeeeeeE--------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
+ "[2,30] . . . . . . . . . . . D====================eeeeE---------------------------------R . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
+ "[2,31] . . . . . . . . . . . D=========================eeeeE---------------------------R . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
+ "[2,32] . . . . . . . . . . . .D=============================eeeeE----------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+ "[2,33] . . . . . . . . . . . . D==================================eeeeE----------------R . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
+ "[2,34] . . . . . . . . . . . . D=====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+ "[2,35] . . . . . . . . . . . . D======================================eeeeE-----------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
+ "[2,36] . . . . . . . . . . . . D===============================================eeeeE-R . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
+ "[2,37] . . . . . . . . . . . . D========================================eE----------R . . . . . . . . . . . . knotw\t%k4, %k5\n",
+ "[2,38] . . . . . . . . . . . . .D==============================================eeeeER . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+ "[2,39] . . . . . . . . . . . . . D=================================================eeeeER . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+ "[2,40] . . . . . . . . . . . . . D====================================================eeeeER . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+ "[2,41] . . . . . . . . . . . . . D======eeeeeeeE------------------------------------------R . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
+ "[2,42] . . . . . . . . . . . . . DeE------------------------------------------------------R . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
+ "[2,43] . . . . . . . . . . . . . D===============eeeeE-----------------------------------R . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
+ "[2,44] . . . . . . . . . . . . . D========================eeeeE--------------------------R . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
+ "[2,45] . . . . . . . . . . . . . D============================eeeeE----------------------R . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+ "[2,46] . . . . . . . . . . . . . .D======================================eeeeE-----------R . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
+ "[2,47] . . . . . . . . . . . . . . D=========================================eeeeeeeeeeeER . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+ "[2,48] . . . . . . . . . . . . . . D=========================================eeeeE------R . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
+ "[2,49] . . . . . . . . . . . . . . D===================================================eeeeER . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
+ "[2,50] . . . . . . . . . . . . . . D============================================eE---------R . . . . . . . . . knotw\t%k6, %k7\n",
+ "[2,51] . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+ "[2,52] . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+ "[2,53] . . . . . . . . . . . . . . .D========================================================eeeeER . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+ "[2,54] . . . . . . . . . . . . . . . DeE----------------------------------------------------------R . . . . . . . . cmpl\t%edx, %ecx\n",
+ "[2,55] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . jb\t..B1.4\n",
+ "[3,0] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . addl\t$32, %ecx\n",
+ "[3,1] . . . . . . . . . . . . . . . DeE--------------------------------------------------------R . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
+ "[3,2] . . . . . . . . . . . . . . . D==eeeeeeeE------------------------------------------------R . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
+ "[3,3] . . . . . . . . . . . . . . . D=========eeeeE--------------------------------------------R . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
+ "[3,4] . . . . . . . . . . . . . . . D================eeeeE-------------------------------------R . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
+ "[3,5] . . . . . . . . . . . . . . . D===================eeeeE---------------------------------R . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+ "[3,6] . . . . . . . . . . . . . . . DeE-------------------------------------------------------R . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
+ "[3,7] . . . . . . . . . . . . . . . D===================================eeeeE-----------------R . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
+ "[3,8] . . . . . . . . . . . . . . . .D======================================eeeeeeeeeeeE------R . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+ "[3,9] . . . . . . . . . . . . . . . .D=======================================eeeeE------------R . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
+ "[3,10] . . . . . . . . . . . . . . . .D=================================================eeeeE--R . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
+ "[3,11] . . . . . . . . . . . . . . . . D===========================================eE----------R . . . . . . . . knotw\t%k0, %k1\n",
+ "[3,12] . . . . . . . . . . . . . . . . D===============================================eeeeE--R . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+ "[3,13] . . . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+ "[3,14] . . . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+ "[3,15] . . . . . . . . . . . . . . . . DeE--------------------------------------------------------R. . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
+ "[3,16] . . . . . . . . . . . . . . . . .D===============================eeeeeeeE------------------R. . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
+ "[3,17] . . . . . . . . . . . . . . . . .D=======================================eeeeE-------------R. . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
+ "[3,18] . . . . . . . . . . . . . . . . .D===========================================eeeeE---------R. . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
+ "[3,19] . . . . . . . . . . . . . . . . . D==============================================eeeeE-----R. . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+ "[3,20] . . . . . . . . . . . . . . . . . D==================================================eeeeE-R. . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
+ "[3,21] . . . . . . . . . . . . . . . . . D=====================================================eeeeeeeeeeeER. . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+ "[3,22] . . . . . . . . . . . . . . . . . D=====================================================eeeeE------R. . . . . vfpclasspd\t$30, %zmm19, %k2\n",
+ "[3,23] . . . . . . . . . . . . . . . . . D==============================================================eeeeER . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
+ "[3,24] . . . . . . . . . . . . . . . . . .D=======================================================eE---------R . . . . knotw\t%k2, %k3\n",
+ "[3,25] . . . . . . . . . . . . . . . . . . D============================================================eeeeER . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+ "[3,26] . . . . . . . . . . . . . . . . . . D================================================================eeeeER . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+ "[3,27] . . . . . . . . . . . . . . . . . . D===================================================================eeeeER . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+ "[3,28] . . . . . . . . . . . . . . . . . . DeE----------------------------------------------------------------------R . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
+ "[3,29] . . . . . . . . . . . . . . . . . . D===========================eeeeeeeE------------------------------------R . . vcvtdq2pd\t%ymm20, %zmm21\n",
+ "[3,30] . . . . . . . . . . . . . . . . . . D==================================eeeeE--------------------------------R . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
+ "[3,31] . . . . . . . . . . . . . . . . . . D======================================eeeeE----------------------------R . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
+ "[3,32] . . . . . . . . . . . . . . . . . . D=========================================eeeeE------------------------R . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+ "[3,33] . . . . . . . . . . . . . . . . . . D=============================================eeeeE--------------------R . . vrcp14pd\t%zmm23, %zmm25\n",
+ "[3,34] . . . . . . . . . . . . . . . . . . .D================================================eeeeeeeeeeeE---------R . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+ "[3,35] . . . . . . . . . . . . . . . . . . .D=================================================eeeeE---------------R . . vfpclasspd\t$30, %zmm25, %k4\n",
+ "[3,36] . . . . . . . . . . . . . . . . . . . D==========================================================eeeeE-----R . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
+ "[3,37] . . . . . . . . . . . . . . . . . . . D====================================================eE-------------R . . knotw\t%k4, %k5\n",
+ "[3,38] . . . . . . . . . . . . . . . . . . . D========================================================eeeeE-----R . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+ "[3,39] . . . . . . . . . . . . . . . . . . . D============================================================eeeeE-R . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+ "[3,40] . . . . . . . . . . . . . . . . . . . D===============================================================eeeeER. . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+ "[3,41] . . . . . . . . . . . . . . . . . . . D======================eeeeeeeE--------------------------------------R. . vcvtdq2pd\t%ymm26, %zmm27\n",
+ "[3,42] . . . . . . . . . . . . . . . . . . . .DeE-----------------------------------------------------------------R. . vpaddd\t%ymm5, %ymm26, %ymm9\n",
+ "[3,43] . . . . . . . . . . . . . . . . . . . .D============================eeeeE----------------------------------R. . vaddpd\t%zmm27, %zmm1, %zmm28\n",
+ "[3,44] . . . . . . . . . . . . . . . . . . . . D===============================eeeeE------------------------------R. . vmulpd\t%zmm28, %zmm2, %zmm8\n",
+ "[3,45] . . . . . . . . . . . . . . . . . . . . D=====================================eeeeE------------------------R. . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+ "[3,46] . . . . . . . . . . . . . . . . . . . . D=========================================eeeeE--------------------R. . vrcp14pd\t%zmm29, %zmm31\n",
+ "[3,47] . . . . . . . . . . . . . . . . . . . . D============================================eeeeeeeeeeeE---------R. . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+ "[3,48] . . . . . . . . . . . . . . . . . . . . D===========================================eeeeE----------------R. . vfpclasspd\t$30, %zmm31, %k6\n",
+ "[3,49] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE-----R. . vmulpd\t%zmm29, %zmm29, %zmm30\n",
+ "[3,50] . . . . . . . . . . . . . . . . . . . . D==============================================eE---------------R. . knotw\t%k6, %k7\n",
+ "[3,51] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE----R. . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+ "[3,52] . . . . . . . . . . . . . . . . . . . . .D=========================================================eeeeER. . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+ "[3,53] . . . . . . . . . . . . . . . . . . . . . D============================================================eeeeER vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+ "[3,54] . . . . . . . . . . . . . . . . . . . . . DeE--------------------------------------------------------------R cmpl\t%edx, %ecx\n",
+ "[3,55] . . . . . . . . . . . . . . . . . . . . . DeE-------------------------------------------------------------R jb\t..B1.4\n",
+ "\n",
+ "\n",
+ "Average Wait times (based on the timeline view):\n",
+ "[0]: Executions\n",
+ "[1]: Average time spent waiting in a scheduler's queue\n",
+ "[2]: Average time spent waiting in a scheduler's queue while ready\n",
+ "[3]: Average time elapsed from WB until retire stage\n",
+ "\n",
+ " [0] [1] [2] [3]\n",
+ "0. 4 1.0 1.0 42.5 addl\t$32, %ecx\n",
+ "1. 4 1.3 1.3 42.0 vpaddd\t%ymm5, %ymm9, %ymm14\n",
+ "2. 4 7.8 7.8 30.8 vcvtdq2pd\t%ymm9, %zmm8\n",
+ "3. 4 16.5 1.8 26.0 vaddpd\t%zmm8, %zmm1, %zmm10\n",
+ "4. 4 21.8 1.3 21.8 vmulpd\t%zmm10, %zmm2, %zmm11\n",
+ "5. 4 25.8 0.5 18.3 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+ "6. 4 1.0 1.0 45.8 vmovaps\t%zmm0, %zmm29\n",
+ "7. 4 32.3 3.0 12.3 vrcp14pd\t%zmm11, %zmm13\n",
+ "8. 4 36.0 0.3 3.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+ "9. 4 36.3 0.8 10.3 vfpclasspd\t$30, %zmm13, %k0\n",
+ "10. 4 46.5 0.0 1.3 vmulpd\t%zmm11, %zmm11, %zmm12\n",
+ "11. 4 40.8 1.3 9.5 knotw\t%k0, %k1\n",
+ "12. 4 45.8 0.0 1.3 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+ "13. 4 49.0 0.0 0.0 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+ "14. 4 52.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+ "15. 4 1.0 1.0 54.5 vpaddd\t%ymm5, %ymm14, %ymm20\n",
+ "16. 4 14.0 14.0 34.8 vcvtdq2pd\t%ymm14, %zmm15\n",
+ "17. 4 22.3 1.3 29.5 vaddpd\t%zmm15, %zmm1, %zmm16\n",
+ "18. 4 26.0 0.3 25.3 vmulpd\t%zmm16, %zmm2, %zmm17\n",
+ "19. 4 29.8 0.3 21.0 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+ "20. 4 35.3 2.0 15.0 vrcp14pd\t%zmm17, %zmm19\n",
+ "21. 4 38.8 0.0 6.5 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+ "22. 4 39.3 1.0 12.5 vfpclasspd\t$30, %zmm19, %k2\n",
+ "23. 4 48.8 0.0 3.5 vmulpd\t%zmm17, %zmm17, %zmm18\n",
+ "24. 4 43.0 0.8 11.8 knotw\t%k2, %k3\n",
+ "25. 4 47.8 0.0 3.5 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+ "26. 4 51.0 0.0 1.3 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+ "27. 4 54.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+ "28. 4 1.0 1.0 56.5 vpaddd\t%ymm5, %ymm20, %ymm26\n",
+ "29. 4 19.0 19.0 31.8 vcvtdq2pd\t%ymm20, %zmm21\n",
+ "30. 4 26.3 0.3 27.5 vaddpd\t%zmm21, %zmm1, %zmm22\n",
+ "31. 4 30.0 0.5 23.0 vmulpd\t%zmm22, %zmm2, %zmm23\n",
+ "32. 4 33.5 0.3 18.8 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+ "33. 4 37.5 0.5 14.3 vrcp14pd\t%zmm23, %zmm25\n",
+ "34. 4 40.5 0.0 5.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+ "35. 4 40.8 0.5 12.3 vfpclasspd\t$30, %zmm25, %k4\n",
+ "36. 4 50.5 0.0 2.8 vmulpd\t%zmm23, %zmm23, %zmm24\n",
+ "37. 4 44.0 0.5 11.8 knotw\t%k4, %k5\n",
+ "38. 4 49.5 0.3 2.5 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+ "39. 4 52.8 0.0 0.5 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+ "40. 4 56.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+ "41. 4 15.0 15.0 37.8 vcvtdq2pd\t%ymm26, %zmm27\n",
+ "42. 4 1.0 1.0 57.3 vpaddd\t%ymm5, %ymm26, %ymm9\n",
+ "43. 4 22.0 0.8 33.0 vaddpd\t%zmm27, %zmm1, %zmm28\n",
+ "44. 4 27.3 2.0 27.0 vmulpd\t%zmm28, %zmm2, %zmm8\n",
+ "45. 4 32.3 1.0 22.0 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+ "46. 4 38.0 2.0 16.0 vrcp14pd\t%zmm29, %zmm31\n",
+ "47. 4 41.0 0.0 5.0 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+ "48. 4 40.5 0.3 11.8 vfpclasspd\t$30, %zmm31, %k6\n",
+ "49. 4 51.0 0.0 2.0 vmulpd\t%zmm29, %zmm29, %zmm30\n",
+ "50. 4 44.3 0.8 11.0 knotw\t%k6, %k7\n",
+ "51. 4 50.5 0.5 1.5 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+ "52. 4 53.8 0.0 0.0 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+ "53. 4 57.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+ "54. 4 1.0 1.0 58.5 cmpl\t%edx, %ecx\n",
+ "55. 4 1.3 0.0 57.5 jb\t..B1.4\n",
+ " 4 32.5 1.6 18.4 \n",
+ " "
+ ],
+ "text/plain": [
+ "Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-30;16:57:45\n",
+ "Analyzed File - build/SKX/icc/O3/pi.marked.o\n",
+ "Binary Format - 64Bit\n",
+ "Architecture - SKX\n",
+ "Analysis Type - Throughput\n",
+ "\n",
+ "Throughput Analysis Report\n",
+ "--------------------------\n",
+ "Block Throughput: 31.50 Cycles Throughput Bottleneck: Backend\n",
+ "Loop Count: 103\n",
+ "Port Binding In Cycles Per Iteration:\n",
+ "--------------------------------------------------------------------------------------------------\n",
+ "| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
+ "--------------------------------------------------------------------------------------------------\n",
+ "| Cycles | 30.0 0.0 | 4.0 | 2.0 2.0 | 2.0 2.0 | 0.0 | 30.0 | 1.0 | 0.0 |\n",
+ "--------------------------------------------------------------------------------------------------\n",
+ "\n",
+ "DV - Divider pipe (on port 0)\n",
+ "D - Data fetch pipe (on ports 2 and 3)\n",
+ "F - Macro Fusion with the previous instruction occurred\n",
+ "* - instruction micro-ops not bound to a port\n",
+ "^ - Micro Fusion occurred\n",
+ "# - ESP Tracking sync uop was issued\n",
+ "@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected\n",
+ "X - instruction not supported, was not accounted in Analysis\n",
+ "\n",
+ "| Num Of | Ports pressure in cycles | |\n",
+ "| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
+ "-----------------------------------------------------------------------------------------\n",
+ "| 1 | | | | | | | 1.0 | | add ecx, 0x20\n",
+ "| 1 | | 1.0 | | | | | | | vpaddd ymm14, ymm9, ymm5\n",
+ "| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm8, ymm9\n",
+ "| 1 | | | | | | 1.0 | | | vaddpd zmm10, zmm1, zmm8\n",
+ "| 1 | 1.0 | | | | | | | | vmulpd zmm11, zmm2, zmm10\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm11, zmm11, zmm0\n",
+ "| 1* | | | | | | | | | vmovaps zmm29, zmm0\n",
+ "| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm13, zmm11\n",
+ "| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm11, zmm13, qword ptr [rip]{1to8}\n",
+ "| 1 | | | | | | 1.0 | | | vfpclasspd k0, zmm13, 0x1e\n",
+ "| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm11, zmm11\n",
+ "| 1 | 1.0 | | | | | | | | knotw k1, k0\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm13{k1}, zmm11, zmm13\n",
+ "| 1 | 1.0 | | | | | | | | vfmadd213pd zmm13{k1}, zmm12, zmm13\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm13, zmm4\n",
+ "| 1 | | 1.0 | | | | | | | vpaddd ymm20, ymm14, ymm5\n",
+ "| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm15, ymm14\n",
+ "| 1 | 1.0 | | | | | | | | vaddpd zmm16, zmm1, zmm15\n",
+ "| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm2, zmm16\n",
+ "| 1 | 1.0 | | | | | | | | vfmadd213pd zmm17, zmm17, zmm0\n",
+ "| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm19, zmm17\n",
+ "| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm17, zmm19, qword ptr [rip]{1to8}\n",
+ "| 1 | | | | | | 1.0 | | | vfpclasspd k2, zmm19, 0x1e\n",
+ "| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm17, zmm17\n",
+ "| 1 | 1.0 | | | | | | | | knotw k3, k2\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm17, zmm19\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm18, zmm19\n",
+ "| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm4\n",
+ "| 1 | | 1.0 | | | | | | | vpaddd ymm26, ymm20, ymm5\n",
+ "| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm21, ymm20\n",
+ "| 1 | | | | | | 1.0 | | | vaddpd zmm22, zmm1, zmm21\n",
+ "| 1 | 1.0 | | | | | | | | vmulpd zmm23, zmm2, zmm22\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm23, zmm23, zmm0\n",
+ "| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm25, zmm23\n",
+ "| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm23, zmm25, qword ptr [rip]{1to8}\n",
+ "| 1 | | | | | | 1.0 | | | vfpclasspd k4, zmm25, 0x1e\n",
+ "| 1 | 1.0 | | | | | | | | vmulpd zmm24, zmm23, zmm23\n",
+ "| 1 | 1.0 | | | | | | | | knotw k5, k4\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm25{k5}, zmm23, zmm25\n",
+ "| 1 | 1.0 | | | | | | | | vfmadd213pd zmm25{k5}, zmm24, zmm25\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm25, zmm4\n",
+ "| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm27, ymm26\n",
+ "| 1 | | 1.0 | | | | | | | vpaddd ymm9, ymm26, ymm5\n",
+ "| 1 | 1.0 | | | | | | | | vaddpd zmm28, zmm1, zmm27\n",
+ "| 1 | | | | | | 1.0 | | | vmulpd zmm8, zmm2, zmm28\n",
+ "| 1 | 1.0 | | | | | | | | vfmadd231pd zmm29, zmm8, zmm8\n",
+ "| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm31, zmm29\n",
+ "| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm29, zmm31, qword ptr [rip]{1to8}\n",
+ "| 1 | | | | | | 1.0 | | | vfpclasspd k6, zmm31, 0x1e\n",
+ "| 1 | 1.0 | | | | | | | | vmulpd zmm30, zmm29, zmm29\n",
+ "| 1 | 1.0 | | | | | | | | knotw k7, k6\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm29, zmm31\n",
+ "| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm30, zmm31\n",
+ "| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm31, zmm4\n",
+ "| 1* | | | | | | | | | cmp ecx, edx\n",
+ "| 0*F | | | | | | | | | jb 0xfffffffffffffeb3\n",
+ "Total Num Of Uops: 71\n",
+ "Analysis Notes:\n",
+ "Backend allocation was stalled due to unavailable allocation resources.\n",
+ ""
+ ],
+ "text/plain": [
+ "