diff --git a/osaca/data/a64fx.yml b/osaca/data/a64fx.yml index a53a8f3..a39e374 100644 --- a/osaca/data/a64fx.yml +++ b/osaca/data/a64fx.yml @@ -1132,6 +1132,27 @@ instruction_forms: throughput: 2.0 latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses +- name: ld2d + operands: + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: p + predication: '*' + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 2.0 + latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D + port_pressure: [[2, '56'], [4, ['5D', '6D']]] - name: ldp operands: - class: register @@ -1980,6 +2001,27 @@ instruction_forms: throughput: 1.0 latency: 0 # 1*p5+1*p6+1*p0 port_pressure: [[1, '5'], [1, '6'], [1, '0']] +- name: st2d + operands: + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: 'z' + shape: 'd' + - class: register + prefix: p + predication: '*' + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 1*p5+1*p6+1*p0 + port_pressure: [[1, '5'], [1, '6'], [1, '0']] - name: sub operands: - class: register diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index 2b216fd..fd8a681 100755 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - - +from copy import deepcopy import pyparsing as pp from osaca.parser import AttrDict, BaseParser @@ -240,7 +239,7 @@ class ParserAArch64(BaseParser): # 1. Parse comment try: - result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict()) + result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())[0] result = AttrDict.convert_dict(result) instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID]) except pp.ParseException: @@ -249,7 +248,7 @@ class ParserAArch64(BaseParser): try: result = self.process_operand( self.llvm_markers.parseString(line, parseAll=True).asDict() - ) + )[0] result = AttrDict.convert_dict(result) instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID]) except pp.ParseException: @@ -257,7 +256,7 @@ class ParserAArch64(BaseParser): # 2. Parse label if result is None: try: - result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) + result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())[0] result = AttrDict.convert_dict(result) instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name if self.COMMENT_ID in result[self.LABEL_ID]: @@ -272,7 +271,7 @@ class ParserAArch64(BaseParser): try: result = self.process_operand( self.directive.parseString(line, parseAll=True).asDict() - ) + )[0] result = AttrDict.convert_dict(result) instruction_form[self.DIRECTIVE_ID] = AttrDict( { @@ -292,7 +291,6 @@ class ParserAArch64(BaseParser): try: result = self.parse_instruction(line) except (pp.ParseException, KeyError) as e: - raise e raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] @@ -313,19 +311,19 @@ class ParserAArch64(BaseParser): # Add operands to list # Check first operand if "operand1" in result: - operands.append(self.process_operand(result["operand1"])) + operands += self.process_operand(result["operand1"]) # Check second operand if "operand2" in result: - operands.append(self.process_operand(result["operand2"])) + operands += self.process_operand(result["operand2"]) # Check third operand if "operand3" in result: - operands.append(self.process_operand(result["operand3"])) + operands += self.process_operand(result["operand3"]) # Check fourth operand if "operand4" in result: - operands.append(self.process_operand(result["operand4"])) + operands += self.process_operand(result["operand4"]) # Check fifth operand if "operand5" in result: - operands.append(self.process_operand(result["operand5"])) + operands += self.process_operand(result["operand5"]) return_dict = AttrDict( { @@ -342,23 +340,23 @@ class ParserAArch64(BaseParser): """Post-process operand""" # structure memory addresses if self.MEMORY_ID in operand: - return self.process_memory_address(operand[self.MEMORY_ID]) + return [self.process_memory_address(operand[self.MEMORY_ID])] # structure register lists if self.REGISTER_ID in operand and ( "list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID] ): - # TODO: discuss if ranges should be converted to lists - return self.process_register_list(operand[self.REGISTER_ID]) + # resolve ranges and lists + return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID])) if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"] == "sp": - return self.process_sp_register(operand[self.REGISTER_ID]) + return [self.process_sp_register(operand[self.REGISTER_ID])] # add value attribute to floating point immediates without exponent if self.IMMEDIATE_ID in operand: - return self.process_immediate(operand[self.IMMEDIATE_ID]) + return [self.process_immediate(operand[self.IMMEDIATE_ID])] if self.LABEL_ID in operand: - return self.process_label(operand[self.LABEL_ID]) + return [self.process_label(operand[self.LABEL_ID])] if self.IDENTIFIER_ID in operand: - return self.process_identifier(operand[self.IDENTIFIER_ID]) - return operand + return [self.process_identifier(operand[self.IDENTIFIER_ID])] + return [operand] def process_memory_address(self, memory_address): """Post-process memory address operand""" @@ -391,6 +389,36 @@ class ParserAArch64(BaseParser): reg["prefix"] = "x" return AttrDict({self.REGISTER_ID: reg}) + def resolve_range_list(self, operand): + """ + Resolve range or list register operand to list of registers. + + Returns None if neither list nor range + """ + if 'register' in operand: + if 'list' in operand.register: + index = operand.register.get('index') + l = [] + for reg in operand.register.list: + reg = deepcopy(reg) + if index is not None: + reg.index = index + l.append(AttrDict({self.REGISTER_ID: reg})) + return l + elif 'range' in operand.register: + base_register = operand.register.range[0] + index = operand.register.get('index') + l = [] + start_name = base_register.name + end_name = operand.register.range[1].name + for name in range(int(start_name), int(end_name)+1): + reg = deepcopy(base_register) + if index is not None: + reg['index'] = operand.register.range.index + reg['name'] = str(name) + l.append(AttrDict({self.REGISTER_ID: reg})) + return l + def process_register_list(self, register_list): """Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})""" # Remove unnecessarily created dictionary entries during parsing diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index eca5869..b450c17 100755 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 from itertools import chain +from copy import deepcopy from osaca import utils from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT @@ -122,6 +123,7 @@ class ISASemantics(object): "pre_indexed": pre_indexed, "post_indexed": post_indexed}) ) + # store operand list in dict and reassign operand key/value pair instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict) # assign LD/ST flags @@ -130,6 +132,7 @@ class ISASemantics(object): instruction_form["flags"] += [INSTR_FLAGS.HAS_LD] if self._has_store(instruction_form): instruction_form["flags"] += [INSTR_FLAGS.HAS_ST] + def get_reg_changes(self, instruction_form, only_postindexed=False): """ diff --git a/tests/test_semantics.py b/tests/test_semantics.py index b322d16..d3f5ef1 100755 --- a/tests/test_semantics.py +++ b/tests/test_semantics.py @@ -34,6 +34,8 @@ class TestSemanticTools(unittest.TestCase): cls.code_aarch64_memdep = f.read() with open(cls._find_file("kernel_aarch64.s")) as f: cls.code_AArch64 = f.read() + with open(cls._find_file("kernel_aarch64_sve.s")) as f: + cls.code_AArch64_SVE = f.read() cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86") cls.kernel_x86_memdep = reduce_to_section( cls.parser_x86.parse_file(cls.code_x86_memdep), "x86") @@ -41,6 +43,8 @@ class TestSemanticTools(unittest.TestCase): cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64") cls.kernel_aarch64_memdep = reduce_to_section( cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64") + cls.kernel_aarch64_SVE = reduce_to_section( + cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64") # set up machine models cls.machine_model_csx = MachineModel( @@ -49,6 +53,9 @@ class TestSemanticTools(unittest.TestCase): cls.machine_model_tx2 = MachineModel( path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml") ) + cls.machine_model_a64fx = MachineModel( + path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml") + ) cls.semantics_x86 = ISASemantics("x86") cls.semantics_csx = ArchSemantics( cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml") @@ -58,6 +65,10 @@ class TestSemanticTools(unittest.TestCase): cls.machine_model_tx2, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), ) + cls.semantics_a64fx = ArchSemantics( + cls.machine_model_a64fx, + path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), + ) cls.machine_model_zen = MachineModel(arch="zen1") for i in range(len(cls.kernel_x86)): @@ -72,6 +83,9 @@ class TestSemanticTools(unittest.TestCase): for i in range(len(cls.kernel_aarch64_memdep)): cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i]) cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i]) + for i in range(len(cls.kernel_aarch64_SVE)): + cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i]) + cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i]) ########### # Tests @@ -320,6 +334,11 @@ class TestSemanticTools(unittest.TestCase): dg.get_dependent_instruction_forms() # test dot creation dg.export_graph(filepath="/dev/null") + + def test_kernelDG_SVE(self): + dg = KernelDG(self.kernel_aarch64_SVE, self.parser_AArch64, self.machine_model_a64fx, + self.semantics_a64fx) + # TODO check for correct analysis def test_hidden_load(self): machine_model_hld = MachineModel( @@ -421,6 +440,7 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1)) self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2)) self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm)) + def test_is_read_is_written_AArch64(self): # independent form HW model diff --git a/validation/Analysis.ipynb b/validation/Analysis.ipynb index 4a21cd7..57fd7f0 100644 --- a/validation/Analysis.ipynb +++ b/validation/Analysis.ipynb @@ -232,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -284,8 +284,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 27, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", @@ -296,8 +298,672 @@ "ZEN has 156 tests, compiled to 126 unique assembly representations.\n", "ZEN2 has 156 tests, compiled to 126 unique assembly representations.\n", "TX2 has 104 tests, compiled to 78 unique assembly representations.\n", - "A64FX has 104 tests, compiled to 81 unique assembly representations.\n" + "A64FX has 104 tests, compiled to 81 unique assembly representations.\n", + "High-level iterations in assembly block: 16\n", + "Measured: 1.1903856655856655\n", + "IACA Predicted: 1.96875 TP: 1.875 LCD: None CP: None\n", + "Ithemal Predicted: nan TP: None LCD: None CP: None\n", + "LLVM-MCA Predicted: 2.240625 TP: 1.948125 LCD: 2.240625 CP: 3.8125\n", + "OSACA Predicted: 1.875 TP: 1.875 LCD: 0.5 CP: 2.75\n" ] + }, + { + "data": { + "text/html": [ + "
Open Source Architecture Code Analyzer (OSACA) - 0.3.14\n",
+       "Analyzed file:      build/SKX/icc/O3/pi.marked.s\n",
+       "Architecture:       SKX\n",
+       "Timestamp:          2021-04-15 12:15:40\n",
+       "\n",
+       "\n",
+       " P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction\n",
+       " * - Instruction micro-ops not bound to a port\n",
+       " X - No throughput/latency information for this instruction in data file\n",
+       "\n",
+       "\n",
+       "Combined Analysis Report\n",
+       "------------------------\n",
+       "                                     Port pressure in cycles                                     \n",
+       "     |  0   - 0DV  |  1   |  2   -  2D  |  3   -  3D  |  4   |  5   |  6   |  7   ||  CP  | LCD  |\n",
+       "-------------------------------------------------------------------------------------------------\n",
+       "  62 |             |      |             |             |      |      |      |      ||      |      |   # pointer_increment=128 fa3c665ee18e1e5f704c8a6026891c36\n",
+       "  63 |             |      |             |             |      |      |      |      ||      |      |   ..B1.4:                         # Preds ..B1.4 ..B1.3\n",
+       "  64 |             |      |             |             |      |      |      |      ||      |      |   # Execution count [5.00e+00]\n",
+       "  65 | 0.00        | 0.00 |             |             |      | 0.00 | 1.00 |      ||      |      |   addl      $32, %ecx                                     #16.5\n",
+       "  66 | 0.00        | 1.00 |             |             |      | 0.00 |      |      ||  1.0 |      |   vpaddd    %ymm5, %ymm9, %ymm14                          #17.9\n",
+       "  67 | 0.50        |      |             |             |      | 1.50 |      |      ||      |      |   vcvtdq2pd %ymm9, %zmm8                                  #17.14\n",
+       "  68 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd    %zmm8, %zmm1, %zmm10                          #17.18\n",
+       "  69 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd    %zmm10, %zmm2, %zmm11                         #17.25\n",
+       "  70 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd213pd %zmm0, %zmm11, %zmm11                       #18.38\n",
+       "  71 |             |      |             |             |      |      |      |      ||      |      | * vmovaps   %zmm0, %zmm29                                 #18.38\n",
+       "  72 | 2.50        |      |             |             |      | 0.50 |      |      ||      |      |   vrcp14pd  %zmm11, %zmm13                                #18.38\n",
+       "  73 | 0.50        |      | 0.50   0.50 | 0.50   0.50 |      | 0.50 |      |      ||      |      |   vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11 #18.38\n",
+       "  74 |             |      |             |             |      | 1.00 |      |      ||      |      |   vfpclasspd $30, %zmm13, %k0                             #18.38\n",
+       "  75 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd    %zmm11, %zmm11, %zmm12                        #18.38\n",
+       "  76 | 1.00        |      |             |             |      |      |      |      ||      |      |   knotw     %k0, %k1                                      #18.38\n",
+       "  77 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd213pd %zmm13, %zmm11, %zmm13{%k1}                 #18.38\n",
+       "  78 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd213pd %zmm13, %zmm12, %zmm13{%k1}                 #18.38\n",
+       "  79 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm4, %zmm13, %zmm6                        #18.38\n",
+       "  80 | 0.00        | 1.00 |             |             |      | 0.00 |      |      ||      |      |   vpaddd    %ymm5, %ymm14, %ymm20                         #17.9\n",
+       "  81 | 0.50        |      |             |             |      | 1.50 |      |      ||  7.0 |      |   vcvtdq2pd %ymm14, %zmm15                                #17.14\n",
+       "  82 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vaddpd    %zmm15, %zmm1, %zmm16                         #17.18\n",
+       "  83 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd    %zmm16, %zmm2, %zmm17                         #17.25\n",
+       "  84 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd213pd %zmm0, %zmm17, %zmm17                       #18.38\n",
+       "  85 | 2.50        |      |             |             |      | 0.50 |      |      ||  8.0 |      |   vrcp14pd  %zmm17, %zmm19                                #18.38\n",
+       "  86 | 0.50        |      | 0.50   0.50 | 0.50   0.50 |      | 0.50 |      |      ||  4.0 |      |   vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17 #18.38\n",
+       "  87 |             |      |             |             |      | 1.00 |      |      ||      |      |   vfpclasspd $30, %zmm19, %k2                             #18.38\n",
+       "  88 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd    %zmm17, %zmm17, %zmm18                        #18.38\n",
+       "  89 | 1.00        |      |             |             |      |      |      |      ||      |      |   knotw     %k2, %k3                                      #18.38\n",
+       "  90 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd213pd %zmm19, %zmm17, %zmm19{%k3}                 #18.38\n",
+       "  91 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd213pd %zmm19, %zmm18, %zmm19{%k3}                 #18.38\n",
+       "  92 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |  4.0 |   vfmadd231pd %zmm4, %zmm19, %zmm3                        #18.38\n",
+       "  93 | 0.00        | 1.00 |             |             |      | 0.00 |      |      ||      |      |   vpaddd    %ymm5, %ymm20, %ymm26                         #17.9\n",
+       "  94 | 0.50        |      |             |             |      | 1.50 |      |      ||      |      |   vcvtdq2pd %ymm20, %zmm21                                #17.14\n",
+       "  95 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd    %zmm21, %zmm1, %zmm22                         #17.18\n",
+       "  96 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd    %zmm22, %zmm2, %zmm23                         #17.25\n",
+       "  97 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd213pd %zmm0, %zmm23, %zmm23                       #18.38\n",
+       "  98 | 2.50        |      |             |             |      | 0.50 |      |      ||      |      |   vrcp14pd  %zmm23, %zmm25                                #18.38\n",
+       "  99 | 0.50        |      | 0.50   0.50 | 0.50   0.50 |      | 0.50 |      |      ||      |      |   vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23 #18.38\n",
+       " 100 |             |      |             |             |      | 1.00 |      |      ||      |      |   vfpclasspd $30, %zmm25, %k4                             #18.38\n",
+       " 101 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd    %zmm23, %zmm23, %zmm24                        #18.38\n",
+       " 102 | 1.00        |      |             |             |      |      |      |      ||      |      |   knotw     %k4, %k5                                      #18.38\n",
+       " 103 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd213pd %zmm25, %zmm23, %zmm25{%k5}                 #18.38\n",
+       " 104 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd213pd %zmm25, %zmm24, %zmm25{%k5}                 #18.38\n",
+       " 105 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm4, %zmm25, %zmm6                        #18.38\n",
+       " 106 | 0.50        |      |             |             |      | 1.50 |      |      ||      |      |   vcvtdq2pd %ymm26, %zmm27                                #17.14\n",
+       " 107 | 0.00        | 1.00 |             |             |      | 0.00 |      |      ||      |      |   vpaddd    %ymm5, %ymm26, %ymm9                          #17.9\n",
+       " 108 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd    %zmm27, %zmm1, %zmm28                         #17.18\n",
+       " 109 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd    %zmm28, %zmm2, %zmm8                          #17.25\n",
+       " 110 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm8, %zmm8, %zmm29                        #18.38\n",
+       " 111 | 2.50        |      |             |             |      | 0.50 |      |      ||      |      |   vrcp14pd  %zmm29, %zmm31                                #18.38\n",
+       " 112 | 0.50        |      | 0.50   0.50 | 0.50   0.50 |      | 0.50 |      |      ||      |      |   vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29 #18.38\n",
+       " 113 |             |      |             |             |      | 1.00 |      |      ||      |      |   vfpclasspd $30, %zmm31, %k6                             #18.38\n",
+       " 114 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd    %zmm29, %zmm29, %zmm30                        #18.38\n",
+       " 115 | 1.00        |      |             |             |      |      |      |      ||      |      |   knotw     %k6, %k7                                      #18.38\n",
+       " 116 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd213pd %zmm31, %zmm29, %zmm31{%k7}                 #18.38\n",
+       " 117 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd213pd %zmm31, %zmm30, %zmm31{%k7}                 #18.38\n",
+       " 118 | 0.00        |      |             |             |      | 1.00 |      |      ||  0.0 |  4.0 |   vfmadd231pd %zmm4, %zmm31, %zmm3                        #18.38\n",
+       " 119 | 0.00        | 0.34 |             |             |      | 0.00 | 0.66 |      ||      |      |   cmpl      %edx, %ecx                                    #16.5\n",
+       " 120 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   jb        ..B1.4        # Prob 82%                      #16.5\n",
+       "\n",
+       "       30.0          4.34   2.00   2.00   2.00   2.00          30.0   2.66            44    8.0  \n",
+       "\n",
+       "\n",
+       "Loop-Carried Dependencies Analysis Report\n",
+       "-----------------------------------------\n",
+       "  92 |  8.0 | vfmadd231pd %zmm4, %zmm19, %zmm3                        #18.38| [92, 118]\n",
+       "  79 |  8.0 | vfmadd231pd %zmm4, %zmm13, %zmm6                        #18.38| [79, 105]\n",
+       "  66 |  4.0 | vpaddd    %ymm5, %ymm9, %ymm14                          #17.9| [66, 80, 93, 107]\n",
+       "  65 |  1.0 | addl      $32, %ecx                                     #16.5| [65]\n",
+       "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Iterations:        100\n",
+       "Instructions:      5600\n",
+       "Total Cycles:      3585\n",
+       "Total uOps:        7200\n",
+       "\n",
+       "Dispatch Width:    6\n",
+       "uOps Per Cycle:    2.01\n",
+       "IPC:               1.56\n",
+       "Block RThroughput: 18.0\n",
+       "\n",
+       "\n",
+       "Instruction Info:\n",
+       "[1]: #uOps\n",
+       "[2]: Latency\n",
+       "[3]: RThroughput\n",
+       "[4]: MayLoad\n",
+       "[5]: MayStore\n",
+       "[6]: HasSideEffects (U)\n",
+       "\n",
+       "[1]    [2]    [3]    [4]    [5]    [6]    Instructions:\n",
+       " 1      1     0.25                        addl\t$32, %ecx\n",
+       " 1      1     0.33                        vpaddd\t%ymm5, %ymm9, %ymm14\n",
+       " 2      7     1.00                        vcvtdq2pd\t%ymm9, %zmm8\n",
+       " 1      4     0.50                        vaddpd\t%zmm8, %zmm1, %zmm10\n",
+       " 1      4     0.50                        vmulpd\t%zmm10, %zmm2, %zmm11\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+       " 1      1     0.33                        vmovaps\t%zmm0, %zmm29\n",
+       " 3      4     2.00                        vrcp14pd\t%zmm11, %zmm13\n",
+       " 2      11    0.50    *                   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+       " 1      4     1.00                        vfpclasspd\t$30, %zmm13, %k0\n",
+       " 1      4     0.50                        vmulpd\t%zmm11, %zmm11, %zmm12\n",
+       " 1      1     1.00                        knotw\t%k0, %k1\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+       " 1      4     0.50                        vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+       " 1      1     0.33                        vpaddd\t%ymm5, %ymm14, %ymm20\n",
+       " 2      7     1.00                        vcvtdq2pd\t%ymm14, %zmm15\n",
+       " 1      4     0.50                        vaddpd\t%zmm15, %zmm1, %zmm16\n",
+       " 1      4     0.50                        vmulpd\t%zmm16, %zmm2, %zmm17\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+       " 3      4     2.00                        vrcp14pd\t%zmm17, %zmm19\n",
+       " 2      11    0.50    *                   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+       " 1      4     1.00                        vfpclasspd\t$30, %zmm19, %k2\n",
+       " 1      4     0.50                        vmulpd\t%zmm17, %zmm17, %zmm18\n",
+       " 1      1     1.00                        knotw\t%k2, %k3\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+       " 1      4     0.50                        vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+       " 1      1     0.33                        vpaddd\t%ymm5, %ymm20, %ymm26\n",
+       " 2      7     1.00                        vcvtdq2pd\t%ymm20, %zmm21\n",
+       " 1      4     0.50                        vaddpd\t%zmm21, %zmm1, %zmm22\n",
+       " 1      4     0.50                        vmulpd\t%zmm22, %zmm2, %zmm23\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+       " 3      4     2.00                        vrcp14pd\t%zmm23, %zmm25\n",
+       " 2      11    0.50    *                   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+       " 1      4     1.00                        vfpclasspd\t$30, %zmm25, %k4\n",
+       " 1      4     0.50                        vmulpd\t%zmm23, %zmm23, %zmm24\n",
+       " 1      1     1.00                        knotw\t%k4, %k5\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+       " 1      4     0.50                        vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+       " 2      7     1.00                        vcvtdq2pd\t%ymm26, %zmm27\n",
+       " 1      1     0.33                        vpaddd\t%ymm5, %ymm26, %ymm9\n",
+       " 1      4     0.50                        vaddpd\t%zmm27, %zmm1, %zmm28\n",
+       " 1      4     0.50                        vmulpd\t%zmm28, %zmm2, %zmm8\n",
+       " 1      4     0.50                        vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+       " 3      4     2.00                        vrcp14pd\t%zmm29, %zmm31\n",
+       " 2      11    0.50    *                   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+       " 1      4     1.00                        vfpclasspd\t$30, %zmm31, %k6\n",
+       " 1      4     0.50                        vmulpd\t%zmm29, %zmm29, %zmm30\n",
+       " 1      1     1.00                        knotw\t%k6, %k7\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+       " 1      4     0.50                        vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+       " 1      4     0.50                        vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+       " 1      1     0.25                        cmpl\t%edx, %ecx\n",
+       " 1      1     0.50                        jb\t..B1.4\n",
+       "\n",
+       "\n",
+       "Resources:\n",
+       "[0]   - SKXDivider\n",
+       "[1]   - SKXFPDivider\n",
+       "[2]   - SKXPort0\n",
+       "[3]   - SKXPort1\n",
+       "[4]   - SKXPort2\n",
+       "[5]   - SKXPort3\n",
+       "[6]   - SKXPort4\n",
+       "[7]   - SKXPort5\n",
+       "[8]   - SKXPort6\n",
+       "[9]   - SKXPort7\n",
+       "\n",
+       "\n",
+       "Resource pressure per iteration:\n",
+       "[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    \n",
+       " -      -     31.17  5.72   2.00   2.00    -     29.10  2.01    -     \n",
+       "\n",
+       "Resource pressure by instruction:\n",
+       "[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:\n",
+       " -      -      -     0.80    -      -      -     0.19   0.01    -     addl\t$32, %ecx\n",
+       " -      -     0.07   0.92    -      -      -     0.01    -      -     vpaddd\t%ymm5, %ymm9, %ymm14\n",
+       " -      -     1.00    -      -      -      -     1.00    -      -     vcvtdq2pd\t%ymm9, %zmm8\n",
+       " -      -     0.42    -      -      -      -     0.58    -      -     vaddpd\t%zmm8, %zmm1, %zmm10\n",
+       " -      -     0.51    -      -      -      -     0.49    -      -     vmulpd\t%zmm10, %zmm2, %zmm11\n",
+       " -      -     0.45    -      -      -      -     0.55    -      -     vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+       " -      -      -     1.00    -      -      -      -      -      -     vmovaps\t%zmm0, %zmm29\n",
+       " -      -     2.00    -      -      -      -     1.00    -      -     vrcp14pd\t%zmm11, %zmm13\n",
+       " -      -     0.40    -      -     1.00    -     0.60    -      -     vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+       " -      -      -      -      -      -      -     1.00    -      -     vfpclasspd\t$30, %zmm13, %k0\n",
+       " -      -     0.49    -      -      -      -     0.51    -      -     vmulpd\t%zmm11, %zmm11, %zmm12\n",
+       " -      -     1.00    -      -      -      -      -      -      -     knotw\t%k0, %k1\n",
+       " -      -     0.44    -      -      -      -     0.56    -      -     vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+       " -      -     0.54    -      -      -      -     0.46    -      -     vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+       " -      -     0.70    -      -      -      -     0.30    -      -     vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+       " -      -      -     1.00    -      -      -      -      -      -     vpaddd\t%ymm5, %ymm14, %ymm20\n",
+       " -      -     1.00    -      -      -      -     1.00    -      -     vcvtdq2pd\t%ymm14, %zmm15\n",
+       " -      -     0.48    -      -      -      -     0.52    -      -     vaddpd\t%zmm15, %zmm1, %zmm16\n",
+       " -      -     0.42    -      -      -      -     0.58    -      -     vmulpd\t%zmm16, %zmm2, %zmm17\n",
+       " -      -     0.32    -      -      -      -     0.68    -      -     vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+       " -      -     2.00    -      -      -      -     1.00    -      -     vrcp14pd\t%zmm17, %zmm19\n",
+       " -      -     0.32    -     1.00    -      -     0.68    -      -     vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+       " -      -      -      -      -      -      -     1.00    -      -     vfpclasspd\t$30, %zmm19, %k2\n",
+       " -      -     0.47    -      -      -      -     0.53    -      -     vmulpd\t%zmm17, %zmm17, %zmm18\n",
+       " -      -     1.00    -      -      -      -      -      -      -     knotw\t%k2, %k3\n",
+       " -      -     0.53    -      -      -      -     0.47    -      -     vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+       " -      -     0.54    -      -      -      -     0.46    -      -     vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+       " -      -     0.57    -      -      -      -     0.43    -      -     vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+       " -      -      -     1.00    -      -      -      -      -      -     vpaddd\t%ymm5, %ymm20, %ymm26\n",
+       " -      -     1.00    -      -      -      -     1.00    -      -     vcvtdq2pd\t%ymm20, %zmm21\n",
+       " -      -     0.52    -      -      -      -     0.48    -      -     vaddpd\t%zmm21, %zmm1, %zmm22\n",
+       " -      -     0.47    -      -      -      -     0.53    -      -     vmulpd\t%zmm22, %zmm2, %zmm23\n",
+       " -      -     0.48    -      -      -      -     0.52    -      -     vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+       " -      -     2.00    -      -      -      -     1.00    -      -     vrcp14pd\t%zmm23, %zmm25\n",
+       " -      -     0.40    -      -     1.00    -     0.60    -      -     vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+       " -      -      -      -      -      -      -     1.00    -      -     vfpclasspd\t$30, %zmm25, %k4\n",
+       " -      -     0.53    -      -      -      -     0.47    -      -     vmulpd\t%zmm23, %zmm23, %zmm24\n",
+       " -      -     1.00    -      -      -      -      -      -      -     knotw\t%k4, %k5\n",
+       " -      -     0.42    -      -      -      -     0.58    -      -     vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+       " -      -     0.54    -      -      -      -     0.46    -      -     vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+       " -      -     0.60    -      -      -      -     0.40    -      -     vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+       " -      -     1.00    -      -      -      -     1.00    -      -     vcvtdq2pd\t%ymm26, %zmm27\n",
+       " -      -      -     1.00    -      -      -      -      -      -     vpaddd\t%ymm5, %ymm26, %ymm9\n",
+       " -      -     0.26    -      -      -      -     0.74    -      -     vaddpd\t%zmm27, %zmm1, %zmm28\n",
+       " -      -     0.47    -      -      -      -     0.53    -      -     vmulpd\t%zmm28, %zmm2, %zmm8\n",
+       " -      -     0.34    -      -      -      -     0.66    -      -     vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+       " -      -     2.00    -      -      -      -     1.00    -      -     vrcp14pd\t%zmm29, %zmm31\n",
+       " -      -     0.34    -     1.00    -      -     0.66    -      -     vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+       " -      -      -      -      -      -      -     1.00    -      -     vfpclasspd\t$30, %zmm31, %k6\n",
+       " -      -     0.52    -      -      -      -     0.48    -      -     vmulpd\t%zmm29, %zmm29, %zmm30\n",
+       " -      -     1.00    -      -      -      -      -      -      -     knotw\t%k6, %k7\n",
+       " -      -     0.47    -      -      -      -     0.53    -      -     vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+       " -      -     0.48    -      -      -      -     0.52    -      -     vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+       " -      -     0.66    -      -      -      -     0.34    -      -     vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+       " -      -      -      -      -      -      -      -     1.00    -     cmpl\t%edx, %ecx\n",
+       " -      -      -      -      -      -      -      -     1.00    -     jb\t..B1.4\n",
+       "\n",
+       "\n",
+       "Timeline view:\n",
+       "                    0123456789          0123456789          0123456789          0123456789          0123456789          0123456789          0123456789          0123456789         \n",
+       "Index     0123456789          0123456789          0123456789          0123456789          0123456789          0123456789          0123456789          0123456789          012345678\n",
+       "\n",
+       "[0,0]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addl\t$32, %ecx\n",
+       "[0,1]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm9, %ymm14\n",
+       "[0,2]     D=eeeeeeeER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm9, %zmm8\n",
+       "[0,3]     D========eeeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm8, %zmm1, %zmm10\n",
+       "[0,4]     D============eeeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm10, %zmm2, %zmm11\n",
+       "[0,5]     .D===============eeeeER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+       "[0,6]     .DeE------------------R  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmovaps\t%zmm0, %zmm29\n",
+       "[0,7]     .D===================eeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm11, %zmm13\n",
+       "[0,8]     . D======================eeeeeeeeeeeER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+       "[0,9]     . D======================eeeeE-------R  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm13, %k0\n",
+       "[0,10]    . D=================================eeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm11, %zmm11, %zmm12\n",
+       "[0,11]    . D==========================eE----------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k0, %k1\n",
+       "[0,12]    . D=================================eeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+       "[0,13]    .  D====================================eeeeER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+       "[0,14]    .  D========================================eeeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+       "[0,15]    .  DeE-------------------------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm14, %ymm20\n",
+       "[0,16]    .  DeeeeeeeE-------------------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm14, %zmm15\n",
+       "[0,17]    .  D=======eeeeE---------------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm15, %zmm1, %zmm16\n",
+       "[0,18]    .   D==========eeeeE-----------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm16, %zmm2, %zmm17\n",
+       "[0,19]    .   D==============eeeeE-------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+       "[0,20]    .   D==================eeeeE---------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm17, %zmm19\n",
+       "[0,21]    .    D=====================eeeeeeeeeeeE----------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+       "[0,22]    .    D======================eeeeE----------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm19, %k2\n",
+       "[0,23]    .    D================================eeeeE------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm17, %zmm17, %zmm18\n",
+       "[0,24]    .    D==========================eE---------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k2, %k3\n",
+       "[0,25]    .    D================================eeeeE------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+       "[0,26]    .    .D===================================eeeeE--R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+       "[0,27]    .    .D=======================================eeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+       "[0,28]    .    .DeE------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm20, %ymm26\n",
+       "[0,29]    .    .DeeeeeeeE------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm20, %zmm21\n",
+       "[0,30]    .    .D=======eeeeE--------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm21, %zmm1, %zmm22\n",
+       "[0,31]    .    . D==========eeeeE----------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm22, %zmm2, %zmm23\n",
+       "[0,32]    .    . D==============eeeeE------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+       "[0,33]    .    . D==================eeeeE--------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm23, %zmm25\n",
+       "[0,34]    .    .  D=====================eeeeeeeeeeeE---------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+       "[0,35]    .    .  D=====================eeeeE----------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm25, %k4\n",
+       "[0,36]    .    .  D================================eeeeE-----R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm23, %zmm23, %zmm24\n",
+       "[0,37]    .    .  D==========================eE--------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k4, %k5\n",
+       "[0,38]    .    .  D================================eeeeE-----R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+       "[0,39]    .    .   D===================================eeeeE-R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+       "[0,40]    .    .   D=======================================eeeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+       "[0,41]    .    .   DeeeeeeeE------------------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm26, %zmm27\n",
+       "[0,42]    .    .   DeE------------------------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm26, %ymm9\n",
+       "[0,43]    .    .   D=======eeeeE--------------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm27, %zmm1, %zmm28\n",
+       "[0,44]    .    .    D=============eeeeE-------------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm28, %zmm2, %zmm8\n",
+       "[0,45]    .    .    D=================eeeeE---------------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+       "[0,46]    .    .    D======================eeeeE----------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm29, %zmm31\n",
+       "[0,47]    .    .    .D=========================eeeeeeeeeeeE-----R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+       "[0,48]    .    .    .D=========================eeeeE------------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm31, %k6\n",
+       "[0,49]    .    .    .D====================================eeeeE-R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm29, %zmm29, %zmm30\n",
+       "[0,50]    .    .    .D==============================eE----------R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k6, %k7\n",
+       "[0,51]    .    .    .D====================================eeeeE-R.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+       "[0,52]    .    .    . D=======================================eeeeER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+       "[0,53]    .    .    . D===========================================eeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+       "[0,54]    .    .    . DeE----------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   cmpl\t%edx, %ecx\n",
+       "[0,55]    .    .    . D=eE---------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   jb\t..B1.4\n",
+       "[1,0]     .    .    . DeE----------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addl\t$32, %ecx\n",
+       "[1,1]     .    .    . DeE----------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm9, %ymm14\n",
+       "[1,2]     .    .    .  D==eeeeeeeE-------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm9, %zmm8\n",
+       "[1,3]     .    .    .  D===============eeeeE---------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm8, %zmm1, %zmm10\n",
+       "[1,4]     .    .    .  D====================eeeeE----------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm10, %zmm2, %zmm11\n",
+       "[1,5]     .    .    .  D=========================eeeeE-----------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+       "[1,6]     .    .    .  DeE---------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmovaps\t%zmm0, %zmm29\n",
+       "[1,7]     .    .    .   D============================eeeeE-------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm11, %zmm13\n",
+       "[1,8]     .    .    .   D================================eeeeeeeeeeeE--R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+       "[1,9]     .    .    .   D================================eeeeE---------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm13, %k0\n",
+       "[1,10]    .    .    .    D==========================================eeeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm11, %zmm11, %zmm12\n",
+       "[1,11]    .    .    .    D====================================eE---------R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k0, %k1\n",
+       "[1,12]    .    .    .    D==========================================eeeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+       "[1,13]    .    .    .    D==============================================eeeeER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+       "[1,14]    .    .    .    D==================================================eeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+       "[1,15]    .    .    .    DeE-----------------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm14, %ymm20\n",
+       "[1,16]    .    .    .    .D===eeeeeeeE-------------------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm14, %zmm15\n",
+       "[1,17]    .    .    .    .D==============eeeeE-----------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm15, %zmm1, %zmm16\n",
+       "[1,18]    .    .    .    .D==================eeeeE-------------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm16, %zmm2, %zmm17\n",
+       "[1,19]    .    .    .    .D======================eeeeE---------------------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+       "[1,20]    .    .    .    . D================================eeeeE----------------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm17, %zmm19\n",
+       "[1,21]    .    .    .    . D====================================eeeeeeeeeeeE-----R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+       "[1,22]    .    .    .    . D=====================================eeeeE-----------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm19, %k2\n",
+       "[1,23]    .    .    .    .  D==============================================eeeeE-R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm17, %zmm17, %zmm18\n",
+       "[1,24]    .    .    .    .  D========================================eE----------R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k2, %k3\n",
+       "[1,25]    .    .    .    .  D==============================================eeeeE-R   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+       "[1,26]    .    .    .    .  D==================================================eeeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+       "[1,27]    .    .    .    .  D======================================================eeeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+       "[1,28]    .    .    .    .  DeE---------------------------------------------------------R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm20, %ymm26\n",
+       "[1,29]    .    .    .    .   D=================================eeeeeeeE-----------------R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm20, %zmm21\n",
+       "[1,30]    .    .    .    .   D========================================eeeeE-------------R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm21, %zmm1, %zmm22\n",
+       "[1,31]    .    .    .    .    D===========================================eeeeE---------R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm22, %zmm2, %zmm23\n",
+       "[1,32]    .    .    .    .    .D==============================================eeeeE-----R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+       "[1,33]    .    .    .    .    . D=================================================eeeeE-R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm23, %zmm25\n",
+       "[1,34]    .    .    .    .    .  D====================================================eeeeeeeeeeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+       "[1,35]    .    .    .    .    .   D===================================================eeeeE-------R .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm25, %k4\n",
+       "[1,36]    .    .    .    .    .    D=============================================================eeeeER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm23, %zmm23, %zmm24\n",
+       "[1,37]    .    .    .    .    .    D======================================================eE----------R  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k4, %k5\n",
+       "[1,38]    .    .    .    .    .    .D============================================================eeeeER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+       "[1,39]    .    .    .    .    .    . D===============================================================eeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+       "[1,40]    .    .    .    .    .    .  D==================================================================eeeeER    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+       "[1,41]    .    .    .    .    .    .  D============================eeeeeeeE-----------------------------------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm26, %zmm27\n",
+       "[1,42]    .    .    .    .    .    .   DeE--------------------------------------------------------------------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm26, %ymm9\n",
+       "[1,43]    .    .    .    .    .    .   D==================================eeeeE-------------------------------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm27, %zmm1, %zmm28\n",
+       "[1,44]    .    .    .    .    .    .    D=====================================eeeeE---------------------------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm28, %zmm2, %zmm8\n",
+       "[1,45]    .    .    .    .    .    .    D===========================================eeeeE---------------------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+       "[1,46]    .    .    .    .    .    .    D===============================================eeeeE-----------------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm29, %zmm31\n",
+       "[1,47]    .    .    .    .    .    .    .D==================================================eeeeeeeeeeeE------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+       "[1,48]    .    .    .    .    .    .    . D=================================================eeeeE-------------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm31, %k6\n",
+       "[1,49]    .    .    .    .    .    .    .  D===========================================================eeeeE--R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm29, %zmm29, %zmm30\n",
+       "[1,50]    .    .    .    .    .    .    .   D=====================================================eE----------R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k6, %k7\n",
+       "[1,51]    .    .    .    .    .    .    .    D==========================================================eeeeE-R    .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+       "[1,52]    .    .    .    .    .    .    .    D==============================================================eeeeER .    .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+       "[1,53]    .    .    .    .    .    .    .    .D=================================================================eeeeER  .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+       "[1,54]    .    .    .    .    .    .    .    .DeE--------------------------------------------------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   cmpl\t%edx, %ecx\n",
+       "[1,55]    .    .    .    .    .    .    .    . DeE-------------------------------------------------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   jb\t..B1.4\n",
+       "[2,0]     .    .    .    .    .    .    .    . DeE-------------------------------------------------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   addl\t$32, %ecx\n",
+       "[2,1]     .    .    .    .    .    .    .    . D=eE------------------------------------------------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm9, %ymm14\n",
+       "[2,2]     .    .    .    .    .    .    .    .  D======================eeeeeeeE--------------------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm9, %zmm8\n",
+       "[2,3]     .    .    .    .    .    .    .    .  D==============================eeeeE---------------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm8, %zmm1, %zmm10\n",
+       "[2,4]     .    .    .    .    .    .    .    .  D===================================eeeeE----------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm10, %zmm2, %zmm11\n",
+       "[2,5]     .    .    .    .    .    .    .    .  D========================================eeeeE-----------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+       "[2,6]     .    .    .    .    .    .    .    .   DeE-----------------------------------------------------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vmovaps\t%zmm0, %zmm29\n",
+       "[2,7]     .    .    .    .    .    .    .    .   D===========================================eeeeE-------------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm11, %zmm13\n",
+       "[2,8]     .    .    .    .    .    .    .    .   D================================================eeeeeeeeeeeE-------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+       "[2,9]     .    .    .    .    .    .    .    .    D================================================eeeeE-------------R  .    .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm13, %k0\n",
+       "[2,10]    .    .    .    .    .    .    .    .    D==========================================================eeeeE---R  .    .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm11, %zmm11, %zmm12\n",
+       "[2,11]    .    .    .    .    .    .    .    .    .D======================================================eE---------R  .    .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k0, %k1\n",
+       "[2,12]    .    .    .    .    .    .    .    .    .D=========================================================eeeeE---R  .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+       "[2,13]    .    .    .    .    .    .    .    .    . D============================================================eeeeER .    .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+       "[2,14]    .    .    .    .    .    .    .    .    . D================================================================eeeeER  .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+       "[2,15]    .    .    .    .    .    .    .    .    .  DeE------------------------------------------------------------------R  .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm14, %ymm20\n",
+       "[2,16]    .    .    .    .    .    .    .    .    .   D==================eeeeeeeE-----------------------------------------R  .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm14, %zmm15\n",
+       "[2,17]    .    .    .    .    .    .    .    .    .   D=========================eeeeE-------------------------------------R  .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm15, %zmm1, %zmm16\n",
+       "[2,18]    .    .    .    .    .    .    .    .    .    D=============================eeeeE--------------------------------R  .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm16, %zmm2, %zmm17\n",
+       "[2,19]    .    .    .    .    .    .    .    .    .    .D=================================eeeeE---------------------------R  .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+       "[2,20]    .    .    .    .    .    .    .    .    .    . D=====================================eeeeE----------------------R  .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm17, %zmm19\n",
+       "[2,21]    .    .    .    .    .    .    .    .    .    . D=========================================eeeeeeeeeeeE-----------R  .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+       "[2,22]    .    .    .    .    .    .    .    .    .    .  D=========================================eeeeE-----------------R  .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm19, %k2\n",
+       "[2,23]    .    .    .    .    .    .    .    .    .    .  D===================================================eeeeE-------R  .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm17, %zmm17, %zmm18\n",
+       "[2,24]    .    .    .    .    .    .    .    .    .    .   D===============================================eE-------------R  .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k2, %k3\n",
+       "[2,25]    .    .    .    .    .    .    .    .    .    .    D=================================================eeeeE-------R  .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+       "[2,26]    .    .    .    .    .    .    .    .    .    .    . D===================================================eeeeE---R  .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+       "[2,27]    .    .    .    .    .    .    .    .    .    .    . D=======================================================eeeeER .    .    .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+       "[2,28]    .    .    .    .    .    .    .    .    .    .    .  DeE---------------------------------------------------------R .    .    .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm20, %ymm26\n",
+       "[2,29]    .    .    .    .    .    .    .    .    .    .    .   D============eeeeeeeE--------------------------------------R .    .    .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm20, %zmm21\n",
+       "[2,30]    .    .    .    .    .    .    .    .    .    .    .   D====================eeeeE---------------------------------R .    .    .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm21, %zmm1, %zmm22\n",
+       "[2,31]    .    .    .    .    .    .    .    .    .    .    .    D=========================eeeeE---------------------------R .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm22, %zmm2, %zmm23\n",
+       "[2,32]    .    .    .    .    .    .    .    .    .    .    .    .D=============================eeeeE----------------------R .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+       "[2,33]    .    .    .    .    .    .    .    .    .    .    .    . D==================================eeeeE----------------R .    .    .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm23, %zmm25\n",
+       "[2,34]    .    .    .    .    .    .    .    .    .    .    .    .  D=====================================eeeeeeeeeeeE-----R .    .    .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+       "[2,35]    .    .    .    .    .    .    .    .    .    .    .    .  D======================================eeeeE-----------R .    .    .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm25, %k4\n",
+       "[2,36]    .    .    .    .    .    .    .    .    .    .    .    .   D===============================================eeeeE-R .    .    .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm23, %zmm23, %zmm24\n",
+       "[2,37]    .    .    .    .    .    .    .    .    .    .    .    .    D========================================eE----------R .    .    .    .    .    .    .    .    .    .    .  .   knotw\t%k4, %k5\n",
+       "[2,38]    .    .    .    .    .    .    .    .    .    .    .    .    .D==============================================eeeeER .    .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+       "[2,39]    .    .    .    .    .    .    .    .    .    .    .    .    . D=================================================eeeeER  .    .    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+       "[2,40]    .    .    .    .    .    .    .    .    .    .    .    .    .  D====================================================eeeeER   .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+       "[2,41]    .    .    .    .    .    .    .    .    .    .    .    .    .   D======eeeeeeeE------------------------------------------R   .    .    .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm26, %zmm27\n",
+       "[2,42]    .    .    .    .    .    .    .    .    .    .    .    .    .   DeE------------------------------------------------------R   .    .    .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm26, %ymm9\n",
+       "[2,43]    .    .    .    .    .    .    .    .    .    .    .    .    .    D===============eeeeE-----------------------------------R   .    .    .    .    .    .    .    .    .  .   vaddpd\t%zmm27, %zmm1, %zmm28\n",
+       "[2,44]    .    .    .    .    .    .    .    .    .    .    .    .    .    D========================eeeeE--------------------------R   .    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm28, %zmm2, %zmm8\n",
+       "[2,45]    .    .    .    .    .    .    .    .    .    .    .    .    .    D============================eeeeE----------------------R   .    .    .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+       "[2,46]    .    .    .    .    .    .    .    .    .    .    .    .    .    .D======================================eeeeE-----------R   .    .    .    .    .    .    .    .    .  .   vrcp14pd\t%zmm29, %zmm31\n",
+       "[2,47]    .    .    .    .    .    .    .    .    .    .    .    .    .    . D=========================================eeeeeeeeeeeER   .    .    .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+       "[2,48]    .    .    .    .    .    .    .    .    .    .    .    .    .    .  D=========================================eeeeE------R   .    .    .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm31, %k6\n",
+       "[2,49]    .    .    .    .    .    .    .    .    .    .    .    .    .    .  D===================================================eeeeER    .    .    .    .    .    .    .    .  .   vmulpd\t%zmm29, %zmm29, %zmm30\n",
+       "[2,50]    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D============================================eE---------R    .    .    .    .    .    .    .    .  .   knotw\t%k6, %k7\n",
+       "[2,51]    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D==================================================eeeeER    .    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+       "[2,52]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D=====================================================eeeeER.    .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+       "[2,53]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D========================================================eeeeER .    .    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+       "[2,54]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . DeE----------------------------------------------------------R .    .    .    .    .    .    .  .   cmpl\t%edx, %ecx\n",
+       "[2,55]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  DeE---------------------------------------------------------R .    .    .    .    .    .    .  .   jb\t..B1.4\n",
+       "[3,0]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  DeE---------------------------------------------------------R .    .    .    .    .    .    .  .   addl\t$32, %ecx\n",
+       "[3,1]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   DeE--------------------------------------------------------R .    .    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm9, %ymm14\n",
+       "[3,2]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D==eeeeeeeE------------------------------------------------R .    .    .    .    .    .    .  .   vcvtdq2pd\t%ymm9, %zmm8\n",
+       "[3,3]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D=========eeeeE--------------------------------------------R .    .    .    .    .    .    .  .   vaddpd\t%zmm8, %zmm1, %zmm10\n",
+       "[3,4]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D================eeeeE-------------------------------------R .    .    .    .    .    .    .  .   vmulpd\t%zmm10, %zmm2, %zmm11\n",
+       "[3,5]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D===================eeeeE---------------------------------R .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+       "[3,6]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    DeE-------------------------------------------------------R .    .    .    .    .    .    .  .   vmovaps\t%zmm0, %zmm29\n",
+       "[3,7]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D===================================eeeeE-----------------R .    .    .    .    .    .    .  .   vrcp14pd\t%zmm11, %zmm13\n",
+       "[3,8]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D======================================eeeeeeeeeeeE------R .    .    .    .    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+       "[3,9]     .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D=======================================eeeeE------------R .    .    .    .    .    .    .  .   vfpclasspd\t$30, %zmm13, %k0\n",
+       "[3,10]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D=================================================eeeeE--R .    .    .    .    .    .    .  .   vmulpd\t%zmm11, %zmm11, %zmm12\n",
+       "[3,11]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D===========================================eE----------R .    .    .    .    .    .    .  .   knotw\t%k0, %k1\n",
+       "[3,12]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  D===============================================eeeeE--R .    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+       "[3,13]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D==================================================eeeeER    .    .    .    .    .    .  .   vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+       "[3,14]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D=====================================================eeeeER.    .    .    .    .    .  .   vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+       "[3,15]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    DeE--------------------------------------------------------R.    .    .    .    .    .  .   vpaddd\t%ymm5, %ymm14, %ymm20\n",
+       "[3,16]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D===============================eeeeeeeE------------------R.    .    .    .    .    .  .   vcvtdq2pd\t%ymm14, %zmm15\n",
+       "[3,17]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D=======================================eeeeE-------------R.    .    .    .    .    .  .   vaddpd\t%zmm15, %zmm1, %zmm16\n",
+       "[3,18]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D===========================================eeeeE---------R.    .    .    .    .    .  .   vmulpd\t%zmm16, %zmm2, %zmm17\n",
+       "[3,19]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D==============================================eeeeE-----R.    .    .    .    .    .  .   vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+       "[3,20]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D==================================================eeeeE-R.    .    .    .    .    .  .   vrcp14pd\t%zmm17, %zmm19\n",
+       "[3,21]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  D=====================================================eeeeeeeeeeeER.    .    .    .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+       "[3,22]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D=====================================================eeeeE------R.    .    .    .  .   vfpclasspd\t$30, %zmm19, %k2\n",
+       "[3,23]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D==============================================================eeeeER .    .    .  .   vmulpd\t%zmm17, %zmm17, %zmm18\n",
+       "[3,24]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D=======================================================eE---------R .    .    .  .   knotw\t%k2, %k3\n",
+       "[3,25]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D============================================================eeeeER .    .    .  .   vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+       "[3,26]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D================================================================eeeeER  .    .  .   vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+       "[3,27]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  D===================================================================eeeeER   .  .   vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+       "[3,28]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  DeE----------------------------------------------------------------------R   .  .   vpaddd\t%ymm5, %ymm20, %ymm26\n",
+       "[3,29]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D===========================eeeeeeeE------------------------------------R   .  .   vcvtdq2pd\t%ymm20, %zmm21\n",
+       "[3,30]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D==================================eeeeE--------------------------------R   .  .   vaddpd\t%zmm21, %zmm1, %zmm22\n",
+       "[3,31]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D======================================eeeeE----------------------------R   .  .   vmulpd\t%zmm22, %zmm2, %zmm23\n",
+       "[3,32]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D=========================================eeeeE------------------------R   .  .   vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+       "[3,33]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D=============================================eeeeE--------------------R   .  .   vrcp14pd\t%zmm23, %zmm25\n",
+       "[3,34]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D================================================eeeeeeeeeeeE---------R   .  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+       "[3,35]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D=================================================eeeeE---------------R   .  .   vfpclasspd\t$30, %zmm25, %k4\n",
+       "[3,36]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D==========================================================eeeeE-----R   .  .   vmulpd\t%zmm23, %zmm23, %zmm24\n",
+       "[3,37]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  D====================================================eE-------------R   .  .   knotw\t%k4, %k5\n",
+       "[3,38]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D========================================================eeeeE-----R   .  .   vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+       "[3,39]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D============================================================eeeeE-R   .  .   vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+       "[3,40]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D===============================================================eeeeER.  .   vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+       "[3,41]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D======================eeeeeeeE--------------------------------------R.  .   vcvtdq2pd\t%ymm26, %zmm27\n",
+       "[3,42]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .DeE-----------------------------------------------------------------R.  .   vpaddd\t%ymm5, %ymm26, %ymm9\n",
+       "[3,43]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D============================eeeeE----------------------------------R.  .   vaddpd\t%zmm27, %zmm1, %zmm28\n",
+       "[3,44]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D===============================eeeeE------------------------------R.  .   vmulpd\t%zmm28, %zmm2, %zmm8\n",
+       "[3,45]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D=====================================eeeeE------------------------R.  .   vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+       "[3,46]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D=========================================eeeeE--------------------R.  .   vrcp14pd\t%zmm29, %zmm31\n",
+       "[3,47]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  D============================================eeeeeeeeeeeE---------R.  .   vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+       "[3,48]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D===========================================eeeeE----------------R.  .   vfpclasspd\t$30, %zmm31, %k6\n",
+       "[3,49]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   D======================================================eeeeE-----R.  .   vmulpd\t%zmm29, %zmm29, %zmm30\n",
+       "[3,50]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D==============================================eE---------------R.  .   knotw\t%k6, %k7\n",
+       "[3,51]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    D======================================================eeeeE----R.  .   vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+       "[3,52]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .D=========================================================eeeeER.  .   vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+       "[3,53]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    . D============================================================eeeeER   vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+       "[3,54]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  DeE--------------------------------------------------------------R   cmpl\t%edx, %ecx\n",
+       "[3,55]    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   DeE-------------------------------------------------------------R   jb\t..B1.4\n",
+       "\n",
+       "\n",
+       "Average Wait times (based on the timeline view):\n",
+       "[0]: Executions\n",
+       "[1]: Average time spent waiting in a scheduler's queue\n",
+       "[2]: Average time spent waiting in a scheduler's queue while ready\n",
+       "[3]: Average time elapsed from WB until retire stage\n",
+       "\n",
+       "      [0]    [1]    [2]    [3]\n",
+       "0.     4     1.0    1.0    42.5      addl\t$32, %ecx\n",
+       "1.     4     1.3    1.3    42.0      vpaddd\t%ymm5, %ymm9, %ymm14\n",
+       "2.     4     7.8    7.8    30.8      vcvtdq2pd\t%ymm9, %zmm8\n",
+       "3.     4     16.5   1.8    26.0      vaddpd\t%zmm8, %zmm1, %zmm10\n",
+       "4.     4     21.8   1.3    21.8      vmulpd\t%zmm10, %zmm2, %zmm11\n",
+       "5.     4     25.8   0.5    18.3      vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
+       "6.     4     1.0    1.0    45.8      vmovaps\t%zmm0, %zmm29\n",
+       "7.     4     32.3   3.0    12.3      vrcp14pd\t%zmm11, %zmm13\n",
+       "8.     4     36.0   0.3    3.8       vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
+       "9.     4     36.3   0.8    10.3      vfpclasspd\t$30, %zmm13, %k0\n",
+       "10.    4     46.5   0.0    1.3       vmulpd\t%zmm11, %zmm11, %zmm12\n",
+       "11.    4     40.8   1.3    9.5       knotw\t%k0, %k1\n",
+       "12.    4     45.8   0.0    1.3       vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
+       "13.    4     49.0   0.0    0.0       vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
+       "14.    4     52.8   0.0    0.0       vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
+       "15.    4     1.0    1.0    54.5      vpaddd\t%ymm5, %ymm14, %ymm20\n",
+       "16.    4     14.0   14.0   34.8      vcvtdq2pd\t%ymm14, %zmm15\n",
+       "17.    4     22.3   1.3    29.5      vaddpd\t%zmm15, %zmm1, %zmm16\n",
+       "18.    4     26.0   0.3    25.3      vmulpd\t%zmm16, %zmm2, %zmm17\n",
+       "19.    4     29.8   0.3    21.0      vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
+       "20.    4     35.3   2.0    15.0      vrcp14pd\t%zmm17, %zmm19\n",
+       "21.    4     38.8   0.0    6.5       vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
+       "22.    4     39.3   1.0    12.5      vfpclasspd\t$30, %zmm19, %k2\n",
+       "23.    4     48.8   0.0    3.5       vmulpd\t%zmm17, %zmm17, %zmm18\n",
+       "24.    4     43.0   0.8    11.8      knotw\t%k2, %k3\n",
+       "25.    4     47.8   0.0    3.5       vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
+       "26.    4     51.0   0.0    1.3       vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
+       "27.    4     54.8   0.0    0.0       vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
+       "28.    4     1.0    1.0    56.5      vpaddd\t%ymm5, %ymm20, %ymm26\n",
+       "29.    4     19.0   19.0   31.8      vcvtdq2pd\t%ymm20, %zmm21\n",
+       "30.    4     26.3   0.3    27.5      vaddpd\t%zmm21, %zmm1, %zmm22\n",
+       "31.    4     30.0   0.5    23.0      vmulpd\t%zmm22, %zmm2, %zmm23\n",
+       "32.    4     33.5   0.3    18.8      vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
+       "33.    4     37.5   0.5    14.3      vrcp14pd\t%zmm23, %zmm25\n",
+       "34.    4     40.5   0.0    5.8       vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
+       "35.    4     40.8   0.5    12.3      vfpclasspd\t$30, %zmm25, %k4\n",
+       "36.    4     50.5   0.0    2.8       vmulpd\t%zmm23, %zmm23, %zmm24\n",
+       "37.    4     44.0   0.5    11.8      knotw\t%k4, %k5\n",
+       "38.    4     49.5   0.3    2.5       vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
+       "39.    4     52.8   0.0    0.5       vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
+       "40.    4     56.0   0.0    0.0       vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
+       "41.    4     15.0   15.0   37.8      vcvtdq2pd\t%ymm26, %zmm27\n",
+       "42.    4     1.0    1.0    57.3      vpaddd\t%ymm5, %ymm26, %ymm9\n",
+       "43.    4     22.0   0.8    33.0      vaddpd\t%zmm27, %zmm1, %zmm28\n",
+       "44.    4     27.3   2.0    27.0      vmulpd\t%zmm28, %zmm2, %zmm8\n",
+       "45.    4     32.3   1.0    22.0      vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
+       "46.    4     38.0   2.0    16.0      vrcp14pd\t%zmm29, %zmm31\n",
+       "47.    4     41.0   0.0    5.0       vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
+       "48.    4     40.5   0.3    11.8      vfpclasspd\t$30, %zmm31, %k6\n",
+       "49.    4     51.0   0.0    2.0       vmulpd\t%zmm29, %zmm29, %zmm30\n",
+       "50.    4     44.3   0.8    11.0      knotw\t%k6, %k7\n",
+       "51.    4     50.5   0.5    1.5       vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
+       "52.    4     53.8   0.0    0.0       vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
+       "53.    4     57.0   0.0    0.0       vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
+       "54.    4     1.0    1.0    58.5      cmpl\t%edx, %ecx\n",
+       "55.    4     1.3    0.0    57.5      jb\t..B1.4\n",
+       "       4     32.5   1.6    18.4      \n",
+       "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Intel(R) Architecture Code Analyzer Version -  v3.0-28-g1ba2cbb build date: 2017-10-30;16:57:45\n",
+       "Analyzed File -  build/SKX/icc/O3/pi.marked.o\n",
+       "Binary Format - 64Bit\n",
+       "Architecture  -  SKX\n",
+       "Analysis Type - Throughput\n",
+       "\n",
+       "Throughput Analysis Report\n",
+       "--------------------------\n",
+       "Block Throughput: 31.50 Cycles       Throughput Bottleneck: Backend\n",
+       "Loop Count:  103\n",
+       "Port Binding In Cycles Per Iteration:\n",
+       "--------------------------------------------------------------------------------------------------\n",
+       "|  Port  |   0   -  DV   |   1   |   2   -  D    |   3   -  D    |   4   |   5   |   6   |   7   |\n",
+       "--------------------------------------------------------------------------------------------------\n",
+       "| Cycles | 30.0     0.0  |  4.0  |  2.0     2.0  |  2.0     2.0  |  0.0  | 30.0  |  1.0  |  0.0  |\n",
+       "--------------------------------------------------------------------------------------------------\n",
+       "\n",
+       "DV - Divider pipe (on port 0)\n",
+       "D - Data fetch pipe (on ports 2 and 3)\n",
+       "F - Macro Fusion with the previous instruction occurred\n",
+       "* - instruction micro-ops not bound to a port\n",
+       "^ - Micro Fusion occurred\n",
+       "# - ESP Tracking sync uop was issued\n",
+       "@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected\n",
+       "X - instruction not supported, was not accounted in Analysis\n",
+       "\n",
+       "| Num Of   |                    Ports pressure in cycles                         |      |\n",
+       "|  Uops    |  0  - DV    |  1   |  2  -  D    |  3  -  D    |  4   |  5   |  6   |  7   |\n",
+       "-----------------------------------------------------------------------------------------\n",
+       "|   1      |             |      |             |             |      |      | 1.0  |      | add ecx, 0x20\n",
+       "|   1      |             | 1.0  |             |             |      |      |      |      | vpaddd ymm14, ymm9, ymm5\n",
+       "|   2      | 1.0         |      |             |             |      | 1.0  |      |      | vcvtdq2pd zmm8, ymm9\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vaddpd zmm10, zmm1, zmm8\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm11, zmm2, zmm10\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm11, zmm11, zmm0\n",
+       "|   1*     |             |      |             |             |      |      |      |      | vmovaps zmm29, zmm0\n",
+       "|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm13, zmm11\n",
+       "|   2^     |             |      | 1.0     1.0 |             |      | 1.0  |      |      | vfnmadd213pd zmm11, zmm13, qword ptr [rip]{1to8}\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfpclasspd k0, zmm13, 0x1e\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm12, zmm11, zmm11\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | knotw k1, k0\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm13{k1}, zmm11, zmm13\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd213pd zmm13{k1}, zmm12, zmm13\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm6, zmm13, zmm4\n",
+       "|   1      |             | 1.0  |             |             |      |      |      |      | vpaddd ymm20, ymm14, ymm5\n",
+       "|   2      | 1.0         |      |             |             |      | 1.0  |      |      | vcvtdq2pd zmm15, ymm14\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vaddpd zmm16, zmm1, zmm15\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm17, zmm2, zmm16\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd213pd zmm17, zmm17, zmm0\n",
+       "|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm19, zmm17\n",
+       "|   2^     |             |      |             | 1.0     1.0 |      | 1.0  |      |      | vfnmadd213pd zmm17, zmm19, qword ptr [rip]{1to8}\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfpclasspd k2, zmm19, 0x1e\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm18, zmm17, zmm17\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | knotw k3, k2\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm19{k3}, zmm17, zmm19\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm19{k3}, zmm18, zmm19\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm3, zmm19, zmm4\n",
+       "|   1      |             | 1.0  |             |             |      |      |      |      | vpaddd ymm26, ymm20, ymm5\n",
+       "|   2      | 1.0         |      |             |             |      | 1.0  |      |      | vcvtdq2pd zmm21, ymm20\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vaddpd zmm22, zmm1, zmm21\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm23, zmm2, zmm22\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm23, zmm23, zmm0\n",
+       "|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm25, zmm23\n",
+       "|   2^     |             |      | 1.0     1.0 |             |      | 1.0  |      |      | vfnmadd213pd zmm23, zmm25, qword ptr [rip]{1to8}\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfpclasspd k4, zmm25, 0x1e\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm24, zmm23, zmm23\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | knotw k5, k4\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm25{k5}, zmm23, zmm25\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd213pd zmm25{k5}, zmm24, zmm25\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm6, zmm25, zmm4\n",
+       "|   2      | 1.0         |      |             |             |      | 1.0  |      |      | vcvtdq2pd zmm27, ymm26\n",
+       "|   1      |             | 1.0  |             |             |      |      |      |      | vpaddd ymm9, ymm26, ymm5\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vaddpd zmm28, zmm1, zmm27\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm8, zmm2, zmm28\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm29, zmm8, zmm8\n",
+       "|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm31, zmm29\n",
+       "|   2^     |             |      |             | 1.0     1.0 |      | 1.0  |      |      | vfnmadd213pd zmm29, zmm31, qword ptr [rip]{1to8}\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfpclasspd k6, zmm31, 0x1e\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm30, zmm29, zmm29\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | knotw k7, k6\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm31{k7}, zmm29, zmm31\n",
+       "|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd213pd zmm31{k7}, zmm30, zmm31\n",
+       "|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm3, zmm31, zmm4\n",
+       "|   1*     |             |      |             |             |      |      |      |      | cmp ecx, edx\n",
+       "|   0*F    |             |      |             |             |      |      |      |      | jb 0xfffffffffffffeb3\n",
+       "Total Num Of Uops: 71\n",
+       "Analysis Notes:\n",
+       "Backend allocation was stalled due to unavailable allocation resources.\n",
+       "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -307,7 +973,8 @@ " for l in r['analyzed kernel']\n", " if l['instruction']]))\n", "for a in archs:\n", - " print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')" + " print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')\n", + "get_info((\"SKX\", \"icc\", \"O3\", \"pi\"))" ] }, { @@ -343,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 25, "metadata": { "hideCode": false, "hidePrompt": false,