Merge pull request #12 from RRZE-HPC/dev

Dev
This commit is contained in:
Jan
2018-09-02 14:25:10 +02:00
committed by GitHub
7 changed files with 382 additions and 116 deletions

View File

@@ -25,10 +25,11 @@ Getting started
Installation
~~~~~~~~~~~~
.. On most systems with python pip and setuptools installed, just run:
.. ::
pip install --user osaca
.. for the latest release.
On most systems with python pip and setuptools installed, just run:
::
pip install --user osaca
for the latest release.
To build OSACA from source, clone this repository using ``git clone https://github.com/RRZE-HPC/OSACA`` and run in the root directory:
.. code:: bash
@@ -64,7 +65,7 @@ The usage of OSACA can be listed as:
- ``-h`` or ``--help`` prints out the help message.
- ``-V`` or ``--version`` shows the program's version number.
- ``ARCH`` needs to be replaced with the desired architecture abbreviation. This flag is necessary for the throughput analysis (default function) and the inclusion of an ibench output (``-i``). Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW`` and ``SKL`` for the latest Intel micro architectures starting from Intel Sandy Bridge.
- ``ARCH`` needs to be replaced with the desired architecture abbreviation. This flag is necessary for the throughput analysis (default function) and the inclusion of an ibench output (``-i``). Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW`` and ``SKL`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN`` for the AMD Zen (17h family) architecture.
- While in the throughput analysis mode, one can add ``--tp-list`` to print the additional throughput list of the kernel or ``--iaca`` to let OSACA know it has to search for IACA binary markers.
- ``-i`` or ``--include-ibench`` starts the integration of ibench output into the CSV data file determined by ``ARCH``.
- With the flag ``-m`` or ``--insert-marker`` OSACA calls the Kerncraft module for the interactive insertion of `IACA <https://software.intel.com/en-us/articles/intel-architecture-code-analyzer>`_ markers in suggested assembly blocks.
@@ -74,7 +75,7 @@ Hereinafter OSACA's scope of function will be described.
Throughput analysis
~~~~~~~~~~~~~~~~~~~
As the main functionality of OSACA, this process starts by default. It is always necessary to specify the core architecture by the flag ``--arch ARCH``, where ``ARCH`` can stand for ``SNB``, ``IVB``, ``HSW``, ``BDW`` or ``SKL``.
As the main functionality of OSACA, this process starts by default. It is always necessary to specify the core architecture by the flag ``--arch ARCH``, where ``ARCH`` can stand for ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKL`` or ``ZEN``.
For extracting the right kernel, one has to mark it beforehand. For this there are two different approaches:

View File

@@ -1 +1,2 @@
__version__ = '0.1'
name = "osaca"
__version__ = '0.2.0'

View File

@@ -1,67 +1,153 @@
instr,TP,LT,ports
jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)"
mov-mem_r64,1.0,2.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
vmovupd-mem_ymm,1.0,3.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
mov-mem_r32,1.0,2.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
add-r64_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
add-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
vxorps-xmm_xmm_xmm,1.0,1.0,"(0,0,0,0,0,1.0,0,0)"
vaddsd-xmm_xmm_mem,0.5,4.0,"(0.5,0.5,0,0,0,0,0,0)"
mov-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
and-r32_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
cmp-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
mov-r64_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
vcvtsi2ss-xmm_xmm_r32,2.0,4.0,"(2.0,2.0,0,0,0,2.0,0,0)"
vmovsd-xmm_mem,0.5,3.0,"(0,0,0.5,0.5,0,0,0,0)"
vaddsd-xmm_xmm_xmm,0.5,4.0,"(0.5,0.5,0,0,0,0,0,0)"
shl-r64_imd,0.5,1.0,"(0.5,0,0,0,0,0,0.5,0)"
vmovsd-mem_xmm,1.0,3.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
add-r32_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5,0.5,0.5,0.5,0,0,0,0)"
movslq-r64_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5,0.5,0,0,0,0,0,0)"
mov-r64_mem,0.5,2.0,"(0,0,0.5,0.5,0,0,0,0)"
xor-r32_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
cmp-r32_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
mov-r32_mem,0.5,2.0,"(0,0,0.5,0.5,0,0,0,0)"
add-mem_imd,1.0,5.0,"(0.5,0.5,0.6666666666666666,0.6666666666666666,1.0,0.5,0.5,0.6666666666666666)"
movsx-r64_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
mov-mem_imd,1.0,2.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
mov-r32_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
sub-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)"
jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)"
addl-r32_mem,0.5,1.0,"(-1,)"
add-mem_imd,1.0,5.0,"(0.5, 0,0.5,0.6666666666666666,0.6666666666666666,1.0,0.5,0.5,0.6666666666666666)"
addpd-xmm_mem,1.0,4.0,"(-1,)"
addq-r64_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
addq-r64_mem,0.5,1.0,"(-1,)"
add-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
addl-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
add-r32_r32,0.25,-1.0,"(-1,)"
add-r64_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
add-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
addsd-xmm_mem,1.0,4.0,"(-1,)"
addsd-xmm_xmm,1.0,4.0,"(-1,)"
and-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
cmpl-r32_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
cmpq-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
cmp-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
cmp-r32_mem,0.5,-1.0,"(0.25, 0, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)"
cmpl-mem_r32,0.5,-1.0,"(0.25, 0, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)"
cmp-r32_r32,0.25,1.0,"(-1,)"
cmp-r64_imd,0.25,1.0,"(-1,)"
cmp-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
imulq-r64_r64_imd,1.0,3.0,"(-1,)"
imul-r64_r64,1.0,3.0,"(-1,)"
incq-r64,0.25,1.0,"(0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)"
incl-r32,0.25,1.0,"(0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)"
inc-r64,0.25,1.0,"(-1,)"
lea-r32_mem,1.0,-1.0,"(-1,)"
lea-r64_mem-,1.0,-1.0,"(-1,)"
lea-r64_mem,1.0,1.0,"(-1,)"
movl-mem_r32,1.0,2.0,"(-1,)"
movl-r32_imd,0.25,1.0,"(-1,)"
movl-r32_mem,0.5,2.0,"(-1,)"
mov-mem_imd,1.0,2.0,"(0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
mov-mem_r32,1.0,2.0,"(0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
mov-mem_r64,1.0,2.0,"(0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)"
movq_r64_xmm,1.0,-1.0,"(-1,)"
mov-r32_imd,0.25,1.0,"(-1,)"
mov-r32_mem,0.5,2.0,"(0, 0,0,0.5,0.5,0,0,0,0)"
mov-r32_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
mov-r64_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
mov_r64_mem,0.5,-1.0,"(-1,)"
mov-r64_mem,0.5,2.0,"(0, 0,0,0.5,0.5,0,0,0,0)"
mov-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
movsd-mem_xmm,1.0,3.0,"(-1,)"
movsd-xmm_mem,0.5,3.0,"(-1,)"
movslq-r64_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
movsx-r64_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
movups-mem_xmm,1.0,3.0,"(-1,)"
movups-xmm_mem,0.5,2.0,"(-1,)"
mulpd-xmm_mem,1.0,4.0,"(-1,)"
mulsd-xmm_mem,1.0,4.0,"(-1,)"
mulsd-xmm_xmm,1.0,4.0,"(-1,)"
prefetcht0-mem,0.5,-1.0,"(-1,)"
prefetchw-mem,1.0,-1.0,"(-1,)"
shl-r64_imd,0.5,1.0,"(0.5, 0,0,0,0,0,0,0.5,0)"
sub-r32_imd,0.25,1.0,"(-1,)"
sub-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
vaddpd-ymm_ymm_mem,0.5,4.0,"(-1,)"
vaddps-ymm_ymm_ymm,0.5,4.0,"(-1,)"
vaddsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0,0.5,0.5,0.5,0,0,0,0)"
vaddsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0,0.5,0,0,0,0,0,0)"
vaddss-xmm_xmm_xmm,0.5,4.0,"(-1,)"
vcvtsi2ss-xmm_xmm_r32,1.0,-1.0,"(0.5, 0,0.5,0,0,0,1.0,0,0)"
vcvtss2si-r32_xmm,1.0,-1.0,"(-1,)"
vfmadd132pd-xmm_xmm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)"
vfmadd132sd-xmm_xmm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)"
vfmadd213pd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vfmadd213pd-ymm_ymm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)"
vfmadd132pd-ymm_ymm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)"
vfmadd213pd-ymm_ymm_ymm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vfmadd213ps-xmm_xmm_xmm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vfmadd213sd,0.5,5.0,"(-1,)"
vfmadd213ss,0.5,5.0,"(-1,)"
vinsertf128-ymm_ymm_imd,1.0,3.0,"(-1,)"
vmovapd-mem_ymm,1.0,-1.0,"(0, 0, 0, 0.5, 0.5, 1.0, 0, 0, 0)"
vmovapd-ymm_mem,0.5,-1.0,"(0, 0, 0, 0.5, 0.5, 0, 0, 0, 0)"
vmovaps-mem_xmm,1.0,3.0,"(0, 0, 0, 0.5, 0.5, 1.0, 0, 0, 0)"
vmovaps-xmm_mem,0.5,2.0,"(0, 0, 0, 0.5, 0.5, 0, 0, 0, 0)"
vmovsd-mem_xmm,1.0,3.0,"(0, 0,0,0.5,0.5,1.0,0,0,0)"
vmovsd-xmm_mem,0.5,3.0,"(0, 0,0,0.5,0.5,0,0,0,0)"
vmovupd-mem_ymm,1.0,3.0,"(0, 0,0,0.5,0.5,1.0,0,0,0)"
vmovupd-ymm_mem,0.5,-1.0,"(-1,)"
vmulpd-ymm_ymm_ymm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vmulps-ymm_ymm_ymm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0,0.5,0.5,0.5,0,0,0,0)"
vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0,0.5,0,0,0,0,0,0)"
vmulss-xmm_xmm_xmm,0.5,4.0,"(-1,)"
vrcpps-avx,1.0,4.0,"(-1,)"
vsqrtpd-avx,12.0,21.0,"(-1,)"
vsqrtps-avx,6.0,16.0,"(-1,)"
vsubpd-ymm_ymm_mem,0.5,4.0,"(-1,)"
vsubsd-xmm_xmm_mem,0.5,4.0,"(-1,)"
vsubsd-xmm_xmm_xmm,0.5,4.0,"(-1,)"
vsubss-xmm_xmm_xmm,0.5,4.0,"(-1,)"
vxorps-xmm_xmm_xmm,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
xor-r32_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)"
cmpl-r32_imd,0.25,1.0,"(0.25, 0, 0.25,0,0,0,0.25,0.25,0)"
vaddpd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vaddpd-ymm_ymm_ymm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vcvtdq2pd-xmm_xmm,1.0,5,"(1.0, 0, 0, 0, 0, 0, 1.0, 0, 0)"
vcvtdq2pd-ymm_xmm,1.0,7,"(1.0, 0, 0, 0, 0, 0, 1.0, 0, 0)"
vcvtsi2sd-xmm_xmm_r32,1.0,4,"(0.5, 0, 0.5, 0, 0, 0, 1.0, 0, 0)"
vextracti128-xmm_ymm_imd,1.0,3,"(0, 0, 0, 0, 0, 0, 1.0, 0, 0)"
vfmadd132pd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vfmadd132pd-ymm_ymm_ymm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vfmadd132sd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vmulpd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)"
vpaddd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0, 0.33, 0, 0, 0, 0.33, 0, 0)"
vpaddd-ymm_ymm_ymm,0.3333333333333333,1,"(0.33, 0, 0.33, 0, 0, 0, 0.33, 0, 0)"
vpshufd-xmm_xmm_imd,1.0,1,"(0, 0, 0, 0, 0, 0, 1.0, 0, 0)"
vxorpd-xmm_xmm_xmm,0.25,1,"(0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)"
vdivpd-ymm_ymm_ymm,8.0,14.0,"(1.0, 8.0, 0, 0, 0, 0, 0, 0, 0)"
vdivps-ymm_ymm_ymm,5.0,11.0,"(1.0, 5.0, 0, 0, 0, 0, 0, 0, 0)"
vdivpd-xmm_xmm_xmm,4.0,13,"(1.0, 4.0, 0, 0, 0, 0, 0, 0, 0)"
vdivsd-xmm_xmm_xmm,4.0,13,"(1.0, 4.0, 0, 0, 0, 0, 0, 0, 0)"
1 instr TP LT ports
2 jmp-lbl jae-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
3 jo-lbl ja-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
4 jno-lbl jbe-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
5 js-lbl jb-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
6 jns-lbl jc-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
7 je-lbl jcxz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
8 jz-lbl jecxz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
9 jne-lbl je-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
10 jnz-lbl jge-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
11 jb-lbl jg-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
12 jnae-lbl jle-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
13 jc-lbl jl-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
14 jnb-lbl jmp-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
15 jae-lbl jnae-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
16 jnc-lbl jna-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
17 jbe-lbl jnbe-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
18 jna-lbl jnb-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
19 ja-lbl jnc-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
20 jnbe-lbl jne-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
21 jl-lbl jnge-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
22 jnge-lbl jng-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
23 jge-lbl jnle-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
24 jnl-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
25 jle-lbl jno-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
26 jng-lbl jno-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
27 jg-lbl jnp-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
28 jnle-lbl jns-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
29 jp-lbl jns-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
30 jpe-lbl jnz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
31 jnp-lbl jo-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
32 jpo-lbl jo-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
33 jcxz-lbl jpe-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
34 jecxz-lbl jp-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
35 jo-lbl jpo-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
36 jno-lbl js-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
37 js-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
38 jns-lbl jz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0.0, 0, 0) (0, 0, 0, 0, 0, 0, 0.0, 0, 0)
39 mov-mem_r64 addl-r32_mem 1.0 0.5 2.0 1.0 (0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333) (-1,)
40 vmovupd-mem_ymm add-mem_imd 1.0 3.0 5.0 (0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333) (0.5, 0,0.5,0.6666666666666666,0.6666666666666666,1.0,0.5,0.5,0.6666666666666666)
41 mov-mem_r32 addpd-xmm_mem 1.0 2.0 4.0 (0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333) (-1,)
42 add-r64_imd addq-r64_imd 0.25 1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
43 add-r64_r64 addq-r64_mem 0.25 0.5 1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
44 vxorps-xmm_xmm_xmm add-r32_imd 1.0 0.25 1.0 (0,0,0,0,0,1.0,0,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
45 vaddsd-xmm_xmm_mem addl-r32_imd 0.5 0.25 4.0 1.0 (0.5,0.5,0,0,0,0,0,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
46 mov-r64_r64 add-r32_r32 0.25 1.0 -1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
47 and-r32_imd add-r64_imd 0.25 1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
48 cmp-r64_r64 add-r64_r64 0.25 1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
49 mov-r64_imd addsd-xmm_mem 0.25 1.0 1.0 4.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
50 vcvtsi2ss-xmm_xmm_r32 addsd-xmm_xmm 2.0 1.0 4.0 (2.0,2.0,0,0,0,2.0,0,0) (-1,)
51 vmovsd-xmm_mem and-r32_imd 0.5 0.25 3.0 1.0 (0,0,0.5,0.5,0,0,0,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
52 vaddsd-xmm_xmm_xmm cmpl-r32_r32 0.5 0.25 4.0 1.0 (0.5,0.5,0,0,0,0,0,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
53 shl-r64_imd cmpq-r64_r64 0.5 0.25 1.0 (0.5,0,0,0,0,0,0.5,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
54 vmovsd-mem_xmm cmp-r32_imd 1.0 0.25 3.0 1.0 (0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
55 add-r32_imd cmp-r32_mem 0.25 0.5 1.0 -1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (0.25, 0, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)
56 vmulsd-xmm_xmm_mem cmpl-mem_r32 0.5 4.0 -1.0 (0.5,0.5,0.5,0.5,0,0,0,0) (0.25, 0, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)
57 movslq-r64_r32 cmp-r32_r32 0.25 1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
58 vmulsd-xmm_xmm_xmm cmp-r64_imd 0.5 0.25 4.0 1.0 (0.5,0.5,0,0,0,0,0,0) (-1,)
59 mov-r64_mem cmp-r64_r64 0.5 0.25 2.0 1.0 (0,0,0.5,0.5,0,0,0,0) (0.25, 0,0.25,0,0,0,0.25,0.25,0)
60 xor-r32_r32 imulq-r64_r64_imd 0.25 1.0 1.0 3.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
61 cmp-r32_imd imul-r64_r64 0.25 1.0 1.0 3.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
62 mov-r32_mem incq-r64 0.5 0.25 2.0 1.0 (0,0,0.5,0.5,0,0,0,0) (0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)
63 add-mem_imd incl-r32 1.0 0.25 5.0 1.0 (0.5,0.5,0.6666666666666666,0.6666666666666666,1.0,0.5,0.5,0.6666666666666666) (0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)
64 movsx-r64_r32 inc-r64 0.25 1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
65 mov-mem_imd lea-r32_mem 1.0 2.0 -1.0 (0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333) (-1,)
66 mov-r32_r32 lea-r64_mem- 0.25 1.0 1.0 -1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
67 sub-r64_r64 lea-r64_mem 0.25 1.0 1.0 (0.25,0.25,0,0,0,0.25,0.25,0) (-1,)
68 movl-mem_r32 1.0 2.0 (-1,)
69 movl-r32_imd 0.25 1.0 (-1,)
70 movl-r32_mem 0.5 2.0 (-1,)
71 mov-mem_imd 1.0 2.0 (0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)
72 mov-mem_r32 1.0 2.0 (0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)
73 mov-mem_r64 1.0 2.0 (0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)
74 movq_r64_xmm 1.0 -1.0 (-1,)
75 mov-r32_imd 0.25 1.0 (-1,)
76 mov-r32_mem 0.5 2.0 (0, 0,0,0.5,0.5,0,0,0,0)
77 mov-r32_r32 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
78 mov-r64_imd 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
79 mov_r64_mem 0.5 -1.0 (-1,)
80 mov-r64_mem 0.5 2.0 (0, 0,0,0.5,0.5,0,0,0,0)
81 mov-r64_r64 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
82 movsd-mem_xmm 1.0 3.0 (-1,)
83 movsd-xmm_mem 0.5 3.0 (-1,)
84 movslq-r64_r32 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
85 movsx-r64_r32 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
86 movups-mem_xmm 1.0 3.0 (-1,)
87 movups-xmm_mem 0.5 2.0 (-1,)
88 mulpd-xmm_mem 1.0 4.0 (-1,)
89 mulsd-xmm_mem 1.0 4.0 (-1,)
90 mulsd-xmm_xmm 1.0 4.0 (-1,)
91 prefetcht0-mem 0.5 -1.0 (-1,)
92 prefetchw-mem 1.0 -1.0 (-1,)
93 shl-r64_imd 0.5 1.0 (0.5, 0,0,0,0,0,0,0.5,0)
94 sub-r32_imd 0.25 1.0 (-1,)
95 sub-r64_r64 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
96 vaddpd-ymm_ymm_mem 0.5 4.0 (-1,)
97 vaddps-ymm_ymm_ymm 0.5 4.0 (-1,)
98 vaddsd-xmm_xmm_mem 0.5 4.0 (0.5, 0,0.5,0.5,0.5,0,0,0,0)
99 vaddsd-xmm_xmm_xmm 0.5 4.0 (0.5, 0,0.5,0,0,0,0,0,0)
100 vaddss-xmm_xmm_xmm 0.5 4.0 (-1,)
101 vcvtsi2ss-xmm_xmm_r32 1.0 -1.0 (0.5, 0,0.5,0,0,0,1.0,0,0)
102 vcvtss2si-r32_xmm 1.0 -1.0 (-1,)
103 vfmadd132pd-xmm_xmm_mem 0.5 4.0 (0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)
104 vfmadd132sd-xmm_xmm_mem 0.5 4.0 (0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)
105 vfmadd213pd-xmm_xmm_xmm 0.5 4.0 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
106 vfmadd213pd-ymm_ymm_mem 0.5 4.0 (0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)
107 vfmadd132pd-ymm_ymm_mem 0.5 4.0 (0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)
108 vfmadd213pd-ymm_ymm_ymm 0.5 4.0 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
109 vfmadd213ps-xmm_xmm_xmm 0.5 4.0 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
110 vfmadd213sd 0.5 5.0 (-1,)
111 vfmadd213ss 0.5 5.0 (-1,)
112 vinsertf128-ymm_ymm_imd 1.0 3.0 (-1,)
113 vmovapd-mem_ymm 1.0 -1.0 (0, 0, 0, 0.5, 0.5, 1.0, 0, 0, 0)
114 vmovapd-ymm_mem 0.5 -1.0 (0, 0, 0, 0.5, 0.5, 0, 0, 0, 0)
115 vmovaps-mem_xmm 1.0 3.0 (0, 0, 0, 0.5, 0.5, 1.0, 0, 0, 0)
116 vmovaps-xmm_mem 0.5 2.0 (0, 0, 0, 0.5, 0.5, 0, 0, 0, 0)
117 vmovsd-mem_xmm 1.0 3.0 (0, 0,0,0.5,0.5,1.0,0,0,0)
118 vmovsd-xmm_mem 0.5 3.0 (0, 0,0,0.5,0.5,0,0,0,0)
119 vmovupd-mem_ymm 1.0 3.0 (0, 0,0,0.5,0.5,1.0,0,0,0)
120 vmovupd-ymm_mem 0.5 -1.0 (-1,)
121 vmulpd-ymm_ymm_ymm 0.5 4.0 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
122 vmulps-ymm_ymm_ymm 0.5 4.0 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
123 vmulsd-xmm_xmm_mem 0.5 4.0 (0.5, 0,0.5,0.5,0.5,0,0,0,0)
124 vmulsd-xmm_xmm_xmm 0.5 4.0 (0.5, 0,0.5,0,0,0,0,0,0)
125 vmulss-xmm_xmm_xmm 0.5 4.0 (-1,)
126 vrcpps-avx 1.0 4.0 (-1,)
127 vsqrtpd-avx 12.0 21.0 (-1,)
128 vsqrtps-avx 6.0 16.0 (-1,)
129 vsubpd-ymm_ymm_mem 0.5 4.0 (-1,)
130 vsubsd-xmm_xmm_mem 0.5 4.0 (-1,)
131 vsubsd-xmm_xmm_xmm 0.5 4.0 (-1,)
132 vsubss-xmm_xmm_xmm 0.5 4.0 (-1,)
133 vxorps-xmm_xmm_xmm 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
134 xor-r32_r32 0.25 1.0 (0.25, 0,0.25,0,0,0,0.25,0.25,0)
135 cmpl-r32_imd 0.25 1.0 (0.25, 0, 0.25,0,0,0,0.25,0.25,0)
136 vaddpd-xmm_xmm_xmm 0.5 4 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
137 vaddpd-ymm_ymm_ymm 0.5 4 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
138 vcvtdq2pd-xmm_xmm 1.0 5 (1.0, 0, 0, 0, 0, 0, 1.0, 0, 0)
139 vcvtdq2pd-ymm_xmm 1.0 7 (1.0, 0, 0, 0, 0, 0, 1.0, 0, 0)
140 vcvtsi2sd-xmm_xmm_r32 1.0 4 (0.5, 0, 0.5, 0, 0, 0, 1.0, 0, 0)
141 vextracti128-xmm_ymm_imd 1.0 3 (0, 0, 0, 0, 0, 0, 1.0, 0, 0)
142 vfmadd132pd-xmm_xmm_xmm 0.5 4 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
143 vfmadd132pd-ymm_ymm_ymm 0.5 4 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
144 vfmadd132sd-xmm_xmm_xmm 0.5 4 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
145 vmulpd-xmm_xmm_xmm 0.5 4 (0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)
146 vpaddd-xmm_xmm_xmm 0.3333333333333333 1 (0.33, 0, 0.33, 0, 0, 0, 0.33, 0, 0)
147 vpaddd-ymm_ymm_ymm 0.3333333333333333 1 (0.33, 0, 0.33, 0, 0, 0, 0.33, 0, 0)
148 vpshufd-xmm_xmm_imd 1.0 1 (0, 0, 0, 0, 0, 0, 1.0, 0, 0)
149 vxorpd-xmm_xmm_xmm 0.25 1 (0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)
150 vdivpd-ymm_ymm_ymm 8.0 14.0 (1.0, 8.0, 0, 0, 0, 0, 0, 0, 0)
151 vdivps-ymm_ymm_ymm 5.0 11.0 (1.0, 5.0, 0, 0, 0, 0, 0, 0, 0)
152 vdivpd-xmm_xmm_xmm 4.0 13 (1.0, 4.0, 0, 0, 0, 0, 0, 0, 0)
153 vdivsd-xmm_xmm_xmm 4.0 13 (1.0, 4.0, 0, 0, 0, 0, 0, 0, 0)

123
osaca/data/zen_data.csv Normal file
View File

@@ -0,0 +1,123 @@
instr,TP,LT,ports
jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jmpq-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
add-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
add-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
addl-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
addq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
addl-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
addq-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
add-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
add-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
addl-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
addq-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
cmp-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
cmpl-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
cmp-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
cmpl-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
cmp-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
cmpl-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
cmp-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
cmp-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
cmpq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
cmpq-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
inc-r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
incq-r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
incl-r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
mov-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
mov-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
mov-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
movq-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
movq-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
movl-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
movslq-r64_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
sub-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
vaddpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0.5, 0.5)"
vaddsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)"
vaddsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
vaddss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
vcvtsi2ss-xmm_xmm_r32,1.0,4.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
vcvtss2si-r32_xmm,1.0,7.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
cvtsi2ss-xmm_r32,1.0,8.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd213pd-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd213pd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd213ps-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd213ps-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd213sd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd213ss-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd132sd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
vfmadd132pd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
vfmadd132pd-ymm_ymm_mem,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
vinsertf128-ymm_ymm_imd,0.6666666666666667,1.0,"(-1,)"
vmovsd-mem_xmm,1.0,8.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
vmovsd-xmm_mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
vmulpd-ymm_ymm_ymm,1.0,4.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vmulss-xmm_xmm_xmm,0.5,3.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vsubpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 1.0, 1.0)"
vsubsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)"
vsubsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
vsubss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
vmovaps-xmm_mem,0.5,3.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0.5, 0.5)"
vmovaps-mem_xmm,1.0,5.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 1.0, 1.0)"
vmovapd-ymm_mem,1.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
vmovapd-mem_ymm,2.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 2.0, 2.0)"
movq_r64_xmm,1.0,-1.0,"(0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)"
#prefetcht0-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
#prefetchw-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
cmpl-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
vaddpd-xmm_xmm_xmm,0.5,3,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
vaddpd-ymm_ymm_ymm,1,3,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
vcvtdq2pd-xmm_xmm,1,7,"(0.5, 0.5, 0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
vcvtdq2pd-ymm_xmm,2,7,"(1.0, 1.0, 0, 2.0, 0, 0, 0, 0, 0, 0, 0)"
vcvtsi2sd-xmm_xmm_r32,1,4,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
vextracti128-xmm_ymm_imd,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)"
vfmadd132pd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd132pd-ymm_ymm_ymm,1,5,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vfmadd132sd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vmulpd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
vpaddd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)"
vpaddd-ymm_ymm_ymm,0.6666666666666667,1,"(0.66, 0.66, 0, 0.66, 0, 0, 0, 0, 0, 0, 0)"
vpshufd-xmm_xmm_imd,0.5,1,"(0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)"
vxorpd-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)"
vxorps-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)"
vdivpd-xmm_xmm_xmm,4,8,"(0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)"
vdivsd-xmm_xmm_xmm,4,8,"(0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)"
1 instr TP LT ports
2 jae-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
3 ja-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
4 jbe-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
5 jb-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
6 jc-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
7 jcxz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
8 jecxz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
9 je-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
10 jge-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
11 jg-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
12 jle-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
13 jl-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
14 jmp-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
15 jmpq-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
16 jnae-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
17 jna-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
18 jnbe-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
19 jnb-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
20 jnc-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
21 jne-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
22 jnge-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
23 jng-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
24 jnle-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
25 jnl-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
26 jno-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
27 jno-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
28 jnp-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
29 jns-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
30 jns-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
31 jnz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
32 jo-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
33 jo-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
34 jpe-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
35 jp-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
36 jpo-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
37 js-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
38 js-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
39 jz-lbl 0.0 0.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
40 add-r32_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
41 add-r64_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
42 addl-r32_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
43 addq-r64_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
44 addl-mem_imd 1.0 7.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)
45 addq-mem_imd 1.0 7.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)
46 add-mem_r32 1.0 7.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)
47 add-mem_r64 1.0 7.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)
48 addl-mem_r32 1.0 7.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)
49 addq-mem_r64 1.0 7.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)
50 cmp-mem_r32 0.5 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)
51 cmpl-mem_r32 0.5 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)
52 cmp-r32_mem 0.5 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)
53 cmpl-r32_mem 0.5 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)
54 cmp-r32_r32 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
55 cmpl-r32_r32 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
56 cmp-r64_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
57 cmp-r64_r64 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
58 cmpq-r64_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
59 cmpq-r64_r64 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
60 inc-r64 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
61 incq-r64 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
62 incl-r32 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
63 mov-mem_r64 1.0 4.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)
64 mov-r64_mem 0.5 3.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
65 mov-r32_mem 0.5 3.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
66 movq-mem_r64 1.0 4.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)
67 movq-r64_mem 0.5 3.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
68 movl-r32_mem 0.5 3.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
69 movslq-r64_r32 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
70 sub-r32_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
71 vaddpd-ymm_ymm_mem 1.0 3.0 (0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0.5, 0.5)
72 vaddsd-xmm_xmm_mem 0.5 3.0 (0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)
73 vaddsd-xmm_xmm_xmm 0.5 3.0 (0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)
74 vaddss-xmm_xmm_xmm 0.5 3.0 (0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)
75 vcvtsi2ss-xmm_xmm_r32 1.0 4.0 (1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)
76 vcvtss2si-r32_xmm 1.0 7.0 (1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)
77 cvtsi2ss-xmm_r32 1.0 8.0 (1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)
78 vfmadd213pd-ymm_ymm_ymm 1.0 5.0 (1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
79 vfmadd213pd-xmm_xmm_xmm 0.5 5.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
80 vfmadd213ps-ymm_ymm_ymm 1.0 5.0 (1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
81 vfmadd213ps-xmm_xmm_xmm 0.5 5.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
82 vfmadd213sd-xmm_xmm_xmm 0.5 5.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
83 vfmadd213ss-xmm_xmm_xmm 0.5 5.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
84 vfmadd132sd-xmm_xmm_mem 0.5 5.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
85 vfmadd132pd-xmm_xmm_mem 0.5 5.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
86 vfmadd132pd-ymm_ymm_mem 1.0 5.0 (1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)
87 vinsertf128-ymm_ymm_imd 0.6666666666666667 1.0 (-1,)
88 vmovsd-mem_xmm 1.0 8.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)
89 vmovsd-xmm_mem 0.5 -1.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
90 vmulpd-ymm_ymm_ymm 1.0 4.0 (1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
91 vmulsd-xmm_xmm_mem 0.5 4.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
92 vmulsd-xmm_xmm_xmm 0.5 4.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
93 vmulss-xmm_xmm_xmm 0.5 3.0 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
94 vsubpd-ymm_ymm_mem 1.0 3.0 (0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 1.0, 1.0)
95 vsubsd-xmm_xmm_mem 0.5 3.0 (0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)
96 vsubsd-xmm_xmm_xmm 0.5 3.0 (0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)
97 vsubss-xmm_xmm_xmm 0.5 3.0 (0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)
98 vmovaps-xmm_mem 0.5 3.0 (0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0.5, 0.5)
99 vmovaps-mem_xmm 1.0 5.0 (0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 1.0, 1.0)
100 vmovapd-ymm_mem 1.0 -1.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)
101 vmovapd-mem_ymm 2.0 -1.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 2.0, 2.0)
102 movq-r64_xmm 1.0 -1.0 (0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)
103 #prefetcht0-mem 0.5 -1.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
104 #prefetchw-mem 0.5 -1.0 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)
105 cmpl-r32_imd 0.25 1.0 (0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)
106 vaddpd-xmm_xmm_xmm 0.5 3 (0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)
107 vaddpd-ymm_ymm_ymm 1 3 (0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)
108 vcvtdq2pd-xmm_xmm 1 7 (0.5, 0.5, 0, 1.0, 0, 0, 0, 0, 0, 0, 0)
109 vcvtdq2pd-ymm_xmm 2 7 (1.0, 1.0, 0, 2.0, 0, 0, 0, 0, 0, 0, 0)
110 vcvtsi2sd-xmm_xmm_r32 1 4 (0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)
111 vextracti128-xmm_ymm_imd 0.3333333333333333 1 (0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)
112 vfmadd132pd-xmm_xmm_xmm 0.5 5 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
113 vfmadd132pd-ymm_ymm_ymm 1 5 (1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
114 vfmadd132sd-xmm_xmm_xmm 0.5 5 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
115 vmulpd-xmm_xmm_xmm 0.5 4 (0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)
116 vpaddd-xmm_xmm_xmm 0.3333333333333333 1 (0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)
117 vpaddd-ymm_ymm_ymm 0.6666666666666667 1 (0.66, 0.66, 0, 0.66, 0, 0, 0, 0, 0, 0, 0)
118 vpshufd-xmm_xmm_imd 0.5 1 (0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)
119 vxorpd-xmm_xmm_xmm 0.25 1 (0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)
120 vxorps-xmm_xmm_xmm 0.25 1 (0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)
121 vdivpd-xmm_xmm_xmm 4 8 (0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)
122 vdivsd-xmm_xmm_xmm 4 8 (0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)

View File

@@ -12,11 +12,18 @@ from osaca.param import Register, MemAddr
class Scheduler(object):
arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8}
arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8, 'ZEN': 10}
dv_port_dict = {'SKL': 0, 'ZEN': 3}
ports = None # type: int
instrList = None # type: list<list<str,Param[,Param][,Param],str>>,
# content of most inner list in instrList: instr, operand(s), instr form
df = None # type: DataFrame
# load ports for parallel ld/st: on archs with 1 st/cy and >1 ld/cy, 1 st and 1 ld can be done in the same cycle
ld_ports = None # type: list<int>
# enable flag for parallel ld/st
en_par_ldst = False # type: boolean
dv_port = -1 # type: int
def __init__(self, arch, instruction_list):
arch = arch.upper()
@@ -25,6 +32,17 @@ class Scheduler(object):
except KeyError:
print('Architecture not supportet for EU scheduling.', file=sys.stderr)
sys.exit(1)
# check for parallel ld/st in a cycle
if(arch == 'ZEN'):
self.en_par_ldst = True
self.ld_ports = [9, 10]
# check for DV port
try:
self.dv_port = self.dv_port_dict[arch]
except KeyError:
# no DV port available (yet, new feature in OSACA v0.2)
# do nothing
pass
self.instrList = instruction_list
#curr_dir = os.path.realpath(__file__)[:-11]
osaca_dir = os.path.expanduser('~/.osaca/')
@@ -42,8 +60,21 @@ class Scheduler(object):
"""
sched = self.get_head()
# Initialize ports
occ_ports = [[0] * self.ports for x in range(len(self.instrList))]
port_bndgs = [0] * self.ports
# Reserve one extra column for the DV port if the architecture has one
tmp_port = 0
if(self.dv_port != -1):
tmp_port = 1
occ_ports = [[0] * (self.ports + tmp_port) for x in range(len(self.instrList))]
port_bndgs = [0] * (self.ports + tmp_port)
# Store instruction counter for parallel ld/st
par_ldst = 0
# Count the number of store instr if we schedule for an architecture with par ld/st
if(self.en_par_ldst):
for i, instrForm in enumerate(self.instrList):
if(isinstance(instrForm[1], MemAddr) and len(instrForm) > 3
and not instrForm[0].startswith('cmp')):
#print('({}, {}) is st --> par_ldst = {}'.format(i, instrForm[0], par_ldst + 1))
par_ldst += 1
# Check if there's a port occupation stored in the CSV, otherwise leave the
# occ_port list item empty
for i, instrForm in enumerate(self.instrList):
@@ -61,8 +92,25 @@ class Scheduler(object):
sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1])
continue
occ_ports[i] = list(tup)
# Check whether the instruction includes a load
p_flg = ''
if(self.en_par_ldst):
# Check for ld
if(isinstance(instrForm[-2], MemAddr) or
(len(instrForm) > 4 and isinstance(instrForm[2], MemAddr))):
if(par_ldst > 0):
par_ldst -= 1
p_flg = 'P '
for port in self.ld_ports:
tmp_port_add = 1 if(self.dv_port != -1 and self.dv_port < port) else 0
occ_ports[i][port] = '(' + str(occ_ports[i][port]) + ')'
# Write schedule line
sched += self.get_line(occ_ports[i], instrForm[-1])
if(len(p_flg) > 0):
sched += self.get_line(occ_ports[i], p_flg + instrForm[-1])
for port in self.ld_ports:
occ_ports[i][port] = 0
else:
sched += self.get_line(occ_ports[i], instrForm[-1])
# Add throughput to total port binding
port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
return (sched, port_bndgs)
@@ -258,7 +306,8 @@ class Scheduler(object):
String containing the report information
"""
analysis = 'Throughput Analysis Report\n' + ('-' * 26) + '\n'
annotations = ('X - No information for this instruction in data file\n'
annotations = ('P - Load operation can be hidden behind a past or future store instruction\n'
'X - No information for this instruction in data file\n'
'* - Instruction micro-ops not bound to a port\n'
'\n')
return analysis + annotations
@@ -272,12 +321,17 @@ class Scheduler(object):
str
String containing the header
"""
horiz_line = '-' * 7 * self.ports + '-\n'
horiz_line = '-' * 7 * self.ports
if(self.dv_port != -1):
horiz_line += '-' * 6
horiz_line += '-\n'
port_anno = (' ' * (math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles'
+ ' ' * (math.ceil((len(horiz_line) - 24) / 2)) + '\n')
port_line = ''
for i in range(0, self.ports):
port_line += '| {} '.format(i)
if(i == self.dv_port):
port_line = port_line + '- DV '
port_line += '|\n'
head = port_anno + port_line + horiz_line
return head
@@ -299,12 +353,22 @@ class Scheduler(object):
String for output containing port scheduling for instr_name
"""
line = ''
for i in occ_ports:
cycles = ' ' if (i == 0) else '%.2f' % float(i)
if(i >= 10):
line += '|' + cycles + ' '
r_space = ' '
for p_num, i in enumerate(occ_ports):
pipe = '|'
if(isinstance(i, str)):
cycles = i
i = float(i[1:-1])
r_space = ''
else:
line += '| ' + cycles + ' '
cycles = ' ' if (i == 0) else '%.2f' % float(i)
r_space = ' '
if(p_num == self.dv_port + 1 and p_num != 0):
pipe = ' '
if(i >= 10):
line += pipe + cycles + r_space
else:
line += pipe + ' ' + cycles + r_space
line += '| ' + instr_name + '\n'
return line
@@ -326,13 +390,21 @@ class Scheduler(object):
header = 'Port Binding in Cycles Per Iteration:\n'
horiz_line = '-' * 10 + '-' * total + '\n'
port_line = '| Port |'
after_dv = 0
for i in range(0, self.ports):
port_line += ' ' * sp_left[i] + str(i) + ' ' * sp_right[i] + '|'
if(i == self.dv_port):
port_line += ' ' * sp_left[i] + str(i) + ' ' * sp_right[i] + '-'
port_line += ' ' * (sp_left[i+1] - 1) + 'DV' + ' ' * sp_right[i+1] + '|'
after_dv = 1
else:
port_line += ' ' * sp_left[i + after_dv] + str(i) + ' ' * sp_right[i + after_dv]
port_line += '|'
port_line += '\n'
cyc_line = '| Cycles |'
for i in range(len(port_bndg)):
pipe = '|' if (i != self.dv_port) else ' '
cyc = str(round(port_bndg[i], 2))
cyc_line += ' {} |'.format(cyc)
cyc_line += ' {} {}'.format(cyc, pipe)
cyc_line += '\n'
binding = header + horiz_line + port_line + horiz_line + cyc_line + horiz_line
return binding
@@ -374,26 +446,3 @@ class Scheduler(object):
if __name__ == '__main__':
print('Nothing to do.')
# data = [
# ['lea',Register('RAX'),MemAddr('%edx,(%rax,%rax,1)'),'lea 0x1(%rax,%rax,1),%edx'],
# ['vcvtsi2ss',Register('XMM0'),Register('XMM0'),Register('RAX'),'vcvtsi2ss %edx,%xmm2,%xmm2'],
# ['vmulss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vmulss %xmm2,%xmm0,%xmm3'],
# ['lea',Register('RAX'),MemAddr('%edx,(%rax,%rax,1)'),'lea 0x2(%rax,%rax,1),%ecx'],
# ['vaddss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vaddss %xmm3,%xmm1,%xmm4'],
# ['vxorps',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vxorps %xmm1, %xmm1,%xmm1'],
# ['vcvtsi2ss',Register('XMM0'),Register('XMM0'),Register('RAX'),'vcvtsi2ss %ecx,%xmm1,%xmm1'],
# ['vmulss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vmulss %xmm1,%xmm0,%xmm5'],
# ['vmovss',MemAddr('%edx,(%rax,%rax,1)'),Register('XMM0'),'vmovss %xmm4,0x4(%rsp,%rax,8)'],
# ['vaddss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vaddss %xmm5,%xmm4,%xmm1'],
# ['vmovss',MemAddr('%edx,(%rax,%rax,1)'),Register('XMM0'),'vmovss %xmm1,0x8(%rsp,%rax,8)'],
# ['inc',Register('RAX'),'inc %rax'],
# ['cmp',Register('RAX'),Parameter('IMD'),'cmp $0x1f3,%rax'],
# ['jb',Parameter('LBL'),'jb 400bc2 <main+0x62>']
# ]
# sched = Scheduler('ivb', data)
# output,binding = sched.schedule()
# print(sched.get_port_binding(binding))
# print(sched.get_report_info(),end='')
# print(output)
# print('Block Throughput: {}'.format(round(max(binding),2)))

View File

@@ -202,7 +202,7 @@ class Osaca(object):
False if arch is not supported
"""
arch_list = ['SNB', 'IVB', 'HSW', 'BDW', 'SKL']
arch_list = ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'ZEN']
if(self.arch in arch_list):
return True
else:
@@ -822,7 +822,7 @@ def main():
parser.add_argument('-V', '--version', action='version', version='%(prog)s '
+ __find_version('__init__.py'))
parser.add_argument('--arch', dest='arch', type=str, help='define architecture '
+ '(SNB, IVB, HSW, BDW, SKL)')
+ '(SNB, IVB, HSW, BDW, SKL, ZEN)')
parser.add_argument('--tp-list', dest='tp_list', action='store_true',
help='print an additional list of all throughput values for the kernel')
group = parser.add_mutually_exclusive_group(required=False)
@@ -871,8 +871,12 @@ def main():
+ '\'pip install --user kerncraft\' for installation.\nFor more information see '
+ 'https://github.com/RRZE-HPC/kerncraft', file=sys.stderr)
sys.exit(1)
iaca.iaca_instrumentation(input_file=filepath, output_file=filepath,
block_selection='manual', pointer_increment=1)
# Change due to newer kerncraft version (hopefully temporary)
#iaca.iaca_instrumentation(input_file=filepath, output_file=filepath,
# block_selection='manual', pointer_increment=1)
with open(filepath, 'r') as f_in, open(filepath[:-2] + '-iaca.s', 'w') as f_out:
iaca.iaca_instrumentation(input_file=f_in, output_file=f_out,
block_selection='manual', pointer_increment=1)
else:
osaca.inspect_binary()

View File

@@ -96,17 +96,19 @@ class Testcase(object):
Controls if latency testcase should be written
(default True)
"""
osaca_dir = os.path.expanduser('~') + '/.osaca/'
if(lt):
# Write latency file
call(['mkdir', '-p', os.path.dirname(__file__)+'/../benchmarks'])
f = open(os.path.dirname(__file__)+'/../benchmarks/'+self.instr+self.extension+'.S', 'w')
call(['mkdir', '-p', osaca_dir + 'benchmarks'])
f = open(osaca_dir + 'benchmarks/'+self.instr+self.extension+'.S', 'w')
data = (self.def_instr + self.ninstr + self.init + self.dp1 + self.expand + self.gprPush
+ self.zeroGPR + self.copy + self.loop_lat + self.gprPop + self.done)
f.write(data)
f.close()
if(tp):
# Write throughput file
f = open(os.path.dirname(__file__) + '/../benchmarks/' + self.instr + self.extension
call(['mkdir', '-p', osaca_dir + 'benchmarks'])
f = open(osaca_dir + 'benchmarks/' + self.instr + self.extension
+ '-TP.S', 'w')
data = (self.def_instr + self.ninstr + self.init + self.dp1 + self.expand + self.gprPush
+ self.zeroGPR + self.copy + self.loop_thrpt + self.gprPop + self.done)