From 8f84368db941060ec2774c74d3d72c7e8a80eaa0 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Fri, 30 Aug 2024 17:41:45 +0200 Subject: [PATCH] initial support ZEN 4 --- README.rst | 4 +- osaca/data/zen4.yml | 5371 +++++++++++++++++++++++++++++++++++ osaca/osaca.py | 5 +- osaca/semantics/hw_model.py | 1 + 4 files changed, 5378 insertions(+), 3 deletions(-) create mode 100644 osaca/data/zen4.yml diff --git a/README.rst b/README.rst index 10ea35d..0fe99f4 100644 --- a/README.rst +++ b/README.rst @@ -100,7 +100,7 @@ The usage of OSACA can be listed as: shows the program’s version number. --arch ARCH needs to be replaced with the target architecture abbreviation. - Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ICL`` (Client), ``ICX`` (Server), ``SPR`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1``, ``ZEN2``, and ``ZEN3`` for AMD Zen architectures. + Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ICL`` (Client), ``ICX`` (Server), ``SPR`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN[1-4]`` for AMD Zen architectures. Furthermore, ``TX2`` for Marvell`s ARM-based ThunderX2 , ``N1`` for ARM's Neoverse, ``A72`` for ARM Cortex-A72, ``TSV110`` for the HiSilicon TaiShan v110, ``A64FX`` for Fujitsu's HPC ARM architecture, ``M1`` for the Apple M1-Firestorm performance core, and ``V2`` for the Neoverse V2 (used in NVIDIA's Grace CPU) are available. If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64. 
--fixed @@ -169,6 +169,8 @@ Supported microarchitectures +----------+-----------------+------------+ | AMD | Milan / Zen 3 | ``ZEN3`` | +----------+-----------------+------------+ +| AMD | Genoa / Zen 4 | ``ZEN4`` | ++----------+-----------------+------------+ **ARM AArch64 CPUs** diff --git a/osaca/data/zen4.yml b/osaca/data/zen4.yml new file mode 100644 index 0000000..c867d27 --- /dev/null +++ b/osaca/data/zen4.yml @@ -0,0 +1,5371 @@ +osaca_version: 0.5.3 +micro_architecture: AMD Zen4 +arch_code: ZEN4 +isa: x86 +ROB_size: 320 +dispatched_uops_per_cycle: 6 +retired_uOps_per_cycle: ~ +scheduler_size: ~ +hidden_loads: false +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0} +load_throughput: +- {dst: gpr, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[1, ['1', '3', '5']]]} +- {dst: xmm, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[1, ['1', '3']]]} +- {dst: ymm, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[1, ['1', '3']]]} +- {dst: zmm, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[2, ['1', '3']]]} +load_throughput_default: [[1, '13']] +store_throughput: +- {src: gpr, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[1, ['1', '3']], [1, ['8', '13']]]} +- {src: xmm, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[1, ['1', '3']], [1, ['13']]]} +- {src: ymm, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[1, ['1', '3']], [1, ['13']]]} +- {src: zmm, base: "*", index: "*", offset: "*", scale: "*", port_pressure: [[2, ['1', '3']], [2, ['13']]]} +store_throughput_default: [[1 , ['1', '3']], [1, ['13']]] +store_to_load_forward_latency: 0.0 +ports: ['0', '1', '2', '3', '4', 4D, '5', '6', '7', '8', '9', 9D, '10', '11', 11D, '12', '13'] +port_model_scheme: | + +--------------------------------------------------------------+ +-------------------------------------------------------+ + | INT0-7 4x24 OoO scheduler | |2x32 FP0 FP2 FP1 FP3 | + 
+--------------------------------------------------------------+ +-------------------------------------------------------+ + 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | + \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ + +------+ +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ +------+ +-------+ +-------+ +-------+ +-------+ +------+ + | ALU | | AGU | | ALU | | AGU | | ALU | | AGU | | ALU | | BR | | iST | |AVX MUL| |AVX ALU| |AVX MUL| |AVX ALU| | ST | + +------+ +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ +------+ +-------+ +-------+ +-------+ +-------+ +------+ + +------+ +-----+ +-----+ +-----+ +-----+ +-----+ +------+ +-------+ +-------+ +-------+ +-------+ + |BRANCH| | LD | | SHF | | LD | | SHF | | iLD | | F2I | |AVX FMA| |AVX ADD| |AVX FMA| |AVX ADD| + +------+ +-----+ +-----+ +-----+ +-----+ +-----+ +------+ +-------+ +-------+ +-------+ +-------+ + //MUL //DIV +-------+ +-------+ +-------+ + | DIV | | CONV/ | | DIV | + +-------+ | SHUF | +-------+ + +-------+ +-------+ + |//AVX | + |//SHUF | + +-------+ + +instruction_forms: +########################################## +# assume all jmp instruction 0 +- name: [jo, jno, js, jns, jp, jpe, jnp, jpo] + operands: + - class: identifier + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: [jc, jb, jae, jnb, jna, jbe, ja, jnbe] + operands: + - class: identifier + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: [je, jz, jne, jnz, jl, jnge] + operands: + - class: identifier + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: [jge, jnl, jle, jng, jg, jnle] + operands: + - class: identifier + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: jmp + operands: + - class: identifier + throughput: 0.0 + latency: 0.0 + port_pressure: [] +########################################## +# assume all cmp's equal for now +# TODO add cmp instructions +- name: [cmp, cmpeqpd, cmpltpd, cmplepd, cmpunordpd, cmpneqpd, cmpnltpd, cmpnlepd, cmpordpd, cmpltps, cmpleps, 
cmpunordps, cmpneqps, cmpnltps, cmpnleps, cmpordps] + operands: + - class: register + name: '*' + - class: register + name: '*' + latency: 1.0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1 +- name: [cmp, cmpeqpd, cmpltpd, cmplepd, cmpunordpd, cmpneqpd, cmpnltpd, cmpnlepd, cmpordpd, cmpltps, cmpleps, cmpunordps, cmpneqps, cmpnltps, cmpnleps, cmpordps] + operands: + - class: immediate + imd: int + - class: register + name: '*' + latency: 1.0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1 +########################################## +- name: push + operands: + - class: immediate + imd: int + latency: 0 + port_pressure: [[1, ['1', '3']], [1, ['8', '13']]] + throughput: 0.5 + uops: 2 +- name: push + operands: + - class: register + name: gpr + latency: 11 + port_pressure: [[1, ['1', '3']], [1, ['8', '13']]] + throughput: 0.5 + uops: 2 +- name: push + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 11 + port_pressure: [[1, ['1', '3']], [1, ['8', '13']]] + throughput: 0.5 + uops: 2 +- name: pop + operands: + - class: immediate + imd: int + latency: 4 + port_pressure: [[1, ['1', '3', '5']]] + throughput: 0.3333333333333333 + uops: 1 +- name: pop + operands: + - class: register + name: gpr + latency: 4 + port_pressure: [[1, ['1', '3', '5']]] + throughput: 0.3333333333333333 + uops: 1 +- name: pop + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 12 + port_pressure: [[1, ['1', '3', '5']]] + throughput: 0.3333333333333333 + uops: 2 +########################################## +- name: mov + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: mov # with store + operands: + - class: register + name: gpr + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1, '135'], [1, ['8', '13']]] + throughput: 0.5 + uops: 2 +- name: mov 
# with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: gpr + latency: 4 + port_pressure: [[1, '135']] + throughput: 0.3333333333333333 + uops: 1 +- name: mov + operands: + - class: immediate + imd: int + - class: register + name: gpr + latency: 1 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: mov # with store + operands: + - class: immediate + imd: int + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['8', '13']]] + throughput: 0.5 + uops: 2 +- name: movabs + operands: + - class: immediate + imd: int + - class: register + name: gpr + latency: 1 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: movapd + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movapd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: movapd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovapd + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovapd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovapd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + 
port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovapd + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovapd # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovapd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovapd + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovapd # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: vmovapd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: movaps + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movaps # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: movaps # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovaps + 
operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovaps # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovaps # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovaps + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovaps # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovaps # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovaps + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovaps # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: vmovaps # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: movdqa + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 
+ port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movdqa # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: movdqa # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqa + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqa # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqa # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqa + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqa # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqa # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqa32 + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- 
name: vmovdqa32 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqa32 # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqa32 + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqa32 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqa32 # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqa32 + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqa32 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovdqa32 # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: vmovdqa64 + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqa64 # with load + operands: + - class: memory + base: 
"*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqa64 # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqa64 + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqa64 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqa64 # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqa64 + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqa64 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovdqa64 # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: movdqu + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movdqu # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: 
xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: movdqu # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu8 + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu8 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- 
name: vmovdqu8 # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu8 + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu8 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu8 # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu8 + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu8 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovdqu8 # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: vmovdqu16 + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu16 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu16 # with store + operands: + - class: register + name: xmm + 
- class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu16 + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu16 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu16 # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu16 + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu16 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovdqu16 # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: vmovdqu32 + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu32 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu32 # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" 
+ latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu32 + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu32 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu32 # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu32 + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu32 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovdqu32 # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: vmovdqu64 + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu64 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu64 # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + 
throughput: 0.5 + uops: 2 +- name: vmovdqu64 + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu64 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovdqu64 # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovdqu64 + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovdqu64 # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovdqu64 # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: movntdq # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntdq # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntdq # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + 
throughput: 0.5 + uops: 2 +- name: vmovntdq # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: movntdqa # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovntdqa # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovntdqa # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovntdqa # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: movnti # with store + operands: + - class: register + name: gpr + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['8', '13']]] + throughput: 0.5 + uops: 2 +- name: movntpd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntpd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntpd # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + 
latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntpd # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: movntps # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntps # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntps # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovntps # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: movntq # with store + operands: + - class: register + name: mm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: movq + operands: + - class: register + name: mm + - class: register + name: mm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: movq # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: mm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: movq # with store + operands: + - class: register + name: mm + - class: memory + base: "*" + offset: 
"*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: movq + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movq # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: movq # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovq + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovq # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovq # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: movsd + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movsd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: movsd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 
0.5 + uops: 2 +- name: vmovsd + operands: + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovsd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovsd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: movss + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movss # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovss + operands: + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovss # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovss + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovss # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: movss # with load + operands: + - class: memory + base: 
"*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: movsx + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: movsx # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: gpr + latency: 4 + port_pressure: [1, ['1', '3', '5']] + throughput: 0.5 + uops: 1 +- name: movsxd + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1 +- name: movsxd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: gpr + latency: 4 + port_pressure: [1, ['1', '3', '5']] + throughput: 0.5 + uops: 1 +- name: movsb + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: movsb # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: gpr + latency: 4 + port_pressure: [1, ['1', '3', '5']] + throughput: 0.5 + uops: 1 +- name: movsw + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: movsw # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: gpr + latency: 4 + port_pressure: [1, ['1', '3', '5']] + throughput: 0.5 + uops: 1 +- name: movsl + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: movsl # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: 
gpr + latency: 4 + port_pressure: [1, ['1', '3', '5']] + throughput: 0.5 + uops: 1 +- name: movsq + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 0 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1.0 +- name: movsq # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: gpr + latency: 4 + port_pressure: [1, ['1', '3', '5']] + throughput: 0.5 + uops: 1 +- name: movupd + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movupd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [1, ['1', '3', '5']] + throughput: 0.5 + uops: 1 +- name: movupd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovupd + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovupd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovupd # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovupd + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovupd # with load + operands: + - class: memory + base: 
"*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovupd # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovupd + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovupd # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovupd # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +- name: movups + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: movups # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: movups # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovups + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovups # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 4 
+ port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovups # with store + operands: + - class: register + name: xmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovups + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovups # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['1', '3']]] + throughput: 0.5 + uops: 1 +- name: vmovups # with store + operands: + - class: register + name: ymm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[1 , ['1', '3']], [1, ['13']]] + throughput: 0.5 + uops: 2 +- name: vmovups + operands: + - class: register + name: zmm + - class: register + name: zmm + latency: 0 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1.0 +- name: vmovups # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['1', '3']]] + throughput: 1.0 + uops: 2 +- name: vmovups # with store + operands: + - class: register + name: zmm + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + latency: 0 + port_pressure: [[2 , ['1', '3']], [2, ['13']]] + throughput: 2.0 + uops: 4 +########################################## +- name: adc # ibench + operands: # ibench + - class: register # ibench + name: gpr # ibench + - class: register # ibench + name: gpr # ibench + latency: 1 # ibench + port_pressure: [[1, '07']] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: add # ibench + operands: # ibench + - class: register # ibench + name: gpr # ibench + - class: register # ibench + name: gpr 
# ibench + latency: 1 # ibench + port_pressure: [[1, '0246']] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: add # ibench + operands: # ibench + - class: immediate # ibench + imd: int # ibench + - class: register # ibench + name: gpr # ibench + latency: 1 # ibench + port_pressure: [[1, '0246']] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: addpd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: addsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: mulsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: mulpd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: mulss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: mulps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: addps # ibench + operands: # ibench + - class: register # ibench + 
name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: addss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: rcpss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 1 # ibench +- name: rcpps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 1 # ibench +- name: vrcpps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 1 # ibench +- name: vrcpps # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 1 # ibench +- name: vrcpss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 1 # ibench +- name: sqrtsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 21 # ibench + port_pressure: [[17, ['9D', '11D']], [1, 
['9', '11']]] # ibench + throughput: 8.5 # ibench + uops: 1 # ibench +- name: sqrtss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 15 # ibench + port_pressure: [[10, ['9D', '11D']], [1, ['9', '11']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: vsqrtsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 21 # ibench + port_pressure: [[17, ['9D', '11D']], [1, ['9', '11']]] # ibench + throughput: 8.5 # ibench + uops: 1 # ibench +- name: vsqrtss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 15 # ibench + port_pressure: [[10, ['9D', '11D']], [1, ['9', '11']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: sub # ibench + operands: # ibench + - class: immediate # ibench + imd: int # ibench + - class: register # ibench + name: gpr # ibench + latency: 1 # ibench + port_pressure: [[1, '0246']] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: sub # ibench + operands: # ibench + - class: register # ibench + name: gpr # ibench + - class: register # ibench + name: gpr # ibench + latency: 1 # ibench + port_pressure: [[1, '0246']] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: vaddpd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddpd # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # 
ibench + name: ymm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddpd # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 3 # ibench + port_pressure: [[2, ['10', '12']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vaddpd # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[2, ['10', '12']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vaddpd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddpd # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddps # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[2, ['10', '12']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vaddps # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + 
latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddps # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddps # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 3 # ibench + port_pressure: [[2, ['10', '12']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vaddsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + 
uops: 1 # ibench +- name: vaddss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vaddss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 2 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vdivpd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [10, ['9D', '11D']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: vdivpd # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [10, ['9D', '11D']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: vdivpd # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [18, ['9D', '11D']]] # ibench + throughput: 9.0 # ibench + uops: 1 # ibench +- name: vdivpd # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [18, ['9D', '11D']]] # ibench + throughput: 9.0 # ibench + uops: 1 # 
ibench +- name: vdivpd # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [10, ['9D', '11D']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: vdivpd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [10, ['9D', '11D']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: vdivps # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', '11']], [12, ['9D', '11D']]] # ibench + throughput: 6.0 # ibench + uops: 1 # ibench +- name: vdivps # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', '11']], [12, ['9D', '11D']]] # ibench + throughput: 6.0 # ibench + uops: 1 # ibench +- name: vdivps # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', '11']], [6, ['9D', '11D']]] # ibench + throughput: 3.0 # ibench + uops: 5 # ibench +- name: vdivps # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', 
'11']], [6, ['9D', '11D']]] # ibench + throughput: 3.0 # ibench + uops: 1 # ibench +- name: vdivps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', '11']], [6, ['9D', '11D']]] # ibench + throughput: 3.0 # ibench + uops: 1 # ibench +- name: vdivps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', '11']], [6, ['9D', '11D']]] # ibench + throughput: 3.0 # ibench + uops: 1 # ibench +- name: vdivss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', '11']], [6, ['9D', '11D']]] # ibench + throughput: 3.0 # ibench + uops: 1 # ibench +- name: vdivss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 11 # ibench + port_pressure: [[1, ['9', '11']], [6, ['9D', '11D']]] # ibench + throughput: 3.0 # ibench + uops: 1 # ibench +- name: vdivsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [10, ['9D', '11D']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: vdivsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + 
latency: 13 # ibench + port_pressure: [[1, ['9', '11']], [10, ['9D', '11D']]] # ibench + throughput: 5.0 # ibench + uops: 1 # ibench +- name: [vfmadd213pd, vfmadd132pd, vfmadd231pd, vfnmadd213pd, vfnmadd132pd, vfnmadd231pd, vfmsub213pd, vfmsub132pd, vfmsub231pd, vfnmsub213pd, vfnmsub132pd, vfnmsub231pd] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213pd, vfmadd132pd, vfmadd231pd, vfnmadd213pd, vfnmadd132pd, vfnmadd231pd, vfmsub213pd, vfmsub132pd, vfmsub231pd, vfnmsub213pd, vfnmsub132pd, vfnmsub231pd] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213pd, vfmadd132pd, vfmadd231pd, vfnmadd213pd, vfnmadd132pd, vfnmadd231pd, vfmsub213pd, vfmsub132pd, vfmsub231pd, vfnmsub213pd, vfnmsub132pd, vfnmsub231pd] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213pd, vfmadd132pd, vfmadd231pd, vfnmadd213pd, vfnmadd132pd, vfnmadd231pd, vfmsub213pd, vfmsub132pd, vfmsub231pd, vfnmsub213pd, vfnmsub132pd, vfnmsub231pd] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 
# ibench + uops: 1 # ibench +- name: [vfmadd213pd, vfmadd132pd, vfmadd231pd, vfnmadd213pd, vfnmadd132pd, vfnmadd231pd, vfmsub213pd, vfmsub132pd, vfmsub231pd, vfnmsub213pd, vfnmsub132pd, vfnmsub231pd] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 4 # ibench + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 # ibench + uops: 1 # ibench +- name: [vfmadd213pd, vfmadd132pd, vfmadd231pd, vfnmadd213pd, vfnmadd132pd, vfnmadd231pd, vfmsub213pd, vfmsub132pd, vfmsub231pd, vfnmsub213pd, vfnmsub132pd, vfnmsub231pd] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 # ibench + uops: 1 # ibench +- name: [vfmadd213ps, vfmadd132ps, vfmadd231ps, vfnmadd213ps, vfnmadd132ps, vfnmadd231ps, vfmsub213ps, vfmsub132ps, vfmsub231ps, vfnmsub213ps, vfnmsub132ps, vfnmsub231ps] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213ps, vfmadd132ps, vfmadd231ps, vfnmadd213ps, vfnmadd132ps, vfnmadd231ps, vfmsub213ps, vfmsub132ps, vfmsub231ps, vfnmsub213ps, vfnmsub132ps, vfnmsub231ps] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213ps, vfmadd132ps, vfmadd231ps, vfnmadd213ps, vfnmadd132ps, vfnmadd231ps, vfmsub213ps, 
vfmsub132ps, vfmsub231ps, vfnmsub213ps, vfnmsub132ps, vfnmsub231ps] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213ps, vfmadd132ps, vfmadd231ps, vfnmadd213ps, vfnmadd132ps, vfnmadd231ps, vfmsub213ps, vfmsub132ps, vfmsub231ps, vfnmsub213ps, vfnmsub132ps, vfnmsub231ps] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213ps, vfmadd132ps, vfmadd231ps, vfnmadd213ps, vfnmadd132ps, vfnmadd231ps, vfmsub213ps, vfmsub132ps, vfmsub231ps, vfnmsub213ps, vfnmsub132ps, vfnmsub231ps] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 4 # ibench + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 # ibench + uops: 1 # ibench +- name: [vfmadd213ps, vfmadd132ps, vfmadd231ps, vfnmadd213ps, vfnmadd132ps, vfnmadd231ps, vfmsub213ps, vfmsub132ps, vfmsub231ps, vfnmsub213ps, vfnmsub132ps, vfnmsub231ps] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 1.0 # ibench + uops: 1 # ibench +- name: [vfmadd213sd, vfmadd132sd, vfmadd231sd, vfnmadd213sd, vfnmadd132sd, vfnmadd231sd, vfmsub213sd, vfmsub132sd, vfmsub231sd, vfnmsub213sd, vfnmsub132sd, vfnmsub231sd] # ibench + operands: # ibench + - class: register # ibench + 
name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213sd, vfmadd132sd, vfmadd231sd, vfnmadd213sd, vfnmadd132sd, vfnmadd231sd, vfmsub213sd, vfmsub132sd, vfmsub231sd, vfnmsub213sd, vfnmsub132sd, vfnmsub231sd] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213ss, vfmadd132ss, vfmadd231ss, vfnmadd213ss, vfnmadd132ss, vfnmadd231ss, vfmsub213ss, vfmsub132ss, vfmsub231ss, vfnmsub213ss, vfnmsub132ss, vfnmsub231ss] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vfmadd213ss, vfmadd132ss, vfmadd231ss, vfnmadd213ss, vfnmadd132ss, vfnmadd231ss, vfmsub213ss, vfmsub132ss, vfmsub231ss, vfnmsub213ss, vfnmsub132ss, vfnmsub231ss] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 4 # ibench + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vgatherdpd, vgatherqpd] # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + latency: 13 + port_pressure: [[6, '13'], [1, ['9', '10', '11', '12']], [2, ['9', '11']], [1, ['10','11','12']]] + throughput: 3.0 + uops: 18 +- name: [vgatherdpd, vgatherqpd] # with load + operands: + - 
class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 15 + port_pressure: [[8, '13'], [1, ['9', '10', '11', '12']], [4, ['9', '11']], [2, ['10','11','12']]] + throughput: 4.0 + uops: 24 +- name: [vgatherdpd, vgatherqpd] # with load + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 26 + port_pressure: [[22, '13'], [2, ['9', '10', '11', '12']], [8, ['9', '11']], [4, ['10','11','12']]] + throughput: 11.0 + uops: 48 +- name: [vgatherdpd, vgatherqpd] # uops.info + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + mask: True + latency: 14 + port_pressure: [[8, '13'], [1, ['9', '10', '11', '12']], [2, ['9', '11']], [1, ['10','11','12']]] + throughput: 4.0 + uops: 18 +- name: [vgatherdpd, vgatherqpd] # uops.info + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + mask: True + latency: 15 + port_pressure: [[8, '13'], [1, ['9', '10', '11', '12']], [4, ['9', '11']], [2, ['10','11','12']]] + throughput: 4.0 + uops: 24 +- name: [vgatherdpd, vgatherqpd] # uops.info + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + mask: True + latency: 26 + port_pressure: [[22, '13'], [2, ['9', '10', '11', '12']], [8, ['9', '11']], [4, ['10','11','12']]] + throughput: 11.0 + uops: 48 +- name: vgatherdps # with load # ibench + operands: # ibench + - class: memory # ibench + base: "*" # ibench + offset: "*" # ibench + index: "*" # ibench + scale: "*" # ibench + - class: register # ibench + name: xmm # ibench + latency: 15 # ibench + port_pressure: [[8, '13'], [1, ['9', '10', '11', '12']], [1, ['9', '11']], [2, ['10','11','12']]] + throughput: 4.0 # ibench + uops: 24 # ibench +- name: vgatherdps # with load # uops.info + operands: # uops.info + - class: memory # uops.info + base: 
"*" # uops.info + offset: "*" # uops.info + index: "*" # uops.info + scale: "*" # uops.info + - class: register # uops.info + name: ymm # uops.info + latency: 20 # uops.info + port_pressure: [[16, '13'], [1, ['9', '10', '11', '12']], [2, ['9', '11']], [4, ['10','11','12']]] + throughput: 8.0 # uops.info + uops: 42 #uops.info +- name: vgatherdps # with load # ibench + operands: # ibench + - class: memory # ibench + base: "*" # ibench + offset: "*" # ibench + index: "*" # ibench + scale: "*" # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 15 # ibench + port_pressure: [[10, '13'], [1, ['9', '10', '11', '12']], [1, ['9', '11']], [2, ['10','11','12']]] + throughput: 5.0 # ibench + uops: 24 # ibench +- name: vgatherdps # with load # uops.info + operands: # uops.info + - class: memory # uops.info + base: "*" # uops.info + offset: "*" # uops.info + index: "*" # uops.info + scale: "*" # uops.info + - class: register # uops.info + name: ymm # uops.info + mask: True # ibench + latency: 21 # uops.info + port_pressure: [[18, '13'], [1, ['9', '10', '11', '12']], [2, ['9', '11']], [4, ['10','11','12']]] + throughput: 9.0 # uops.info + uops: 41 #uops.info +- name: vgatherdps # with load # uops.info + operands: # uops.info + - class: memory # uops.info + base: "*" # uops.info + offset: "*" # uops.info + index: "*" # uops.info + scale: "*" # uops.info + - class: register # uops.info + name: zmm # uops.info + mask: True # ibench + latency: 35 # uops.info + port_pressure: [[34, '13'], [2, ['9', '10', '11', '12']], [4, ['9', '11']], [8, ['10','11','12']]] + throughput: 17.0 # uops.info + uops: 81 #uops.info +- name: vmulpd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulpd # ibench + 
operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulpd # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulpd # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulpd # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 3 # ibench + port_pressure: [[2, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vmulpd # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[2, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vmulps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulps # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class:
register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulps # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulps # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulps # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 3 # ibench + port_pressure: [[2, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vmulps # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[2, ['9', '11']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: vmulsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm
# ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vmulss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['9', '11']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vpaddd, vpaddq] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 1 # ibench + port_pressure: [[1, ['9', '10', '11', '12']]] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: [vpaddd, vpaddq] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 1 # ibench + port_pressure: [[1, ['9', '10', '11', '12']]] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: [vpaddd, vpaddq] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 1 # ibench + port_pressure: [[2, ['9', '10', '11', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vpaddd, vpaddq] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # 
ibench + latency: 1 # ibench + port_pressure: [[1, ['9', '10', '11', '12']]] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: [vpaddd, vpaddq] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True # ibench + latency: 1 # ibench + port_pressure: [[1, ['9', '10', '11', '12']]] # ibench + throughput: 0.25 # ibench + uops: 1 # ibench +- name: [vpaddd, vpaddq] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 1 # ibench + port_pressure: [[2, ['9', '10', '11', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vrcp14pd, vrcp14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrcp14pd, vrcp14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrcp14pd, vrcp14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['9', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vrcp14pd, vrcp14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 
0.5 # asmbench + uops: 1 # asmbench +- name: [vrcp14pd, vrcp14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrcp14pd, vrcp14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['9', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: vrcpss # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: vrcpps # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: vrcpps # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrsqrt14pd, vrsqrt14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrsqrt14pd, vrsqrt14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register 
# asmbench + name: ymm # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrsqrt14pd, vrsqrt14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['9', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vrsqrt14pd, vrsqrt14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrsqrt14pd, vrsqrt14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vrsqrt14pd, vrsqrt14ps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['9', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: vsqrtpd # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 21 # asmbench + port_pressure: [[16, ['9', '11']]] # asmbench + throughput: 8.0 # asmbench + uops: 1 # asmbench +- name: vsqrtpd # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 21 # asmbench + port_pressure: [[16, ['9', '11']]] # asmbench + throughput: 8.0 # asmbench + uops: 1 # asmbench +- name: vsqrtpd # 
asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 21 # asmbench + port_pressure: [[32, ['9', '11']]] # asmbench + throughput: 16.0 # asmbench + uops: 2 # asmbench +- name: vsqrtpd # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + mask: True # asmbench + latency: 21 # asmbench + port_pressure: [[16, ['9', '11']]] # asmbench + throughput: 8.0 # asmbench + uops: 1 # asmbench +- name: vsqrtpd # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 21 # asmbench + port_pressure: [[16, ['9', '11']]] # asmbench + throughput: 8.0 # asmbench + uops: 1 # asmbench +- name: vsqrtpd # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 21 # asmbench + port_pressure: [[32, ['9', '11']]] # asmbench + throughput: 16.0 # asmbench + uops: 2 # asmbench +- name: vrsqrtps # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: vrsqrtps # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['9', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [inc, dec] + operands: + - class: register + name: gpr + latency: 1 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1 +- name: vcvtdq2pd # uops.info + operands: # uops.info + - class: register # uops.info + name: ymm # uops.info + - 
class: register # uops.info + name: zmm # uops.info + latency: 6 # uops.info + port_pressure: [[2, ['10', '11']], [2, ['10', '12']]] # uops.info + throughput: 1.33 # uops.info +- name: vcvtss2si # uops.info + operands: # uops.info + - class: register # uops.info + name: xmm # uops.info + - class: register # uops.info + name: gpr # uops.info + latency: 8 # uops.info + port_pressure: [[1, ['10']]] # uops.info + throughput: 1 # uops.info + uops: 1 # uops.info +- name: vcvtss2sd # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 3 # asmbench + port_pressure: [[1, ['10', '12']]] # uops.info + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vsubpd, vsubps] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vsubpd, vsubps] # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vsubpd, vsubps] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vsubpd, vsubps] # ibench + operands: # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + - class: register # ibench + name: ymm # ibench + mask: True
# ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: [vsubpd, vsubps] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + latency: 3 # ibench + port_pressure: [[2, ['10', '12']]] # ibench + throughput: 1.0 # ibench + uops: 2 # ibench +- name: [vsubpd, vsubps] # ibench + operands: # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + - class: register # ibench + name: zmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[2, ['10', '12']]] # ibench + throughput: 1.0 # ibench + uops: 1 # ibench +- name: vsubsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vsubsd # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vsubss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + mask: True # ibench + latency: 3 # ibench + port_pressure: [[1, ['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: vsubss # ibench + operands: # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + - class: register # ibench + name: xmm # ibench + latency: 2 # ibench + port_pressure: [[1, 
['10', '12']]] # ibench + throughput: 0.5 # ibench + uops: 1 # ibench +- name: lea # uops.info + operands: # uops.info + - class: memory # uops.info + base: "*" # uops.info + offset: "*" # uops.info + index: "*" # uops.info + scale: "*" # uops.info + - class: register # uops.info + name: gpr # uops.info + latency: 1 # uops.info + port_pressure: [[1, '0246']] # uops.info + throughput: 0.25 # uops.info + uops: 1 # uops.info +- name: [shl, shr, sal, sar] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: gpr # uops.info + latency: 1 # uops.info + port_pressure: [[1, '24']] # uops.info + throughput: 0.5 # uops.info + uops: 1 # uops.info +- name: [shl, shr, sal, sar] + operands: + - class: register + name: gpr + latency: 1 # uops.info + port_pressure: [[1, '4']] # uops.info + throughput: 0.5 # uops.info + uops: 1 # uops.info +- name: vinsertf128 + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11']]] + throughput: 0.5 + uops: 1 +- name: vinserti128 + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11']]] + throughput: 0.5 + uops: 1 +- name: vinsertf32x4 + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11']]] + throughput: 0.5 + uops: 1 +- name: vinsertf32x8 + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: zmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['10', '11']]] + throughput: 1.0 + uops: 1 +- name: vinsertf64x2 + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: 
ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11']]] + throughput: 0.5 + uops: 1 +- name: vinsertf64x4 + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: zmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['10', '11']]] + throughput: 1.0 + uops: 1 +- name: vinsertps + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['10', '11', '12']]] + throughput: 0.33333333333 + uops: 1 +- name: vinserti64x4 + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: zmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['10', '11']]] + throughput: 1.0 + uops: 1 +- name: vinserti64x2 + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11']]] + throughput: 0.5 + uops: 1 +- name: vinserti32x8 + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: zmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['10', '11']]] + throughput: 1.0 + uops: 1 +- name: vinsertf32x4 + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11']]] + throughput: 0.5 + uops: 1 +- name: vcvtsi2sd + operands: + - class: register + name: gpr + - class: register + name: xmm + - class: register + name: xmm + latency: 3 + port_pressure: [[1, ['10']]] + throughput: 1.0 + uops: 1 +- name: vcvtdq2pd + operands: + - class: register + name: xmm + - class: register + name: ymm + latency: 4 + port_pressure: [[1, ['10', '11']], [1, ['11', '12']]] + throughput: 0.6666666666 + uops: 2 +- name: vcvtsi2ss + operands: + 
- class: register + name: gpr + - class: register + name: xmm + - class: register + name: xmm + latency: 3 + port_pressure: [[1, ['10']]] + throughput: 1.0 + uops: 1 +- name: [vextractf128, vextracti128] + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: vextractps + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: gpr + latency: 6 + port_pressure: [[1, ['10']]] + throughput: 1.0 + uops: 2 +- name: [vextractf32x4, vextracti32x4] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: xmm # uops.info + latency: 1 # uops.info + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 # uops.info +- name: [vextractf32x4, vextracti32x4] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: xmm # uops.info + latency: 4 # uops.info + port_pressure: [[2, ['9', '11']]] + throughput: 1.00 + uops: 1 # uops.info +- name: [vextractf32x4, vextracti32x4] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: xmm # uops.info + mask: True # uops.info + latency: 4 # uops.info + port_pressure: [[2, ['9', '11']]] + throughput: 1.00 + uops: 1 # uops.info +- name: [vextractf32x4, vextracti32x4] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: xmm # uops.info + mask: True # uops.info + latency: 1 # uops.info + port_pressure: [[1, ['9', 
'10', '11', '12']]] + throughput: 0.25 + uops: 1 # uops.info +- name: [vextractf32x8, vextracti32x8] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: ymm # uops.info + mask: True # uops.info + latency: 1 # uops.info + port_pressure: [[2, ['9', '10', '11', '12']]] + throughput: 0.50 + uops: 1 # uops.info +- name: [vextractf32x8, vextracti32x8] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: ymm # uops.info + latency: 1 # uops.info + port_pressure: [[2, ['9', '10', '11', '12']]] + throughput: 0.50 + uops: 1 # uops.info +- name: [vextractf64x2, vextracti64x2] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: xmm # uops.info + latency: 1 # uops.info + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 # uops.info +- name: [vextractf64x2, vextracti64x2] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: xmm # uops.info + mask: True # uops.info + latency: 4 # uops.info + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 # uops.info +- name: [vextractf64x2, vextracti64x2] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: xmm # uops.info + mask: True # uops.info + latency: 4 # uops.info + port_pressure: [[2, ['10', '11']]] + throughput: 1.0 + uops: 1 # uops.info +- name: [vextractf64x2, vextracti64x2] # uops.info + operands: # uops.info + - 
class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: xmm # uops.info + latency: 4 # uops.info + port_pressure: [[2, ['10', '11']]] + throughput: 1.0 + uops: 1 # uops.info +- name: [vextractf64x4, vextracti64x4] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: ymm # uops.info + latency: 1 # uops.info + port_pressure: [[2, ['9', '10', '11', '12']]] + throughput: 0.50 + uops: 1 # uops.info +- name: [vextractf64x4, vextracti64x4] # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: ymm # uops.info + mask: True # uops.info + latency: 1 # uops.info + port_pressure: [[2, ['9', '10', '11', '12']]] + throughput: 0.50 + uops: 1 # uops.info +- name: vpinsrd # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: gpr # uops.info + - class: register # uops.info + name: xmm # uops.info + - class: register # uops.info + name: xmm # uops.info + latency: 1 # uops.info + port_pressure: [[1, ['11']]] # uops.info + throughput: 1.0 # uops.info + uops: 1 # uops.info +- name: vpalignr # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: xmm # uops.info + - class: register # uops.info + name: xmm # uops.info + - class: register # uops.info + name: xmm # uops.info + latency: 2 # uops.info + port_pressure: [[1, ['10', '11']]] # uops.info + throughput: 0.5 # uops.info + uops: 1 # uops.info +- name: vpalignr # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: xmm # uops.info + - class: register #
uops.info + name: xmm # uops.info + - class: register # uops.info + name: xmm # uops.info + mask: True # uops.info + latency: 2 # uops.info + port_pressure: [[1, ['10', '11']]] # uops.info + throughput: 0.5 # uops.info + uops: 1 # uops.info +- name: vpalignr # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: ymm # uops.info + latency: 2 # uops.info + port_pressure: [[1, ['10', '11']]] # uops.info + throughput: 0.5 # uops.info + uops: 1 # uops.info +- name: vpalignr # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: ymm # uops.info + - class: register # uops.info + name: ymm # uops.info + mask: True # uops.info + latency: 2 # uops.info + port_pressure: [[1, ['10', '11']]] # uops.info + throughput: 0.5 # uops.info + uops: 1 # uops.info +- name: vpalignr # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: zmm # uops.info + latency: 2 # uops.info + port_pressure: [[2, ['10', '11']]] # uops.info + throughput: 1.0 # uops.info + uops: 2 # uops.info +- name: vpalignr # uops.info + operands: # uops.info + - class: immediate # uops.info + imd: int # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: zmm # uops.info + - class: register # uops.info + name: zmm # uops.info + mask: True # uops.info + latency: 2 # uops.info + port_pressure: [[2, ['10', '11']]] # uops.info + throughput: 1.0 # uops.info + uops: 2 # uops.info +- name: [vperm2f128, vperm2i128] # asmbench + operands: # asmbench + - class: immediate # 
asmbench + imd: int # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 3 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: vpermd # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: vpermd # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vpermd, vpermt2q] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['10', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vpermd, vpermt2q] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['10', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vpermpd, vpermps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['10', 
'11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vpermpd, vpermps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 4 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vpermpd, vpermps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['10', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vpermpd, vpermps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 5 # asmbench + port_pressure: [[2, ['10', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 2 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + mask: True # asmbench + latency: 2 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench 
+ - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 2 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 2 # asmbench + port_pressure: [[1, ['10', '11']]] # asmbench + throughput: 0.5 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 2 # asmbench + port_pressure: [[2, ['10', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 2 # asmbench + port_pressure: [[2, ['10', '11']]] # asmbench + throughput: 1.0 # asmbench + uops: 2 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: immediate + imd: int + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 0.3333333333 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: immediate + imd: int + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + mask: True # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 
0.3333333333 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: immediate + imd: int + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 0.3333333333 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: immediate + imd: int + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 0.3333333333 # asmbench + uops: 1 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: immediate + imd: int + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 1 # asmbench + port_pressure: [[2, ['10', '11', '12']]] # asmbench + throughput: 0.6666666666 # asmbench + uops: 2 # asmbench +- name: [vpermilpd, vpermilps] # asmbench + operands: # asmbench + - class: immediate + imd: int + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 1 # asmbench + port_pressure: [[2, ['10', '11', '12']]] # asmbench + throughput: 0.6666666666 # asmbench + uops: 2 # asmbench +- name: [vunpckhpd, vunpckhps, vunpcklpd, vunpcklps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 0.333333333333 # asmbench + uops: 1 # asmbench +- name: [vunpckhpd, vunpckhps, vunpcklpd, vunpcklps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - 
class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 0.333333333333 # asmbench + uops: 1 # asmbench +- name: [vunpckhpd, vunpckhps, vunpcklpd, vunpcklps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + latency: 1 # asmbench + port_pressure: [[2, ['10', '11', '12']]] # asmbench + throughput: 0.666666666666 # asmbench + uops: 1 # asmbench +- name: [vunpckhpd, vunpckhps, vunpcklpd, vunpcklps] # asmbench + operands: # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + - class: register # asmbench + name: xmm # asmbench + mask: True # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 0.333333333333 # asmbench + uops: 1 # asmbench +- name: [vunpckhpd, vunpckhps, vunpcklpd, vunpcklps] # asmbench + operands: # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + - class: register # asmbench + name: ymm # asmbench + mask: True # asmbench + latency: 1 # asmbench + port_pressure: [[1, ['10', '11', '12']]] # asmbench + throughput: 0.333333333333 # asmbench + uops: 1 # asmbench +- name: [vunpckhpd, vunpckhps, vunpcklpd, vunpcklps] # asmbench + operands: # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + - class: register # asmbench + name: zmm # asmbench + mask: True # asmbench + latency: 1 # asmbench + port_pressure: [[2, ['10', '11', '12']]] # asmbench + throughput: 0.666666666666 # asmbench + uops: 1 # asmbench +- name: [vpcmpgtb, vpcmpgtw, vpcmpgtd, vpcmpgtq] + operands: + - class: register + name: xmm + - class: register + name: xmm + 
- class: register + name: k + latency: 3 + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 + uops: 1 +- name: [vpcmpgtb, vpcmpgtw, vpcmpgtd, vpcmpgtq] + operands: + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: k + latency: 4 + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 + uops: 1 +- name: [vpcmpgtb, vpcmpgtw, vpcmpgtd, vpcmpgtq, vpcmpeqb, vpcmpeqw, vpcmpeqd, vpcmpeqq] + operands: + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: k + latency: 5 + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 + uops: 2 +- name: vpcmpd + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: k + latency: 3 + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 + uops: 1 +- name: vpcmpd + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: k + latency: 4 + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 + uops: 1 +- name: vpcmpd + operands: + - class: immediate + imd: int + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: k + latency: 5 + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 + uops: 2 +- name: [vpcmpeqb, vpcmpeqw, vpcmpeqd, vpcmpeqq] + operands: + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: [vpcmpeqb, vpcmpeqw, vpcmpeqd, vpcmpeqq] + operands: + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: [vcmppd, vcmpps] + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 2 + port_pressure: [[1, ['10',
'12']]] + throughput: 0.5 + uops: 1 +- name: [vcmppd, vcmpps] + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: ymm + latency: 2 + port_pressure: [[1, ['10', '12']]] + throughput: 0.5 + uops: 1 +- name: vcmpps + operands: + - class: immediate + imd: int + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: k + latency: 5 + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 + uops: 2 +- name: vcmpps + operands: + - class: immediate + imd: int + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: k + latency: 5 + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 + uops: 2 +- name: vcmpps + operands: + - class: immediate + imd: int + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: k + mask: True + latency: 5 + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 + uops: 2 +- name: vcmpps + operands: + - class: immediate + imd: int + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: k + latency: 3 + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 + uops: 1 +- name: vcmpps + operands: + - class: immediate + imd: int + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: k + latency: 4 + port_pressure: [[1, ['9', '11']]] + throughput: 0.5 + uops: 1 +- name: vcmppd + operands: + - class: immediate + imd: int + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: k + latency: 5 + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 + uops: 2 +- name: vcmppd + operands: + - class: immediate + imd: int + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: k + mask: True + latency: 5 + port_pressure: [[2, ['9', '11']]] + throughput: 1.0 + uops: 1 +- name: vpunpckhqdq + operands: + - class: register + name: xmm + - class: 
register + name: xmm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['10', '11', '12']]] + throughput: 0.333333333 + uops: 1 +- name: vpunpckhqdq + operands: + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11', '12']]] + throughput: 0.333333333 + uops: 1 +- name: vpunpckhqdq + operands: + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['10', '11', '12']]] + throughput: 0.666666666 + uops: 1 +########## TODO ############### +- name: AND + operands: + - class: immediate + imd: int + - class: register + name: gpr + latency: 1 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1 +- name: RET + operands: [] + latency: 0 + port_pressure: [[1, ['1', '3']], [1, ['13']]] + throughput: 1.0 + uops: 2 +- name: CALL + operands: + - class: identifier + latency: 0 + port_pressure: [[1, ['1', '3']], [1, ['13']]] + throughput: 1.5 + uops: 2 +- name: TEST + operands: + - class: immediate + imd: int + - class: register + name: gpr + latency: 1 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1 +- name: TEST + operands: + - class: register + name: gpr + - class: register + name: gpr + latency: 1 + port_pressure: [[1, '0246']] + throughput: 0.25 + uops: 1 +- name: PTEST + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 3 + port_pressure: [[1, ['10', '11']], [1, '13']] + throughput: 1.0 + uops: 2 +- name: VPTEST + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 3 + port_pressure: [[1, ['10', '11']], [1, '13']] + throughput: 1.0 + uops: 2 +- name: VPTEST + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 3 + port_pressure: [[1, ['10', '11']], [1, '13']] + throughput: 1.0 + uops: 2 +- name: [VTESTPD, VTESTPS] + operands: + - class: register + name: xmm + - class: register + name: xmm +
latency: 3 + port_pressure: [[1, ['10', '11']], [1, '13']] + throughput: 1.0 + uops: 1 +- name: [VTESTPD, VTESTPS] + operands: + - class: register + name: ymm + - class: register + name: ymm + latency: 3 + port_pressure: [[1, ['10', '11']], [1, '13']] + throughput: 1.0 + uops: 1 +- name: [vxorps, vxorpd] + operands: + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['9','10','11','12']]] + throughput: 0.25 + uops: 1 +- name: [vxorps, vxorpd] + operands: + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['9','10','11','12']]] + throughput: 0.25 + uops: 1 +- name: VPBROADCASTD + operands: + - class: register + name: gpr + - class: register + name: zmm + latency: 6 + port_pressure: [[2, ['10', '11']]] #uops.info + throughput: 1.0 + uops: 2 +- name: VBROADCASTSS + operands: + - class: register + name: xmm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['10', '11', '12']]] + throughput: 0.3333333 + uops: 1 +- name: [VBROADCASTSD, VBROADCASTSS] + operands: + - class: register + name: xmm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['10', '11']]] + throughput: 0.5 + uops: 1 +- name: [VBROADCASTSD, VBROADCASTSS] + operands: + - class: register + name: xmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['10', '11']]] + throughput: 1.0 + uops: 2 +- name: [VBROADCASTSD, VBROADCASTSS] + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: ymm + latency: 9 + port_pressure: [[1, ['1', '3']], [1, ['10', '11']], [1, ['13']]] + throughput: 1.0 + uops: 1 +- name: [VBROADCASTSD, VBROADCASTSS] + operands: + - class: memory + base: "*" + offset: "*" + index: "*" + scale: "*" + - class: register + name: zmm + latency: 5 + port_pressure: [[1, '23'], [1, ['2D', '3D']], [1, '015']] + throughput: 1.0 + uops: 1 +-
name: vandpd + operands: + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: vandpd + operands: + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: vandpd + operands: + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['9', '10', '11', '12']]] + throughput: 0.5 + uops: 2 +- name: vshuff64x2 + operands: + - class: immediate + imd: int + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: zmm + latency: 4 + port_pressure: [[2, ['10', '11']]] #uops.info + throughput: 1.0 + uops: 2 +- name: vmovd + operands: + - class: register + name: gpr + - class: register + name: xmm + latency: 1 + port_pressure: [[1, '0']] + throughput: 1.0 + uops: 1 +- name: vmovq + operands: + - class: register + name: gpr + - class: register + name: xmm + latency: 1 + port_pressure: [[1, '0']] + throughput: 1.0 + uops: 1 +- name: [vpor, vpxor, vpord, vpxord] + operands: + - class: register + name: xmm + - class: register + name: xmm + - class: register + name: xmm + latency: 1 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: [vpor, vpxor, vpord, vpxord] + operands: + - class: register + name: ymm + - class: register + name: ymm + - class: register + name: ymm + latency: 1 + port_pressure: [[1, ['9', '10', '11', '12']]] + throughput: 0.25 + uops: 1 +- name: [vpor, vpxor, vpord, vpxord] + operands: + - class: register + name: zmm + - class: register + name: zmm + - class: register + name: zmm + latency: 1 + port_pressure: [[2, ['9', '10', '11', '12']]] + throughput: 0.5 + uops: 2 +- name: [kxorb, kxorw, kxord, kxnorb, kxnorw, kxnord] + operands: + - class: register
+ name: k + - class: register + name: k + - class: register + name: k + latency: 1 + port_pressure: [[1, ['10', '12']]] + throughput: 0.5 + uops: 1 +- name: [kxorq, kxnorq] + operands: + - class: register + name: k + - class: register + name: k + - class: register + name: k + latency: 1 + port_pressure: [[2, ['10', '12']]] + throughput: 1.0 + uops: 1 diff --git a/osaca/osaca.py b/osaca/osaca.py index 34460f3..8a2fbf0 100644 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -34,6 +34,7 @@ SUPPORTED_ARCHS = [ "ZEN1", "ZEN2", "ZEN3", + "ZEN4", "TX2", "N1", "A64FX", @@ -104,8 +105,8 @@ def create_parser(parser=None): "--arch", type=str, help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ICX, SPR, ZEN1, ZEN2, ZEN3, " - "TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a default uarch for " - "x86/AArch64.", + "ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a " + "default uarch for x86/AArch64.", ) parser.add_argument( "--fixed", diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index a3d9d06..45f9f2b 100644 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -444,6 +444,7 @@ class MachineModel(object): "zen+": "x86", "zen2": "x86", "zen3": "x86", + "zen4": "x86", "con": "x86", # Intel Conroe "wol": "x86", # Intel Wolfdale "snb": "x86",