Files
OSACA/osaca/data/zen1.yml
Julian 08440ed5e1 Validation (#71)
Validation of OSACA predictions for IVB, SKX, ZEN1, ZEN2, A64FX and TX2 with different kernels.

build_and_run.py contains the configuration used at RRZE's testcluster and UR's qpace4, Analysis.ipynb contains the analysis script and results. Raw data from measurements (122MB) will be attached to next OSACA release.

For now, find the raw data here: https://hawo.net/~sijuhamm/d/UPIhBOtz/validation-data.tar.gz

The analysis report can be viewed at https://nbviewer.jupyter.org/github/RRZE-HPC/OSACA/blob/validation/validation/Analysis.ipynb

Quite a few changes to OSACA are included:

Feature: register change tracking via semantic understanding of operations
Feature: recording LCD latency along path and exposing this to frontend
Feature: support for memory reference aliases
Feature: store throughput scaling (similar to load throughput scaling)
Fix: model importer works with latest uops.info export
Fix: immediate type tracking on ARM now preserves type in internal representation
Removed unused KerncraftAPI
2021-04-15 14:42:37 +02:00

671 lines
14 KiB
YAML

# Machine model header: identification, load/store settings and port list.
osaca_version: 0.3.4
micro_architecture: AMD Zen (family 17h)
arch_code: ZEN1
isa: x86
# Load latency, keyed by register class.
load_latency:
  gpr: 4.0
  mm: 4.0
  xmm: 4.0
  ymm: 4.0
# Scaling factor for load port pressure, keyed by register class.
load_throughput_multiplier:
  gpr: 1.0
  xmm: 1.0
  ymm: 2.0
# Load port pressure per addressing mode. Every mode listed here carries the
# same pressure as load_throughput_default.
load_throughput:
- {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
- {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
- {base: gpr, index: ~, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
- {base: gpr, index: ~, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
- {base: gpr, index: gpr, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
- {base: gpr, index: gpr, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
- {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
- {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D', '9D']]]}
load_throughput_default: [[1, '89'], [1, ['8D', '9D']]]
# No per-addressing-mode store entries; only the default below is defined.
store_throughput: []
store_throughput_default: [[1, '89'], [1, ['ST']]]
# Scaling factor for store port pressure, keyed by register class.
store_throughput_multiplier:
  gpr: 1.0
  xmm: 1.0
  ymm: 2.0
# JH: according to Agner Fog "little or no penalty"
store_to_load_forward_latency: 0.0
hidden_loads: false
# Port identifiers are strings; all are quoted so none is read as a number.
ports: ['0', '1', '2', '3', '3DV', '4', '5', '6', '7', '8', '9', '8D', '9D', 'ST']
# ASCII diagram of the port model: two out-of-order schedulers (96 and 84
# entries) above ports 0-9, with the functional units drawn under each port.
# NOTE(review): the diagram lines below carry no leading indentation or column
# padding in this copy, so the boxes are not aligned — restore the spacing
# from the upstream file before relying on this block scalar.
port_model_scheme: |
+--------------------------------------+ +-----------------------------------------------+
| 96 entries OoO scheduler | | 84 entries OoO scheduler |
+--------------------------------------+ +-----------------------------------------------+
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
\/ \/ \/ \/ \/ \/ \/ \/ \/ \/
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ +-----+ +-----+
|SSE ALU| |SSE ALU| |SSE ALU| |SSE ALU| | ALU | | ALU | | ALU | | ALU | | AGU | | AGU |
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ +-----+ +-----+
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ | |
|SSE MUL| |SSE MUL| |SSE ADD| |SSE ADD| |BRANCH| | MUL | | DIV | |BRANCH| \/ \/
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ +-------------+
+-------+ +-------+ +-------+ +-------+ | LOAD |
|SSE FMA| |SSE FMA| | SSE | |SSE DIV| +-------------+
+-------+ +-------+ | SHUF | +-------+ +-------------+
+-------+ +-------+ | LOAD |
| SSE | +-------------+
| SHUF | +-------------+
+-------+ | STORE |
+-------------+
# Per-instruction entries: mnemonic, operand classes, throughput, latency and
# port pressure given as [uop count, ports] pairs.
instruction_forms:
- name: VPSHUFD
  operands:
  - {class: immediate, imd: int}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 1
  port_pressure: [[1, '12']]
  uops: 1
# Register-to-register vmovaps: no latency and no port pressure.
- name: vmovaps
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
- name: vmovaps
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
- name: VXORPD
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.25
  latency: 1
  port_pressure: [[1, '0123']]
  uops: 1
# NOTE(review): this ymm form uses the same pressure as the xmm form, whereas
# the ymm forms of vaddpd/vmulpd/vfmadd*pd in this file use twice the xmm
# pressure — confirm.
- name: VXORPD
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 0.25
  latency: 1
  port_pressure: [[1, '0123']]
  uops: 1
- name: VPADDD
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.3333333333333333
  latency: 1
  port_pressure: [[1, '013']]
  uops: 1
# Scalar FMA and integer-to-double conversions.
- name: vfmadd231sd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 5.0
  port_pressure: [[1, '01']]
- name: vcvtsi2sd
  operands:
  - {class: register, name: gpr}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 1.0
  latency: 7.0
  port_pressure: [[1, '5'], [1, '3']]
- name: vcvtdq2pd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 1.0
  latency: 7.0
  port_pressure: [[1, '12'], [1, '3']]
# The xmm -> ymm form uses twice the pressure of the xmm -> xmm form.
- name: vcvtdq2pd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: ymm}
  throughput: 2.0
  latency: 7.0
  port_pressure: [[2, '12'], [2, '3']]
# Integer ALU forms: each is a single uop on any of ports 4-7.
- name: add
  operands:
  - {class: immediate, imd: int}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
- name: add
  operands:
  - {class: register, name: gpr}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1
  port_pressure: [[1, '4567']]  # 1*p4567
- name: addl
  operands:
  - {class: immediate, imd: int}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
- name: addq
  operands:
  - {class: immediate, imd: int}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
- name: cmpl
  operands:
  - {class: register, name: gpr}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
- name: cmpq
  operands:
  - {class: register, name: gpr}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
# JH: should actually be TP=0.5, but unclear which ALU to assign to
- name: [CWDE, CWTL]
  operands: []
  throughput: 0.25
  latency: 1
  port_pressure: [[1, '4567']]
  uops: 1
# JH: should actually be TP=0.5, but unclear which ALU to assign to
- name: [CDQE, CLTQ]
  operands: []
  throughput: 0.25
  latency: 1
  port_pressure: [[1, '4567']]
  uops: 1
- name: [inc, dec]
  operands:
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
# Conditional jumps to a label: zero throughput/latency, no port pressure.
# NOTE(review): ja, jb and jne are also named in the list-style jump entries
# at the end of instruction_forms, with the same values.
- name: ja
  operands:
  - {class: identifier}
  throughput: 0.0
  latency: 0
  port_pressure: []
- name: jge
  operands:
  - {class: identifier}
  throughput: 0.0
  latency: 0
  port_pressure: []
- name: jb
  operands:
  - {class: identifier}
  throughput: 0.0
  latency: 0
  port_pressure: []
- name: jne
  operands:
  - {class: identifier}
  throughput: 0.0
  latency: 0
  port_pressure: []
# Address computation and GPR moves.
# NOTE(review): lea lists throughput 0.5, while the other single-uop p4567
# forms in this file list 0.25 for the same port pressure — confirm which
# value is intended.
- name: lea
  operands:
  - {class: memory, base: gpr, offset: '*', index: '*', scale: '*'}
  - {class: register, name: gpr}
  throughput: 0.5
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
# Register-to-register mov: no latency and no port pressure.
- name: mov
  operands:
  - {class: register, name: gpr}
  - {class: register, name: gpr}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
# JH: assumed from agner and port model
- name: [movsxd, movslq]
  operands:
  - {class: register, name: gpr}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]
# Legacy-SSE scalar multiply, reciprocal and square root.
- name: mulsd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 4.0
  port_pressure: [[1, '01']]  # 1*p01
- name: mulss
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 3.0
  port_pressure: [[1, '01']]  # 1*p01
# The next three forms have a null throughput and empty port pressure; the
# number in the trailing comment is the value that was commented out.
- name: rcpss
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: ~  # 1.0
  latency: 5.0
  port_pressure: []
- name: sqrtsd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: ~  # 8.0
  latency: 23.0
  port_pressure: []
- name: sqrtss
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: ~  # 5.0
  latency: 17.0
  port_pressure: []
# 64-bit subtract, register and immediate source forms.
- name: subq
  operands:
  - {class: register, name: gpr}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
- name: subq
  operands:
  - {class: immediate, imd: int}
  - {class: register, name: gpr}
  throughput: 0.25
  latency: 1.0
  port_pressure: [[1, '4567']]  # 1*p4567
# AVX floating-point add on ports 2/3. The ymm form uses twice the pressure
# of the xmm forms.
- name: vaddpd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 3.0
  port_pressure: [[1, '23']]  # 1*p23
- name: vaddpd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 1.0
  latency: 3.0
  port_pressure: [[2, '23']]  # 2*p23
- name: vaddsd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 3.0
  port_pressure: [[1, '23']]  # 1*p23
- name: vaddss
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 3.0
  port_pressure: [[1, '23']]  # 1*p23
# AVX floating-point divide: one uop on port 3 plus occupancy of the 3DV
# port equal to the listed throughput.
- name: vdivsd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 4.0
  latency: 13.0
  port_pressure: [[1, '3'], [4.0, ['3DV']]]  # 1*p3+4*p3DV
- name: vdivss
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 3.0
  latency: 10.0
  port_pressure: [[1, '3'], [3.0, ['3DV']]]
- name: vdivpd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 4.0
  latency: 8.0
  port_pressure: [[1, '3'], [4, ['3DV']]]
- name: vdivpd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 8.0
  latency: 8.0
  port_pressure: [[1, '3'], [8, ['3DV']]]
# Packed-double FMA on ports 0/1. Each ymm form uses twice the pressure of
# the matching xmm form.
- name: vfmadd213pd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 1.0
  latency: 5.0
  port_pressure: [[2, '01']]  # 2*p01
- name: vfmadd213pd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 5.0
  port_pressure: [[1, '01']]  # 1*p01
- name: vfmadd231pd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 5.0
  port_pressure: [[1, '01']]  # 1*p01
- name: vfmadd231pd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 1.0
  latency: 5.0
  port_pressure: [[2, '01']]  # 2*p01
- name: vfmadd132pd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 5.0
  port_pressure: [[1, '01']]  # 1*p01
- name: vfmadd132pd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 1.0
  latency: 5.0
  port_pressure: [[2, '01']]  # 2*p01
# AVX floating-point multiply on ports 0/1. The ymm form uses twice the
# pressure of the xmm forms.
- name: vmulsd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 4.0
  port_pressure: [[1, '01']]  # 1*p01
- name: vmulss
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 3.0
  port_pressure: [[1, '01']]  # 1*p01
- name: vmulpd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 4.0
  port_pressure: [[1, '01']]  # 1*p01
- name: vmulpd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 1.0
  latency: 4.0
  port_pressure: [[2, '01']]  # 2*p01
# Register-to-register vector moves: zero throughput/latency and no port
# pressure.
# The vmovaps xmm,xmm and ymm,ymm forms are deliberately not repeated here:
# identical forms (same operands, zero cost) are already defined near the
# start of instruction_forms, so a second copy was a redundant duplicate.
- name: vmovapd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
- name: vmovapd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
- name: vmovups
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.0
  latency: 0
  port_pressure: []
- name: vmovups
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 0.0
  latency: 0
  port_pressure: []
- name: vmovupd
  operands:
  - {class: register, name: ymm}
  - {class: register, name: ymm}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
- name: vmovupd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
- name: vmovsd
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.0
  latency: 0.0
  port_pressure: []
# POP into a GPR uses the same port pressure as load_throughput_default.
# Latency and uop count are not specified (null).
- name: POP
  operands:
  - {class: register, name: gpr}
  throughput: 0.5
  latency: ~
  port_pressure: [[1, '89'], [1, ['8D', '9D']]]
  uops: ~
# Scalar-double FMA variants, single uop on ports 0/1.
# JH: assumed from ZEN2
- name: VFMADD213SD
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 5
  port_pressure: [[1, '01']]
  uops: 1
# JH: assumed from ZEN2
- name: VFMADD132SD
  operands:
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  - {class: register, name: xmm}
  throughput: 0.5
  latency: 5
  port_pressure: [[1, '01']]
  uops: 1
# Grouped conditional jumps: each name list shares one zero-cost entry.
# NOTE(review): ja, jb and jne also have standalone entries earlier in
# instruction_forms with the same values; jge only has a standalone entry.
# JH: assumed from ZEN2
- name: [jo, jno, js, jns, jp, jpe, jnp, jpo]
  operands:
  - {class: identifier}
  throughput: 0.0
  latency: 0
  port_pressure: []
# JH: assumed from ZEN2
- name: [jc, jb, jae, jnb, jna, jbe, ja, jnbe]
  operands:
  - {class: identifier}
  throughput: 0.0
  latency: 0
  port_pressure: []
# JH: assumed from ZEN2
- name: [je, jz, jne, jnz, jl, jnge]
  operands:
  - {class: identifier}
  throughput: 0.0
  latency: 0
  port_pressure: []