mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-05 02:30:08 +01:00
Validation of OSACA predictions for IVB, SKX, ZEN1, ZEN2, A64FX and TX2 with different kernels. build_and_run.py contains the configuration used at RRZE's testcluster and UR's qpace4; Analysis.ipynb contains the analysis script and results. Raw data from the measurements (122 MB) will be attached to the next OSACA release. For now, find the raw data here: https://hawo.net/~sijuhamm/d/UPIhBOtz/validation-data.tar.gz The analysis report can be viewed at https://nbviewer.jupyter.org/github/RRZE-HPC/OSACA/blob/validation/validation/Analysis.ipynb Quite a few changes to OSACA are included: Feature: register change tracking via semantic understanding of operations; Feature: recording LCD latency along the path and exposing this to the frontend; Feature: support for memory reference aliases; Feature: store throughput scaling (similar to load throughput scaling); Fix: model importer works with the latest uops.info export; Fix: immediate type tracking on ARM now preserves the type in the internal representation; Removed unused KerncraftAPI.
671 lines
14 KiB
YAML
671 lines
14 KiB
YAML
osaca_version: 0.3.4
|
|
micro_architecture: AMD Zen (family 17h)
|
|
arch_code: ZEN1
|
|
isa: x86
|
|
load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0}
|
|
load_throughput_multiplier: {gpr: 1.0, xmm: 1.0, ymm: 2.0}
|
|
load_throughput:
|
|
- {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
- {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
- {base: gpr, index: ~, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
- {base: gpr, index: ~, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
- {base: gpr, index: gpr, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
- {base: gpr, index: gpr, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
- {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
- {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
|
load_throughput_default: [[1, '89'], [1, ['8D', '9D']]]
|
|
store_throughput: []
|
|
store_throughput_default: [[1, '89'], [1, ['ST']]]  # port names quoted as in load_throughput_default
|
|
store_throughput_multiplier: {gpr: 1.0, xmm: 1.0, ymm: 2.0}
|
|
store_to_load_forward_latency: 0.0 # JH: according to Agner Fog "little or no penalty"
|
|
|
|
hidden_loads: false
|
|
# Every port name is a string; quote all of them so none depends on implicit typing.
ports: ['0', '1', '2', '3', '3DV', '4', '5', '6', '7', '8', '9', '8D', '9D', 'ST']
|
|
port_model_scheme: |
|
|
+--------------------------------------+ +-----------------------------------------------+
|
|
| 96 entries OoO scheduler | | 84 entries OoO scheduler |
|
|
+--------------------------------------+ +-----------------------------------------------+
|
|
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|
|
\/ \/ \/ \/ \/ \/ \/ \/ \/ \/
|
|
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ +-----+ +-----+
|
|
|SSE ALU| |SSE ALU| |SSE ALU| |SSE ALU| | ALU | | ALU | | ALU | | ALU | | AGU | | AGU |
|
|
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ +-----+ +-----+
|
|
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ | |
|
|
|SSE MUL| |SSE MUL| |SSE ADD| |SSE ADD| |BRANCH| | MUL | | DIV | |BRANCH| \/ \/
|
|
+-------+ +-------+ +-------+ +-------+ +------+ +-----+ +-----+ +------+ +-------------+
|
|
+-------+ +-------+ +-------+ +-------+ | LOAD |
|
|
|SSE FMA| |SSE FMA| | SSE | |SSE DIV| +-------------+
|
|
+-------+ +-------+ | SHUF | +-------+ +-------------+
|
|
+-------+ +-------+ | LOAD |
|
|
| SSE | +-------------+
|
|
| SHUF | +-------------+
|
|
+-------+ | STORE |
|
|
+-------------+
|
|
|
|
instruction_forms:
|
|
- name: VPSHUFD
|
|
operands:
|
|
- class: immediate
|
|
imd: int
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
latency: 1
|
|
port_pressure: [[1, '12']]
|
|
throughput: 0.5
|
|
uops: 1
|
|
- name: vmovaps
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: vmovaps
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: VXORPD
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
latency: 1
|
|
port_pressure: [[1, '0123']]
|
|
throughput: 0.25
|
|
uops: 1
|
|
- name: VXORPD
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
latency: 1
|
|
port_pressure: [[1, '0123']]
|
|
throughput: 0.25
|
|
uops: 1
|
|
- name: VPADDD
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
latency: 1
|
|
port_pressure: [[1, '013']]
|
|
throughput: 0.3333333333333333
|
|
uops: 1
|
|
- name: vfmadd231sd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 5.0
|
|
port_pressure: [[1, '01']]
|
|
- name: vcvtsi2sd
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 1.0
|
|
latency: 7.0
|
|
port_pressure: [[1, '5'], [1, '3']]
|
|
- name: vcvtdq2pd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 1.0
|
|
latency: 7.0
|
|
port_pressure: [[1, '12'], [1, '3']]
|
|
- name: vcvtdq2pd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 2.0
|
|
latency: 7.0
|
|
port_pressure: [[2, '12'], [2, '3']]
|
|
- name: add
|
|
operands:
|
|
- class: immediate
|
|
imd: int
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: add
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: addl
|
|
operands:
|
|
- class: immediate
|
|
imd: int
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: addq
|
|
operands:
|
|
- class: immediate
|
|
imd: int
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: cmpl
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: cmpq
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: [CWDE, CWTL] # JH: should actually be TP=0.5, but unclear which ALU to assign to
|
|
operands: []
|
|
latency: 1
|
|
port_pressure: [[1, '4567']]
|
|
throughput: 0.25
|
|
uops: 1
|
|
- name: [CDQE, CLTQ] # JH: should actually be TP=0.5, but unclear which ALU to assign to
|
|
operands: []
|
|
latency: 1
|
|
port_pressure: [[1, '4567']]
|
|
throughput: 0.25
|
|
uops: 1
|
|
- name: [inc, dec]
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: ja
|
|
operands:
|
|
- class: identifier
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: jge
|
|
operands:
|
|
- class: identifier
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: jb
|
|
operands:
|
|
- class: identifier
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: jne
|
|
operands:
|
|
- class: identifier
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: lea
|
|
operands:
|
|
- class: memory
|
|
base: gpr
|
|
offset: '*'
|
|
index: '*'
|
|
scale: '*'
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.5 # NOTE(review): other forms in this file with port_pressure [[1, '4567']] list 0.25 -- confirm 0.5 is intended
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: mov
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: [movsxd, movslq] # JH: assumed from agner and port model
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0
|
|
port_pressure: [[1, '4567']]
|
|
- name: mulsd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 4.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: mulss
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 3.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: rcpss
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: ~ #1.0
|
|
latency: 5.0
|
|
port_pressure: []
|
|
- name: sqrtsd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: ~ #8.0
|
|
latency: 23.0
|
|
port_pressure: []
|
|
- name: sqrtss
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: ~ #5.0
|
|
latency: 17.0
|
|
port_pressure: []
|
|
- name: subq
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: subq
|
|
operands:
|
|
- class: immediate
|
|
imd: int
|
|
- class: register
|
|
name: gpr
|
|
throughput: 0.25
|
|
latency: 1.0 # 1*p4567
|
|
port_pressure: [[1, '4567']]
|
|
- name: vaddpd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 3.0 # 1*p23
|
|
port_pressure: [[1, '23']]
|
|
- name: vaddpd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 1.0
|
|
latency: 3.0 # 2*p23
|
|
port_pressure: [[2, '23']]
|
|
- name: vaddsd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 3.0 # 1*p23
|
|
port_pressure: [[1, '23']]
|
|
- name: vaddss
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 3.0 # 1*p23
|
|
port_pressure: [[1, '23']]
|
|
- name: vdivsd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 4.0
|
|
latency: 13.0 # 1*p3+4*p3DV
|
|
port_pressure: [[1, '3'], [4.0, ['3DV']]]  # '3DV' quoted as in the vdivpd forms
|
|
- name: vdivss
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 3.0
|
|
latency: 10.0
|
|
port_pressure: [[1, '3'], [3.0, ['3DV']]]  # '3DV' quoted as in the vdivpd forms
|
|
- name: vdivpd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 4.0
|
|
latency: 8.0
|
|
port_pressure: [[1, '3'], [4, ['3DV']]]
|
|
- name: vdivpd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 8.0
|
|
latency: 8.0
|
|
port_pressure: [[1, '3'], [8, ['3DV']]]
|
|
- name: vfmadd213pd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 1.0
|
|
latency: 5.0 # 2*p01
|
|
port_pressure: [[2, '01']]
|
|
- name: vfmadd213pd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 5.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: vfmadd231pd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 5.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: vfmadd231pd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 1.0
|
|
latency: 5.0 # 2*p01
|
|
port_pressure: [[2, '01']]
|
|
- name: vfmadd132pd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 5.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: vfmadd132pd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 1.0
|
|
latency: 5.0 # 2*p01
|
|
port_pressure: [[2, '01']]
|
|
- name: vmulsd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 4.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: vmulss
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 3.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: vmulpd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.5
|
|
latency: 4.0 # 1*p01
|
|
port_pressure: [[1, '01']]
|
|
- name: vmulpd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 1.0
|
|
latency: 4.0 # 2*p01
|
|
port_pressure: [[2, '01']]
|
|
- name: vmovapd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: vmovapd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: vmovaps # NOTE(review): an xmm,xmm vmovaps form is already defined earlier in instruction_forms -- confirm which entry the loader uses
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: vmovaps # NOTE(review): a ymm,ymm vmovaps form is already defined earlier in instruction_forms -- confirm which entry the loader uses
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: vmovups
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: vmovups
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: vmovupd
|
|
operands:
|
|
- class: register
|
|
name: ymm
|
|
- class: register
|
|
name: ymm
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: vmovupd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: vmovsd
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
throughput: 0.0
|
|
latency: 0.0
|
|
port_pressure: []
|
|
- name: POP
|
|
operands:
|
|
- class: register
|
|
name: gpr
|
|
latency: ~
|
|
port_pressure: [[1, '89'], [1, ['8D','9D']]]
|
|
throughput: 0.5
|
|
uops: ~  # explicit null (value not recorded), written like `latency: ~` above
|
|
- name: VFMADD213SD # JH: assumed from ZEN2
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
latency: 5
|
|
port_pressure: [[1, '01']]
|
|
throughput: 0.5
|
|
uops: 1
|
|
- name: VFMADD132SD # JH: assumed from ZEN2
|
|
operands:
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
- class: register
|
|
name: xmm
|
|
latency: 5
|
|
port_pressure: [[1, '01']]
|
|
throughput: 0.5
|
|
uops: 1
|
|
- name: [jo, jno, js, jns, jp, jpe, jnp, jpo] # JH: assumed from ZEN2
|
|
operands:
|
|
- class: identifier
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: [jc, jb, jae, jnb, jna, jbe, ja, jnbe] # JH: assumed from ZEN2; NOTE(review): jb and ja also have standalone entries earlier in this list -- confirm precedence
|
|
operands:
|
|
- class: identifier
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|
|
- name: [je, jz, jne, jnz, jl, jnge] # JH: assumed from ZEN2; NOTE(review): jne also has a standalone entry earlier in this list -- confirm precedence
|
|
operands:
|
|
- class: identifier
|
|
throughput: 0.0
|
|
latency: 0
|
|
port_pressure: []
|