mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c97f93c39b | ||
|
|
968c71b7b6 | ||
|
|
df26edd075 | ||
|
|
a767b7f290 | ||
|
|
ba45038ad7 | ||
|
|
72e85075c2 | ||
|
|
40839384ec | ||
|
|
ab615547e5 | ||
|
|
9c16f8bc56 | ||
|
|
be891d45d4 | ||
|
|
5735291d27 | ||
|
|
ab368cded1 | ||
|
|
12044e3ac4 | ||
|
|
8454edef73 | ||
|
|
9165306808 |
@@ -95,7 +95,7 @@ The usage of OSACA can be listed as:
|
||||
--arch ARCH
|
||||
needs to be replaced with the target architecture abbreviation.
|
||||
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX`` and ``ICL`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1``, ``ZEN2`` for AMD Zen architectures.
|
||||
Furthermore, ``TX2`` for Marvell`s ARM-based ThunderX2 , ``N1`` for ARM's Neoverse and ``A64FX`` for Fujitsu's HPC ARM architecture are available.
|
||||
Furthermore, ``TX2`` for Marvell's ARM-based ThunderX2, ``N1`` for ARM's Neoverse, ``A72`` for ARM Cortex-A72 and ``A64FX`` for Fujitsu's HPC ARM architecture are available.
|
||||
If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
|
||||
--fixed
|
||||
Run the throughput analysis with fixed port utilization for all suitable ports per instruction.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Open Source Architecture Code Analyzer"""
|
||||
name = "osaca"
|
||||
__version__ = "0.4.6"
|
||||
__version__ = "0.4.7"
|
||||
|
||||
# To trigger travis deployment to pypi, do the following:
|
||||
# 1. Increment __version__
|
||||
|
||||
4179
osaca/data/a72.yml
Normal file
4179
osaca/data/a72.yml
Normal file
File diff suppressed because it is too large
Load Diff
401
osaca/data/a72/mapping_pmevo.json
Normal file
401
osaca/data/a72/mapping_pmevo.json
Normal file
File diff suppressed because one or more lines are too long
808
osaca/data/a72/template.yml
Normal file
808
osaca/data/a72/template.yml
Normal file
@@ -0,0 +1,808 @@
|
||||
osaca_version: 0.3.11
|
||||
micro_architecture: Cortex A-72
|
||||
arch_code: a72
|
||||
isa: aarch64
|
||||
hidden_loads: false
|
||||
ports: ['0', '1', '2', '3', '4', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
+-------------------------------------------------------------------------------------+
|
||||
| scheduler |
|
||||
+-------------------------------------------------------------------------------------+
|
||||
0 |I 1 |L 2 |M 3 |S 4 |F1 5 |I 6 |F0 7 |B
|
||||
\/ \/ \/ \/ \/ \/ \/ \/
|
||||
+-------+ +-------+ +-------+ +-------+ +-----------+ +-------+ +---------+ +-------+
|
||||
|INT ALU| | LOAD | | MUL | | STORE | | ASIMD | |INT ALU| | ASIMD | | Branch|
|
||||
+-------+ +-------+ +-------+ +-------+ +-----------+ +-------+ +---------+ +-------+
|
||||
+-------+ +-------+ +-----------+ +-------+ +---------+
|
||||
| AGU | | DIV | | FP ALU | | AGU | |ASIMD MUL|
|
||||
+-------+ +-------+ +-----------+ +-------+ +---------+
|
||||
+-------+ +-----------+ +---------+
|
||||
| SHIFT | | FP MUL | | FP ALU |
|
||||
+-------+ +-----------+ +---------+
|
||||
+-------+ +-----------+ +---------+
|
||||
| CRC | | FP DIV | | FP MUL |
|
||||
+-------+ +-----------+ +---------+
|
||||
+-------+ +-----------+ +---------+
|
||||
| USAD | | FP SQRT | | FP DIV |
|
||||
+-------+ +-----------+ +---------+
|
||||
+-----------+ +---------+
|
||||
|ASIMD SHIFT| | FP CONV |
|
||||
+-----------+ +---------+
|
||||
+---------+
|
||||
| CRYPTO |
|
||||
+---------+
|
||||
# The port pressures do not always correctly match this schema, because most
|
||||
# instructions are imported from an experimentally determined mapping, which
|
||||
# is not always correct.
|
||||
load_latency: {x: 4.0, s: 5.0, d: 5.0, h: 6.0, q: 6.0}
|
||||
load_throughput: []
|
||||
load_throughput_default: [[1, '1']]
|
||||
store_throughput: []
|
||||
store_throughput_default: [[2, '3']]
|
||||
instruction_forms:
|
||||
|
||||
# Branch
|
||||
- name: b
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: bne
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: b.ne
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: br
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: ret
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: bl
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '05'], [1, '7']]
|
||||
throughput: 1.0
|
||||
- name: blr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '05'], [1, '7']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load GPR
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load FP d
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load FP q
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 6.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store GPR
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store FP d
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store FP q
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [2, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [2, '05']]
|
||||
throughput: 2.0
|
||||
|
||||
# Load unscaled GPR
|
||||
- name: ldur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load unscaled FP q
|
||||
- name: ldur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store unscaled GPR
|
||||
- name: stur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store unscaled FP q
|
||||
- name: stur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3']]
|
||||
throughput: 2.0
|
||||
|
||||
# Load pair GPR
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load pair FP q
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[2, '1']]
|
||||
throughput: 2.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[2, '1'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 6.0
|
||||
port_pressure: [[2, '1'], [1, '05']]
|
||||
throughput: 2.0
|
||||
|
||||
# Store pair GPR
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3']]
|
||||
throughput: 2.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
|
||||
# Store pair FP q
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[4, '3'], [1, '05']]
|
||||
throughput: 4.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[4, '3'], [1, '05']]
|
||||
throughput: 4.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 4.0
|
||||
port_pressure: [[4, '3'], [1, '05']]
|
||||
throughput: 4.0
|
||||
|
||||
# Fast-forward (measures 4 cycles, but can be 3)
|
||||
# Lower bound is used in order to ensure no over-estimates are possible.
|
||||
# Ports do not match documentation, but "fixing" requires also "fixing" almost
|
||||
# the entire rest of the model.
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
|
||||
# Automatically generated instructions
|
||||
@@ -212,6 +212,67 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: not
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: not
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: and
|
||||
operands:
|
||||
- class: "immediate"
|
||||
@@ -4085,6 +4146,55 @@ instruction_forms:
|
||||
name: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- name: xor
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- name: xor
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: xor
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: xor
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- name: xor
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands:
|
||||
|
||||
321
osaca/data/pmevo_importer.py
Executable file
321
osaca/data/pmevo_importer.py
Executable file
@@ -0,0 +1,321 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
|
||||
from asmbench import bench, op
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
def build_bench_instruction(name, operands):
    """Convert an OSACA model instruction form into an asmbench instruction string.

    Every operand is rendered as ``{direction:type:constraint}``. The first
    operand gets the direction ``dst``, all following ones ``src``. Immediates
    are rendered with type ``i32`` and a literal value in the constraint field.

    :param name: instruction mnemonic
    :param operands: list of OSACA operand dicts (``class``, ``prefix``, ...)
    :returns: the asmbench instruction string, or ``None`` if any operand cannot
        be expressed (unsupported class, register prefix or vector shape, or a
        malformed operand dict)
    """
    # (type, constraint) for the scalar register prefixes we can benchmark.
    scalar_registers = {
        "x": ("i64", "r"),
        "s": ("float", "w"),
        "d": ("double", "w"),
    }
    # Vector arrangement -> type used for the full 128-bit register.
    vector_shapes = {
        "b": "<16 x i8>",
        "h": "<8 x i16>",
        "s": "<4 x float>",
        "d": "<2 x double>",
    }
    # Different instructions have different ranges for literals,
    # so we need to pick something "reasonable" for each.
    zero_literal = {
        "cmeq",
        "cmge",
        "cmgt",
        "cmle",
        "cmlt",
        "fcmeq",
        "fcmge",
        "fcmgt",
        "fcmle",
        "fcmlt",
        "fcmp",
    }
    mask_literal = {"and", "ands", "eor", "eors", "orr", "orrs"}
    small_literal = {"bfi", "extr", "sbfiz", "sbfx", "shl", "sshr", "ubfiz", "ubfx", "ushr"}

    rendered = []
    for position, operand in enumerate(operands):
        operand_class = operand.get("class")

        if operand_class in ("register", "register_shift"):
            prefix = operand.get("prefix")
            if prefix in scalar_registers:
                shape, constraint = scalar_registers[prefix]
            elif prefix == "v":
                shape = vector_shapes.get(operand.get("shape"))
                if shape is None:
                    return None
                constraint = "w"
            else:
                return None
        elif operand_class in ("immediate", "immediate_shift"):
            shape = "i32"
            if name in zero_literal:
                constraint = "0"
            elif name in mask_literal:
                constraint = "255"
            elif name in small_literal:
                constraint = "7"
            else:
                constraint = "42"
        else:
            return None

        # The shift suffix belongs to this operand only; it must not leak into
        # the operands that follow.
        shift = ""
        if operand_class.endswith("_shift"):
            shift_op = operand.get("shift_op")
            if shift_op is None:
                return None
            shift = ", {}".format(shift_op)
            # The shift amount is optional.
            if operand.get("shift") is not None:
                shift += " {}".format(operand["shift"])

        direction = "dst" if position == 0 else "src"
        rendered.append("{{{}:{}:{}}}{}".format(direction, shape, constraint, shift))

    if not rendered:
        return name
    return name + " " + ", ".join(rendered)
|
||||
|
||||
|
||||
def bench_instruction(name, operands):
    """Benchmark an OSACA model instruction form with asmbench.

    The instruction is first translated with ``build_bench_instruction``.

    :param name: instruction mnemonic
    :param operands: list of OSACA operand dicts
    :returns: ``(None, None)`` if the instruction cannot be translated,
        otherwise whatever ``bench.bench_instructions`` returns for it
    """
    asmbench_inst = build_bench_instruction(name, operands)
    if asmbench_inst is not None:
        instruction = op.Instruction.from_string(asmbench_inst)
        return bench.bench_instructions([instruction])
    # Nothing to measure: the operands have no asmbench representation.
    return (None, None)
|
||||
|
||||
|
||||
def round_cycles(value):
    """Snap a measured cycle count to the value stored in the model.

    Anything below 0.9 becomes 0.5; everything else is rounded down to a whole
    number after adding a tolerance of 0.1.

    :param value: measured cycles
    :returns: rounded cycles as float
    """
    # Half-cycle results are frequently found, so we want to include them.
    # Measurements over-estimate a lot here, hence the high bound.
    if value < 0.9:
        return 0.5

    # Measurements usually over-estimate, so round down, but let values that
    # fall slightly short of a whole number still reach it.
    with_tolerance = value + 0.1
    whole_cycles = math.floor(with_tolerance)
    return float(whole_cycles)
|
||||
|
||||
|
||||
def operand_parse(op, state):
    """Parse one operand of a PMEvo instruction name into an OSACA model operand.

    ``state`` carries information between the operands of one instruction:

    * ``type``: ``"float"``/``"double"``, set by floating-point registers and
      used as the type of a following floating-point immediate
    * ``memory_base``: base register prefix, set by a ``MEM`` operand and used
      by the following ``MIMM`` operand
    * ``previous``: the operand dict returned by the last successful call

    :param op: operand text, starting with ``_``
    :param state: mutable dict shared by all operands of one instruction
    :returns: the OSACA operand dict, or ``None`` for a ``MEM`` operand, which
        only records the memory base in ``state``
    :raises ValueError: if the operand is unknown, unsupported, or depends on
        state that no earlier operand provided
    """
    parameter = {}

    if op.startswith("_((REG:"):
        parts = op.split(".")
        # Strip the leading "_((REG:" and the trailing "))".
        register = parts[0][7:-2]
        _, register_type, bits = register.split(":")

        parameter["class"] = "register"
        if register_type == "G":
            # AArch64 names its 32-bit general purpose registers w0-w30.
            gpr_prefixes = {"32": "w", "64": "x"}
            if bits not in gpr_prefixes:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
            parameter["prefix"] = gpr_prefixes[bits]
        elif register_type == "F":
            if bits == "32":
                parameter["prefix"] = "s"
                state["type"] = "float"
            elif bits == "64":
                parameter["prefix"] = "d"
                state["type"] = "double"
            elif bits == "128":
                parameter["prefix"] = "q"
            elif bits == "VEC":
                # The arrangement follows the register after a ".".
                if len(parts) < 2:
                    raise ValueError("Missing vector shape for register {}".format(op))
                vec_shape = parts[1]
                parameter["prefix"] = "v"
                if vec_shape == "16b":
                    parameter["shape"] = "b"
                elif vec_shape == "8h":
                    parameter["shape"] = "h"
                elif vec_shape == "4s":
                    parameter["shape"] = "s"
                    state["type"] = "float"
                elif vec_shape == "2d":
                    parameter["shape"] = "d"
                    state["type"] = "double"
                else:
                    raise ValueError("Invalid vector shape {}".format(vec_shape))
            else:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
        else:
            raise ValueError("Unknown register type {}".format(register_type))
    elif op.startswith("_[((MEM:"):
        bits = op[8:-2].split(":")[0]
        if bits != "64":
            raise ValueError("Invalid register bits for MEM {}".format(bits))
        # Only remember the base; the memory operand itself is emitted when the
        # following MIMM part is parsed.
        state["memory_base"] = "x"
        return None
    elif op.startswith("_((MIMM:"):
        bits = op[8:-3].split(":")[0]
        if bits != "16":
            raise ValueError("Invalid register bits for MEM {}".format(bits))
        if "memory_base" not in state:
            # Raise ValueError (not KeyError) so the caller can skip the instruction.
            raise ValueError("Memory offset {} without preceding memory base".format(op))
        parameter["class"] = "memory"
        parameter["base"] = state["memory_base"]
        parameter["offset"] = "imd"
        parameter["index"] = "*"
        parameter["scale"] = "*"
        parameter["post-indexed"] = False
        parameter["pre-indexed"] = False
    elif re.fullmatch(r"_#?-?(0x)?[0-9a-f]+", op):
        parameter["class"] = "immediate"
        parameter["imd"] = "int"
    elif re.fullmatch(r"_#?-?[0-9]*\.[0-9]*", op):
        if "type" not in state:
            # Raise ValueError (not KeyError) so the caller can skip the instruction.
            raise ValueError("Cannot determine type of floating-point immediate {}".format(op))
        parameter["class"] = "immediate"
        parameter["imd"] = state["type"]
    elif re.fullmatch(r"_((sxt|uxt)[bhw]|lsl|lsr|asr|rol|ror)(_[0-9]+)?", op):
        # NOTE: supporting these would mean folding the modifier into
        # state["previous"] (class + "_shift", "shift_op" and optional "shift")
        # and returning None. Not supported yet, so the instruction is skipped.
        raise ValueError("Skipping instruction with shift operand: {}".format(op))
    else:
        raise ValueError("Unknown operand {}".format(op))

    state["previous"] = parameter
    return parameter
|
||||
|
||||
|
||||
def port_convert(ports):
    """Convert a per-cycle port list into OSACA's port pressure format.

    Each entry of ``ports`` is the collection of port names usable in one
    cycle. Consecutive entries with the same ports are merged into a single
    ``[cycles, ports]`` pair.

    :param ports: list of per-cycle port name collections
    :returns: list of ``[cycles, port_string]`` pairs
    """
    # FIXME: This does not handle having more than 10 ports.
    merged = []
    for entry in ports:
        port_names = "".join(entry)
        if merged and merged[-1][1] == port_names:
            # Same ports as the previous cycle: extend the current run.
            merged[-1][0] += 1
        else:
            merged.append([1, port_names])
    return merged
|
||||
|
||||
|
||||
def throughput_guess(ports):
    """Guess the reciprocal throughput of an instruction from its port mapping.

    The cycle with the fewest usable ports is taken as the bottleneck, which
    should determine throughput to some degree of accuracy.
    (THIS IS *NOT* ALWAYS TRUE!)

    :param ports: list of per-cycle port name collections
    :returns: guessed throughput as float
    :raises ValueError: if ``ports`` is empty or a cycle has no usable port
    """
    if not ports:
        raise ValueError("Cannot guess throughput without any port assignment")
    bottleneck_ports = min(map(len, ports))
    if bottleneck_ports == 0:
        # Report this as ValueError so the caller can skip the instruction
        # instead of failing with ZeroDivisionError.
        raise ValueError("Cannot guess throughput for a cycle without usable ports")
    return float(len(ports)) / bottleneck_ports
|
||||
|
||||
|
||||
def latency_guess(ports):
    """Guess the latency of an instruction from its port mapping.

    Each entry in ``ports`` equates to one cycle on any of its ports, so the
    number of entries is about as good a guess as is available.

    :param ports: list of per-cycle port name collections
    :returns: guessed latency as float
    """
    cycle_count = len(ports)
    return float(cycle_count)
|
||||
|
||||
|
||||
def extract_model(mapping, arch, template_model, asmbench):
    """Populate a machine model with the ports and instructions of a mapping.

    :param mapping: dict providing ``mapping["arch"]["ports"]``,
        ``mapping["arch"]["insns"]`` and ``mapping["assignment"][insn]``
    :param arch: architecture name passed to ``MachineModel.get_isa_for_arch``
    :param template_model: existing model to extend, or ``None`` to start from
        a fresh ``MachineModel`` for the detected ISA
    :param asmbench: if truthy, replace the guessed latency and throughput with
        values measured by ``bench_instruction`` whenever a measurement exists
    :returns: the populated model, or ``None`` if no ISA is known for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except ValueError:
        print("Skipping...", file=sys.stderr)
        return None

    # Extend the template when one is given, otherwise start from scratch.
    mm = MachineModel(isa=isa) if template_model is None else template_model

    for port in mapping["arch"]["ports"]:
        mm.add_port(port)

    for insn in mapping["arch"]["insns"]:
        try:
            uop_ports = mapping["assignment"][insn]

            # Parse instruction: the first underscore-separated field is
            # ignored, the second is the mnemonic, and the rest (re-joined and
            # prefixed with "_") is the comma-separated operand list.
            fields = insn.split("_")
            mnemonic = fields[1]
            operand_tokens = ("_" + "_".join(fields[2:])).split(",")
            parse_state = {}
            # The generator parses tokens strictly in order, sharing
            # parse_state between calls; None results are dropped.
            operands = [
                parsed
                for parsed in (operand_parse(token, parse_state) for token in operand_tokens)
                if parsed is not None
            ]

            # Port pressures from mapping
            port_pressure = port_convert(uop_ports)

            # Initial guessed throughput and latency
            throughput = throughput_guess(uop_ports)
            latency = latency_guess(uop_ports)

            # Benchmark with asmbench
            if asmbench:
                measured_latency, measured_throughput = bench_instruction(mnemonic, operands)
                if measured_throughput is None:
                    print("Failed to measure throughput for instruction {}.".format(insn))
                else:
                    throughput = round_cycles(measured_throughput)
                if measured_latency is None:
                    print("Failed to measure latency for instruction {}.".format(insn))
                else:
                    latency = round_cycles(measured_latency)

            # Insert instruction if not already found (can happen with template).
            # No u-ops data available, hence the trailing None.
            if mm.get_instruction(mnemonic, operands) is None:
                mm.set_instruction(mnemonic, operands, latency, port_pressure, throughput, None)
        except ValueError as e:
            print("Failed to parse instruction {}: {}.".format(insn, e))

    return mm
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: convert a mapping JSON into a model YAML file.

    Reads the mapping from the ``json`` argument, optionally loads a template
    model from the ``yaml`` argument, optionally sets up benchmarking when
    ``--asmbench`` is given, and writes the dumped model to ``<arch>.yml`` in
    the current working directory. ``<arch>`` is the lower-cased architecture
    name taken from the mapping.

    Exits with a non-zero status if no model could be built for the
    architecture, without creating the output file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("json", help="path of mapping.json")
    parser.add_argument("yaml", help="path of template.yml", nargs="?")
    parser.add_argument(
        "--asmbench", help="Benchmark latency and throughput using asmbench.", action="store_true"
    )
    args = parser.parse_args()

    # The context manager closes the file even if the JSON is malformed.
    with open(args.json, "r") as json_file:
        mapping = json.load(json_file)
    arch = mapping["arch"]["name"].lower()

    template_model = None
    if args.yaml is not None:
        template_model = MachineModel(path_to_yaml=args.yaml)

    if args.asmbench:
        bench.setup_llvm()

    model = extract_model(mapping, arch, template_model, args.asmbench)
    if model is None:
        # extract_model() returns None when the architecture is unknown. Stop
        # here, before the output file is opened, so that no empty YAML file is
        # left behind and no AttributeError is raised on model.dump().
        sys.exit("No machine model could be built for architecture '{}'.".format(arch))

    with open("{}.yml".format(arch), "w") as f:
        f.write(model.dump())
|
||||
|
||||
|
||||
# Run the importer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@@ -234,7 +234,7 @@ class Frontend(object):
|
||||
separator += "--" + len(str(kernel[-1]["line_number"])) * "-"
|
||||
col_sep = "|"
|
||||
# for LCD/CP column
|
||||
separator += "-" * (2 * 6 + len(col_sep)) + "-" * len(col_sep)
|
||||
separator += "-" * (2 * 6 + len(col_sep)) + "-" * len(col_sep) + "--"
|
||||
sep_list = self._get_separator_list(col_sep)
|
||||
headline = "Port pressure in cycles"
|
||||
headline_str = "{{:^{}}}".format(len(separator))
|
||||
@@ -249,17 +249,14 @@ class Frontend(object):
|
||||
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
|
||||
}
|
||||
|
||||
s += headline_str.format(headline) + "\n"
|
||||
s += (
|
||||
(
|
||||
lineno_filler
|
||||
+ self._get_port_number_line(port_len, separator=col_sep)
|
||||
+ "{}{:^6}{}{:^6}{}".format(col_sep, "CP", col_sep, "LCD", col_sep)
|
||||
)
|
||||
+ "\n"
|
||||
+ separator
|
||||
+ "\n"
|
||||
port_line = (
|
||||
lineno_filler
|
||||
+ self._get_port_number_line(port_len, separator=col_sep)
|
||||
+ "{}{:^6}{}{:^6}{}".format(col_sep, "CP", col_sep, "LCD", col_sep)
|
||||
)
|
||||
separator = "-" * len(port_line)
|
||||
s += headline_str.format(headline) + "\n"
|
||||
s += port_line + "\n" + separator + "\n"
|
||||
for instruction_form in kernel:
|
||||
if show_cmnts is False and self._is_comment(instruction_form):
|
||||
continue
|
||||
@@ -300,7 +297,7 @@ class Frontend(object):
|
||||
s += (
|
||||
lineno_filler
|
||||
+ self._get_port_pressure(tp_sum, port_len, separator=" ")
|
||||
+ " {:^6} {:^6}\n".format(cp_sum, lcd_sum)
|
||||
+ " {:>5} {:>5} \n".format(cp_sum, lcd_sum)
|
||||
)
|
||||
return s
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ SUPPORTED_ARCHS = [
|
||||
"TX2",
|
||||
"N1",
|
||||
"A64FX",
|
||||
"A72",
|
||||
]
|
||||
DEFAULT_ARCHS = {
|
||||
"aarch64": "A64FX",
|
||||
@@ -95,7 +96,7 @@ def create_parser(parser=None):
|
||||
"--arch",
|
||||
type=str,
|
||||
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, N1, "
|
||||
"A64FX). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.",
|
||||
"A64FX, A72). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fixed",
|
||||
|
||||
@@ -266,11 +266,13 @@ class MachineModel(object):
|
||||
"""Return ISA for given micro-arch ``arch``."""
|
||||
arch_dict = {
|
||||
"a64fx": "aarch64",
|
||||
"a72": "aarch64",
|
||||
"tx2": "aarch64",
|
||||
"n1": "aarch64",
|
||||
"zen1": "x86",
|
||||
"zen+": "x86",
|
||||
"zen2": "x86",
|
||||
"icl": "x86",
|
||||
"con": "x86", # Intel Conroe
|
||||
"wol": "x86", # Intel Wolfdale
|
||||
"snb": "x86",
|
||||
|
||||
@@ -215,6 +215,7 @@ class KernelDG(nx.DiGraph):
|
||||
max_latency_instr = max(self.kernel, key=lambda k: k["latency"])
|
||||
if nx.algorithms.dag.is_directed_acyclic_graph(self.dg):
|
||||
longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight="latency")
|
||||
# TODO verify that we can remove the next two lines due to earlier initialization
|
||||
for line_number in longest_path:
|
||||
self._get_node_by_lineno(int(line_number))["latency_cp"] = 0
|
||||
# set cp latency to instruction
|
||||
@@ -223,6 +224,9 @@ class KernelDG(nx.DiGraph):
|
||||
node = self._get_node_by_lineno(int(s))
|
||||
node["latency_cp"] = self.dg.edges[(s, d)]["latency"]
|
||||
path_latency += node["latency_cp"]
|
||||
# add latency for last instruction
|
||||
node = self._get_node_by_lineno(longest_path[-1])
|
||||
node["latency_cp"] = node["latency"]
|
||||
if max_latency_instr["latency"] > path_latency:
|
||||
max_latency_instr["latency_cp"] = float(max_latency_instr["latency"])
|
||||
return [max_latency_instr]
|
||||
|
||||
Reference in New Issue
Block a user