mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
Compare commits
52 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c97f93c39b | ||
|
|
968c71b7b6 | ||
|
|
df26edd075 | ||
|
|
a767b7f290 | ||
|
|
ba45038ad7 | ||
|
|
72e85075c2 | ||
|
|
40839384ec | ||
|
|
ab615547e5 | ||
|
|
9c16f8bc56 | ||
|
|
be891d45d4 | ||
|
|
5735291d27 | ||
|
|
ab368cded1 | ||
|
|
6e99954f0b | ||
|
|
5205cb5cc6 | ||
|
|
e6ce870ca0 | ||
|
|
566fbc6bc4 | ||
|
|
b70cff21ad | ||
|
|
d181184788 | ||
|
|
fcc3475417 | ||
|
|
d418c16f4a | ||
|
|
34523e1b23 | ||
|
|
457ccdcf77 | ||
|
|
ff61c65d58 | ||
|
|
615c809fe3 | ||
|
|
bce837dec9 | ||
|
|
090c24ade1 | ||
|
|
03a2a1da33 | ||
|
|
d59b100fa8 | ||
|
|
5c741a8a2d | ||
|
|
2f4849f44e | ||
|
|
f13a97e5b5 | ||
|
|
66282b0eef | ||
|
|
9ec7c161ab | ||
|
|
8d8eaa8e4f | ||
|
|
88d5094bf1 | ||
|
|
1f32252f91 | ||
|
|
1de644cd62 | ||
|
|
3d1c6aae8d | ||
|
|
dafec70e6e | ||
|
|
6d85fbe9e4 | ||
|
|
3f31235f8a | ||
|
|
cfc061e5e3 | ||
|
|
5eb3e07ad6 | ||
|
|
a82a0e24a3 | ||
|
|
6db08c7e8e | ||
|
|
e6a54ee131 | ||
|
|
152360bad2 | ||
|
|
607d459569 | ||
|
|
b033b3b7aa | ||
|
|
12044e3ac4 | ||
|
|
8454edef73 | ||
|
|
9165306808 |
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -25,4 +25,6 @@ jobs:
|
||||
github_token: ${{ secrets.github_token }}
|
||||
# Enable linters
|
||||
black: true
|
||||
black_args: "-l 99"
|
||||
flake8: true
|
||||
flake8_args: "--max-line-length=99 --extend-ignore=E203,E501"
|
||||
|
||||
4
.github/workflows/test-n-publish.yml
vendored
4
.github/workflows/test-n-publish.yml
vendored
@@ -31,11 +31,11 @@ jobs:
|
||||
- uses: codecov/codecov-action@v1
|
||||
- name: Build package
|
||||
run: |
|
||||
python setup.py build sdist
|
||||
python setup.py build sdist bdist_wheel
|
||||
- name: Publish to PyPI
|
||||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
|
||||
uses: pypa/gh-action-pypi-publish@master
|
||||
with:
|
||||
skip_existing: true
|
||||
user: __token__
|
||||
password: ${{ secrets.pypi_password }}
|
||||
password: ${{ secrets.pypi_password }}
|
||||
|
||||
22
README.rst
22
README.rst
@@ -82,10 +82,10 @@ The usage of OSACA can be listed as:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
osaca [-h] [-V] [--arch ARCH] [--fixed] [--lines LINES] [--db-check]
|
||||
[--import MICROBENCH] [--insert-marker]
|
||||
[--export-graph GRAPHNAME] [--ignore-unknown] [--verbose]
|
||||
[--out OUT]
|
||||
osaca [-h] [-V] [--arch ARCH] [--fixed] [--lines LINES]
|
||||
[--ignore-unknown] [--lcd-timeout SECONDS]
|
||||
[--db-check] [--import MICROBENCH] [--insert-marker]
|
||||
[--export-graph GRAPHNAME] [--out OUT] [--verbose]
|
||||
FILEPATH
|
||||
|
||||
-h, --help
|
||||
@@ -95,7 +95,7 @@ The usage of OSACA can be listed as:
|
||||
--arch ARCH
|
||||
needs to be replaced with the target architecture abbreviation.
|
||||
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX`` and ``ICL`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1``, ``ZEN2`` for AMD Zen architectures.
|
||||
Furthermore, ``TX2`` for Marvell`s ARM-based ThunderX2 , ``N1`` for ARM's Neoverse and ``A64FX`` for Fujitsu's HPC ARM architecture are available.
|
||||
Furthermore, ``TX2`` for Marvell's ARM-based ThunderX2, ``N1`` for ARM's Neoverse, ``A72`` for ARM Cortex-A72 and ``A64FX`` for Fujitsu's HPC ARM architecture are available.
|
||||
If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
|
||||
--fixed
|
||||
Run the throughput analysis with fixed port utilization for all suitable ports per instruction.
|
||||
@@ -118,6 +118,9 @@ The usage of OSACA can be listed as:
|
||||
--ignore-unknown
|
||||
Force OSACA to apply a throughput and latency of 0.0 cy for all unknown instruction forms.
|
||||
If not specified, a warning will be printed instead if one or more instruction forms are unknown to OSACA.
|
||||
--lcd-timeout SECONDS
|
||||
Set timeout in seconds for LCD analysis. After timeout, OSACA will continue its analysis with the dependency paths found up to this point.
|
||||
Defaults to `10`.
|
||||
-v, --verbose
|
||||
Increases verbosity level
|
||||
-o OUT, --out OUT
|
||||
@@ -370,9 +373,16 @@ In the bottom, all loop-carried dependencies are shown, each with a list of line
|
||||
|
||||
You can find more (already marked) examples and sample outputs for various architectures in the `examples <examples/>`__ directory.
|
||||
|
||||
Citations
|
||||
=========
|
||||
If you use OSACA for scientific work, you can cite us as follows (for the BibTeX entries, see the `Wiki <https://github.com/RRZE-HPC/OSACA/wiki#acknowledgement>`_):
|
||||
|
||||
* `Automated Instruction Stream Throughput Prediction for Intel and AMD Microarchitectures <https://doi.org/10.1109/PMBS.2018.8641578>`_ (`Pre-print PMBS18 <https://arxiv.org/abs/1809.00912>`_)
|
||||
* `Automatic Throughput and Critical Path Analysis of x86 and ARM Assembly Kernels <https://doi.org/10.1109/PMBS49563.2019.00006>`_ (`Pre-print PMBS19 <https://arxiv.org/abs/1910.00214>`_)
|
||||
|
||||
Credits
|
||||
=======
|
||||
Implementation: Jan Laukemann
|
||||
Implementation: Jan Laukemann, Julian Hammer
|
||||
|
||||
License
|
||||
=======
|
||||
|
||||
@@ -7,7 +7,8 @@ import re
|
||||
def __read(*names, **kwargs):
|
||||
"""Reads in file"""
|
||||
with io.open(
|
||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
||||
os.path.join(os.path.dirname(__file__), *names),
|
||||
encoding=kwargs.get("encoding", "utf8"),
|
||||
) as fp:
|
||||
return fp.read()
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
"""Open Source Architecture Code Analyzer"""
|
||||
name = "osaca"
|
||||
__version__ = "0.4.0"
|
||||
__version__ = "0.4.7"
|
||||
|
||||
# To trigger travis deployment to pypi, do the following:
|
||||
# 1. Increment __version__
|
||||
# 2. commit to RRZE-HPC/osaca's master branch
|
||||
# 3. wait for travis to complete successful (unless already tested)
|
||||
# 3. wait for GitHub Actions to complete successfully (unless already tested)
|
||||
# 4. tag commit with 'v{}'.format(__version__) (`git tag vX.Y.Z`)
|
||||
# 5. push tag to github (`git push origin vX.Y.Z` or push all tags with `git push --tags`)
|
||||
|
||||
@@ -520,7 +520,7 @@ instruction_forms:
|
||||
width: '512'
|
||||
throughput: 11.5
|
||||
latency: 49.0 # 11*p0+12*p02
|
||||
port_pressure: [[10, '0'], [12, '02']]
|
||||
port_pressure: [[9, '0'], [14, '02']]
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1095,7 +1095,7 @@ instruction_forms:
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [2, ['5D', '6D']]]
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1d
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1113,7 +1113,7 @@ instruction_forms:
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [2, ['5D', '6D']]]
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1d
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1132,6 +1132,351 @@ instruction_forms:
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld1w
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: ~
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1w
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: x
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1w
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: z
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld1h
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: ~
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1h
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: x
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1h
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: z
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld1b
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: ~
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1b
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: x
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1b
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: z
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld1sw
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: ~
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1sw
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: x
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1sw
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: z
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld1sh
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: ~
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1sh
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: x
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1sh
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: z
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld1sb
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: ~
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1sb
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: x
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 1*p56+1*p5D6D
|
||||
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
|
||||
- name: ld1sb
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: z
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld2d
|
||||
operands:
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 2*p56+4*p5D6D
|
||||
port_pressure: [[2, '56'], [4, ['5D', '6D']]]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1414,6 +1759,22 @@ instruction_forms:
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: ld2
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 11.0 # 1*p56+2*p5D6D
|
||||
port_pressure: [[1, '56'], [2, ['5D','6D']]]
|
||||
- name: lsl
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1980,6 +2341,43 @@ instruction_forms:
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p5+1*p6+1*p0
|
||||
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
|
||||
- name: st2d
|
||||
operands:
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p5+1*p6+1*p0
|
||||
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
|
||||
- name: st2
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 11.0 # 1*p5+1*p6+1*p0
|
||||
port_pressure: [[1, '5'], [1, ['6']], [1, '0']]
|
||||
- name: sub
|
||||
operands:
|
||||
- class: register
|
||||
|
||||
4179
osaca/data/a72.yml
Normal file
4179
osaca/data/a72.yml
Normal file
File diff suppressed because it is too large
Load Diff
401
osaca/data/a72/mapping_pmevo.json
Normal file
401
osaca/data/a72/mapping_pmevo.json
Normal file
File diff suppressed because one or more lines are too long
808
osaca/data/a72/template.yml
Normal file
808
osaca/data/a72/template.yml
Normal file
@@ -0,0 +1,808 @@
|
||||
osaca_version: 0.3.11
|
||||
micro_architecture: Cortex A-72
|
||||
arch_code: a72
|
||||
isa: aarch64
|
||||
hidden_loads: false
|
||||
ports: ['0', '1', '2', '3', '4', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
+-------------------------------------------------------------------------------------+
|
||||
| scheduler |
|
||||
+-------------------------------------------------------------------------------------+
|
||||
0 |I 1 |L 2 |M 3 |S 4 |F1 5 |I 6 |F0 7 |B
|
||||
\/ \/ \/ \/ \/ \/ \/ \/
|
||||
+-------+ +-------+ +-------+ +-------+ +-----------+ +-------+ +---------+ +-------+
|
||||
|INT ALU| | LOAD | | MUL | | STORE | | ASIMD | |INT ALU| | ASIMD | | Branch|
|
||||
+-------+ +-------+ +-------+ +-------+ +-----------+ +-------+ +---------+ +-------+
|
||||
+-------+ +-------+ +-----------+ +-------+ +---------+
|
||||
| AGU | | DIV | | FP ALU | | AGU | |ASIMD MUL|
|
||||
+-------+ +-------+ +-----------+ +-------+ +---------+
|
||||
+-------+ +-----------+ +---------+
|
||||
| SHIFT | | FP MUL | | FP ALU |
|
||||
+-------+ +-----------+ +---------+
|
||||
+-------+ +-----------+ +---------+
|
||||
| CRC | | FP DIV | | FP MUL |
|
||||
+-------+ +-----------+ +---------+
|
||||
+-------+ +-----------+ +---------+
|
||||
| USAD | | FP SQRT | | FP DIV |
|
||||
+-------+ +-----------+ +---------+
|
||||
+-----------+ +---------+
|
||||
|ASIMD SHIFT| | FP CONV |
|
||||
+-----------+ +---------+
|
||||
+---------+
|
||||
| CRYPTO |
|
||||
+---------+
|
||||
# The port pressures do not always correctly match this schema, because most
|
||||
# instructions are imported from an experimentally determined mapping, which
|
||||
# is not always correct.
|
||||
load_latency: {x: 4.0, s: 5.0, d: 5.0, h: 6.0, q: 6.0}
|
||||
load_throughput: []
|
||||
load_throughput_default: [[1, '1']]
|
||||
store_throughput: []
|
||||
store_throughput_default: [[2, '3']]
|
||||
instruction_forms:
|
||||
|
||||
# Branch
|
||||
- name: b
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: bne
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: b.ne
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: br
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: ret
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '7']]
|
||||
throughput: 1.0
|
||||
- name: bl
|
||||
operands:
|
||||
- class: identifier
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '05'], [1, '7']]
|
||||
throughput: 1.0
|
||||
- name: blr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '05'], [1, '7']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load GPR
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load FP d
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load FP q
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 6.0
|
||||
port_pressure: [[1, '1'], [2, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store GPR
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store FP d
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store FP q
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [2, '05']]
|
||||
throughput: 2.0
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 4.0
|
||||
port_pressure: [[2, '3'], [2, '05']]
|
||||
throughput: 2.0
|
||||
|
||||
# Load unscaled GPR
|
||||
- name: ldur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load unscaled FP q
|
||||
- name: ldur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 5.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store unscaled GPR
|
||||
- name: stur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '3']]
|
||||
throughput: 1.0
|
||||
|
||||
# Store unscaled FP q
|
||||
- name: stur
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: '*'
|
||||
pre-indexed: '*'
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3']]
|
||||
throughput: 2.0
|
||||
|
||||
# Load pair GPR
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 4.0
|
||||
port_pressure: [[1, '1'], [1, '05']]
|
||||
throughput: 1.0
|
||||
|
||||
# Load pair FP q
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[2, '1']]
|
||||
throughput: 2.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 6.0
|
||||
port_pressure: [[2, '1'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 6.0
|
||||
port_pressure: [[2, '1'], [1, '05']]
|
||||
throughput: 2.0
|
||||
|
||||
# Store pair GPR
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3']]
|
||||
throughput: 2.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 2.0
|
||||
port_pressure: [[2, '3'], [1, '05']]
|
||||
throughput: 2.0
|
||||
|
||||
# Store pair FP q
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[4, '3'], [1, '05']]
|
||||
throughput: 4.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: true
|
||||
pre-indexed: false
|
||||
latency: 4.0
|
||||
port_pressure: [[4, '3'], [1, '05']]
|
||||
throughput: 4.0
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: true
|
||||
latency: 4.0
|
||||
port_pressure: [[4, '3'], [1, '05']]
|
||||
throughput: 4.0
|
||||
|
||||
# Fast-forward (measures 4 cycles, but can be 3)
|
||||
# Lower bound is used in order to ensure no over-estimates are possible.
|
||||
# Ports do not match documentation, but "fixing" requires also "fixing" almost
|
||||
# the entire rest of the model.
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
- class: register
|
||||
prefix: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
latency: 3.0
|
||||
port_pressure: [[1, '5']]
|
||||
throughput: 1.0
|
||||
|
||||
# Automatically generated instructions
|
||||
@@ -163,6 +163,16 @@ instruction_forms:
|
||||
port_pressure: [[1, '06']] # JH: assumed from SKX
|
||||
throughput: 0.5 # JH: measured on casclakesp2
|
||||
uops: 1
|
||||
- name: BT
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 1
|
||||
port_pressure: [[1, '06']] # JH: assumed from SKX
|
||||
throughput: 0.5 # JH: measured on casclakesp2
|
||||
uops: 1
|
||||
- name: BTS
|
||||
operands:
|
||||
- class: immediate
|
||||
@@ -369,6 +379,11 @@ instruction_forms:
|
||||
throughput: 0.25
|
||||
latency: 1.0
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: lock
|
||||
operands: []
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: cmp
|
||||
operands:
|
||||
- class: register
|
||||
@@ -746,6 +761,15 @@ instruction_forms:
|
||||
port_pressure: [[1, '06']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: SAR
|
||||
operands:
|
||||
# assume implicit immediate (0)
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 1
|
||||
port_pressure: [[1, '06']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: SARX
|
||||
operands:
|
||||
- class: register
|
||||
@@ -788,6 +812,15 @@ instruction_forms:
|
||||
port_pressure: [[1, '06']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: [shr, shl]
|
||||
# assume implicit immediate (0)
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 1
|
||||
port_pressure: [[1, '06']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: SHR
|
||||
operands:
|
||||
- class: immediate
|
||||
@@ -840,7 +873,7 @@ instruction_forms:
|
||||
throughput: 3.0
|
||||
latency: 16.0 # 1"*"p0+3"*"p0DV
|
||||
port_pressure: [[1, '0'], [3.0, [0DV]]]
|
||||
- name: subq
|
||||
- name: sub
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
@@ -849,6 +882,15 @@ instruction_forms:
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1"*"p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: sub
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1"*"p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: TEST
|
||||
operands:
|
||||
- class: immediate
|
||||
@@ -4620,7 +4662,7 @@ instruction_forms:
|
||||
port_pressure: [[1, '23'], [1, ['2D', '3D']]] # ./generate_mov_entries.py csx
|
||||
throughput: 0.5 # ./generate_mov_entries.py csx
|
||||
uops: 2 # ./generate_mov_entries.py csx
|
||||
- name: mov # ./generate_mov_entries.py csx
|
||||
- name: [mov, movabs] # ./generate_mov_entries.py csx
|
||||
operands: # ./generate_mov_entries.py csx
|
||||
- class: immediate # ./generate_mov_entries.py csx
|
||||
imd: int # ./generate_mov_entries.py csx
|
||||
@@ -4630,7 +4672,7 @@ instruction_forms:
|
||||
port_pressure: [[1, '0156']] # ./generate_mov_entries.py csx
|
||||
throughput: 0.25 # ./generate_mov_entries.py csx
|
||||
uops: 1 # ./generate_mov_entries.py csx
|
||||
- name: mov # with store, simple AGU # ./generate_mov_entries.py csx
|
||||
- name: [mov, movabs] # with store, simple AGU # ./generate_mov_entries.py csx
|
||||
operands: # ./generate_mov_entries.py csx
|
||||
- class: immediate # ./generate_mov_entries.py csx
|
||||
imd: int # ./generate_mov_entries.py csx
|
||||
@@ -4643,7 +4685,7 @@ instruction_forms:
|
||||
port_pressure: [[1, '237'], [1, '4']] # ./generate_mov_entries.py csx
|
||||
throughput: 1.0 # ./generate_mov_entries.py csx
|
||||
uops: 2 # ./generate_mov_entries.py csx
|
||||
- name: mov # with store, complex AGU # ./generate_mov_entries.py csx
|
||||
- name: [mov, movabs] # with store, complex AGU # ./generate_mov_entries.py csx
|
||||
operands: # ./generate_mov_entries.py csx
|
||||
- class: immediate # ./generate_mov_entries.py csx
|
||||
imd: int # ./generate_mov_entries.py csx
|
||||
@@ -8738,6 +8780,17 @@ instruction_forms:
|
||||
port_pressure: [[1, '0156']] # model_importer.py SKX
|
||||
throughput: 0.25 # model_importer.py SKX
|
||||
uops: 0 # model_importer.py SKX
|
||||
- name: NOP
|
||||
operands:
|
||||
- class: memory
|
||||
base: '*'
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
latency: ~
|
||||
port_pressure: [[1, '0156']]
|
||||
throughput: 0.25
|
||||
uops: 0
|
||||
- name: INC # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
@@ -8767,6 +8820,14 @@ instruction_forms:
|
||||
port_pressure: [[3, '0156'], [2, '23']] # model_importer.py SKX
|
||||
throughput: 1.0 # model_importer.py SKX
|
||||
uops: 5 # model_importer.py SKX
|
||||
- name: [sete, setne, setg, setge, seta, setae]
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 1
|
||||
port_pressure: [[1, '06']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: SETB # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
@@ -8904,6 +8965,16 @@ instruction_forms:
|
||||
port_pressure: [[1, '0156'], [2, '06'], [1, '23'], [1, '237'], [1, '4']] # model_importer.py SKX
|
||||
throughput: 1.25 # model_importer.py SKX
|
||||
uops: 6 # model_importer.py SKX
|
||||
- name: SBB
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 2
|
||||
port_pressure: [[1, '06']]
|
||||
throughput: 0.5
|
||||
uops: 2
|
||||
- name: SBB # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: immediate # model_importer.py SKX
|
||||
@@ -9791,7 +9862,7 @@ instruction_forms:
|
||||
port_pressure: [[1, '0156']] # model_importer.py SKX
|
||||
throughput: 0.25 # model_importer.py SKX
|
||||
uops: 10 # model_importer.py SKX
|
||||
- name: CDQ # model_importer.py SKX
|
||||
- name: [CDQ, CLTD] # model_importer.py SKX
|
||||
operands: [] # model_importer.py SKX
|
||||
latency: 1 # model_importer.py SKX
|
||||
port_pressure: [[1, '06']] # model_importer.py SKX
|
||||
@@ -9839,10 +9910,22 @@ instruction_forms:
|
||||
scale: '*' # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
latency: ~ # model_importer.py SKX
|
||||
latency: 3 # model_importer.py SKX
|
||||
port_pressure: [[1, '1'], [1, '23'], [1, [2D, 3D]]] # model_importer.py SKX
|
||||
throughput: 1.0 # model_importer.py SKX
|
||||
uops: 2 # model_importer.py SKX
|
||||
- name: IMUL
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 3
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.0
|
||||
uops: 1
|
||||
- name: IMUL # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
@@ -10077,6 +10160,30 @@ instruction_forms:
|
||||
port_pressure: [[1, '0156'], [1, '1'], [1, '23'], [1, '237'], [1, '4']] # model_importer.py SKX
|
||||
throughput: 1.25 # model_importer.py SKX
|
||||
uops: 5 # model_importer.py SKX
|
||||
- name: [SHLD, SHRD]
|
||||
operands:
|
||||
- class: register #CL
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 5
|
||||
port_pressure: [[1, '0156'], [2, '06'], [1, '1']]
|
||||
throughput: 1.00
|
||||
uops: 4
|
||||
- name: [SHLD, SHRD]
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 5
|
||||
port_pressure: [[1, '1']]
|
||||
throughput: 1.00
|
||||
uops: 1
|
||||
- name: SHLD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
@@ -10140,7 +10247,17 @@ instruction_forms:
|
||||
latency: ~ # model_importer.py SKX
|
||||
port_pressure: [[1, '0156'], [1, '23'], [1, '237'], [1, '4']] # model_importer.py SKX
|
||||
throughput: 1.0 # model_importer.py SKX
|
||||
uops: 4 # model_importer.py SKX
|
||||
uops: 4
|
||||
- name: OR
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
latency: 1
|
||||
port_pressure: [[1, '0156']]
|
||||
throughput: 0.25
|
||||
uops: 1
|
||||
- name: OR # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: immediate # model_importer.py SKX
|
||||
@@ -10664,6 +10781,29 @@ instruction_forms:
|
||||
port_pressure: [[1, '015']] # model_importer.py SKX
|
||||
throughput: 0.3333333333333333 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: [CMPEQSS, CMPLTSS, CMPLESS, CMPUNORDSS, CMPNEQSS, CMPNLTSS, CMPNLESS, CMPORDSS] # pseudo op to CMPSS
|
||||
operands:
|
||||
- class: memory
|
||||
base: '*'
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 4
|
||||
port_pressure: [[1, '015'], [1, '23'], [1, [2D, 3D]]]
|
||||
throughput: 0.5
|
||||
uops: 2
|
||||
- name: [CMPEQSS, CMPLTSS, CMPLESS, CMPUNORDSS, CMPNEQSS, CMPNLTSS, CMPNLESS, CMPORDSS] # pseudo op to CMPSS
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 4
|
||||
port_pressure: [[1, '015']]
|
||||
throughput: 0.3333333333333333
|
||||
uops: 1
|
||||
- name: FXSAVE64 # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
@@ -12144,6 +12284,30 @@ instruction_forms:
|
||||
port_pressure: [[1, '015']] # model_importer.py SKX
|
||||
throughput: 0.3333333333333333 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: [CMPEQSD, CMPLTSD, CMPLESD, CMPUNORDSD, CMPNEQSD, CMPNLTSD, CMPNLESD, CMPORDSD] # pseudo op to CMPSD
|
||||
operands:
|
||||
- class: memory
|
||||
base: '*'
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 4
|
||||
port_pressure: [[1, '015'], [1, '23'], [1, [2D, 3D]]]
|
||||
throughput: 0.5
|
||||
uops: 2
|
||||
- name: [CMPEQSD, CMPLTSD, CMPLESD, CMPUNORDSD, CMPNEQSD, CMPNLTSD, CMPNLESD, CMPORDSD] # pseudo op to CMPSD
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 4
|
||||
port_pressure: [[1, '015']]
|
||||
throughput: 0.3333333333333333
|
||||
uops: 1
|
||||
|
||||
- name: PMULHUW # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
@@ -15616,7 +15780,7 @@ instruction_forms:
|
||||
port_pressure: [[2, '0'], [6, '0156'], [4, '06'], [1, '23'], [1, '237'], [1, '4'], [4, '5']] # model_importer.py SKX
|
||||
throughput: 5.5 # model_importer.py SKX
|
||||
uops: 23 # model_importer.py SKX
|
||||
- name: CQO # model_importer.py SKX
|
||||
- name: [CQO, CQTO] # model_importer.py SKX
|
||||
operands: [] # model_importer.py SKX
|
||||
latency: 1 # model_importer.py SKX
|
||||
port_pressure: [[1, '06']] # model_importer.py SKX
|
||||
@@ -19074,6 +19238,18 @@ instruction_forms:
|
||||
port_pressure: [[1, '01']] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: [VCMPEQSS, VCMPGESS, VCMPLTSS, VCMPLESS, VCMPUNORDSS, VCMPNEQSS, VCMPNLTSS, VCMPNLESS, VCMPORDSS] # pseudo op to VCMPSS # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
latency: 4 # model_importer.py SKX
|
||||
port_pressure: [[1, '01']] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: VPSHUFLW # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: immediate # model_importer.py SKX
|
||||
@@ -19155,6 +19331,18 @@ instruction_forms:
|
||||
port_pressure: [[1, '01']] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: [VCMPEQSD, VCMPGESD, VCMPLTSD, VCMPLESD, VCMPUNORDSD, VCMPNEQSD, VCMPNLTSD, VCMPNLESD, VCMPORDSD] # pseudo op to VCMPSD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
latency: 4 # model_importer.py SKX
|
||||
port_pressure: [[1, '01']] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: VPSLLQ # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
@@ -20313,6 +20501,18 @@ instruction_forms:
|
||||
port_pressure: [[1, '01']] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: [VCMPEQPS, VCMPGEPS, VCMPLTPS, VCMPLEPS, VCMPUNORDPS, VCMPNEQPS, VCMPNLTPS, VCMPNLEPS, VCMPORDPS] # pseudo op to VCMPPS # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
latency: 4 # model_importer.py SKX
|
||||
port_pressure: [[1, '01']] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: VCMPPS # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: immediate # model_importer.py SKX
|
||||
@@ -57595,6 +57795,18 @@ instruction_forms:
|
||||
port_pressure: [[1, '01'], [1, '23'], [1, [2D, 3D]]] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 2 # model_importer.py SKX
|
||||
- name: VPSRLD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: immediate # model_importer.py SKX
|
||||
imd: int # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
latency: 1 # model_importer.py SKX
|
||||
port_pressure: [[1, '01']] # model_importer.py SKX
|
||||
throughput: 0.5 # model_importer.py SKX
|
||||
uops: 1 # model_importer.py SKX
|
||||
- name: VPSRLD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: immediate # model_importer.py SKX
|
||||
@@ -63257,8 +63469,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: zmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -63272,8 +63482,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -63287,8 +63495,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: ymm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -63577,8 +63783,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: ymm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -63592,8 +63796,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -67558,8 +67760,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: zmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -67573,8 +67773,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -67588,8 +67786,6 @@ instruction_forms:
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: ymm # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
@@ -68665,8 +68861,6 @@ instruction_forms:
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: zmm # model_importer.py SKX
|
||||
latency: 6 # model_importer.py SKX
|
||||
@@ -68680,8 +68874,6 @@ instruction_forms:
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
latency: 3 # model_importer.py SKX
|
||||
@@ -68695,8 +68887,6 @@ instruction_forms:
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: gpr # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: ymm # model_importer.py SKX
|
||||
latency: 4 # model_importer.py SKX
|
||||
|
||||
@@ -88,7 +88,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
||||
|
||||
comment = None
|
||||
if load:
|
||||
if 'ymm' in operand_types:
|
||||
if "ymm" in operand_types:
|
||||
port2D3D_pressure = 2
|
||||
else:
|
||||
port2D3D_pressure = 1
|
||||
@@ -96,7 +96,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
||||
latency += 4
|
||||
comment = "with load"
|
||||
if store:
|
||||
if 'ymm' in operand_types:
|
||||
if "ymm" in operand_types:
|
||||
port4_pressure = 2
|
||||
else:
|
||||
port4_pressure = 1
|
||||
@@ -716,14 +716,14 @@ skx_mov_instructions = list(
|
||||
# ('movapd xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovapd xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovapd ymm ymm', ('1*p5', 1)),
|
||||
('vmovapd zmm zmm', ('', 0)),
|
||||
("vmovapd zmm zmm", ("", 0)),
|
||||
# https://www.felixcloutier.com/x86/movaps
|
||||
# TODO with masking!
|
||||
# TODO the following may eliminate or be bound to 1*p0156:
|
||||
# ('movaps xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovaps xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovaps ymm ymm', ('1*p5', 1)),
|
||||
('vmovaps zmm zmm', ('', 0)),
|
||||
("vmovaps zmm zmm", ("", 0)),
|
||||
# https://www.felixcloutier.com/x86/movbe
|
||||
("movbe gpr mem", ("1*p15", 4)),
|
||||
("movbe mem gpr", ("1*p15", 4)),
|
||||
|
||||
@@ -62,6 +62,28 @@ instruction_forms:
|
||||
imd: int
|
||||
source: false
|
||||
destination: false
|
||||
- name: fmla
|
||||
operands:
|
||||
- class: register
|
||||
prefix: "*"
|
||||
shape: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: "*"
|
||||
shape: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: "*"
|
||||
shape: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: "*"
|
||||
shape: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- name: fmla
|
||||
operands:
|
||||
- class: register
|
||||
|
||||
@@ -212,6 +212,67 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: not
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: not
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: and
|
||||
operands:
|
||||
- class: "immediate"
|
||||
@@ -422,6 +483,17 @@ instruction_forms:
|
||||
name: "eax"
|
||||
source: false
|
||||
destination: true
|
||||
- name: [cqo, cqto]
|
||||
operands: []
|
||||
hidden_operands:
|
||||
- class: "register"
|
||||
name: "rax"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "rdx"
|
||||
source: false
|
||||
destination: true
|
||||
- name: [cltq, cdqe]
|
||||
operands: []
|
||||
hidden_operands:
|
||||
@@ -2639,7 +2711,7 @@ instruction_forms:
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pxor]
|
||||
breaks_pedendency_on_equal_operands: true
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
@@ -2660,7 +2732,7 @@ instruction_forms:
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pxor]
|
||||
breaks_pedendency_on_equal_operands: true
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
@@ -3321,6 +3393,42 @@ instruction_forms:
|
||||
name: "ID"
|
||||
source: true
|
||||
destination: false
|
||||
- name: sbb
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: true
|
||||
destination: true
|
||||
operation: "op2['value'] -= (op1['value'])" # + CF['value'])" TODO
|
||||
- name: sub
|
||||
operands:
|
||||
- class: "immediate"
|
||||
@@ -3358,7 +3466,7 @@ instruction_forms:
|
||||
destination: true
|
||||
operation: "op2['value'] -= op1['value']"
|
||||
- name: sub
|
||||
breaks_pedendency_on_equal_operands: true
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
@@ -3846,7 +3954,7 @@ instruction_forms:
|
||||
source: false
|
||||
destination: true
|
||||
- name: vzeroall
|
||||
breaks_pedendency_on_equal_operands: true
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands: []
|
||||
hidden_operands:
|
||||
- class: "register"
|
||||
@@ -4003,7 +4111,7 @@ instruction_forms:
|
||||
source: true
|
||||
destination: true
|
||||
- name: [xorps, xorpd]
|
||||
breaks_pedendency_on_equal_operands: true
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
@@ -4024,7 +4132,7 @@ instruction_forms:
|
||||
source: true
|
||||
destination: true
|
||||
- name: [vxorpd, vxorps]
|
||||
breaks_pedendency_on_equal_operands: true
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "*"
|
||||
@@ -4039,7 +4147,56 @@ instruction_forms:
|
||||
source: false
|
||||
destination: true
|
||||
- name: xor
|
||||
breaks_pedendency_on_equal_operands: true
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- name: xor
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: xor
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: xor
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- name: xor
|
||||
breaks_dependency_on_equal_operands: true
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
|
||||
@@ -140,9 +140,11 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
|
||||
continue
|
||||
# skip if measured TP is smaller than computed
|
||||
if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]),
|
||||
float(x.attrib["TP_unrolled"]))
|
||||
for x in arch_tag.findall("measurement")][0]:
|
||||
if [
|
||||
float(x.attrib["TP_ports"])
|
||||
> min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
|
||||
for x in arch_tag.findall("measurement")
|
||||
][0]:
|
||||
print(
|
||||
"Calculated TP is greater than measured TP.",
|
||||
iform,
|
||||
@@ -160,13 +162,15 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
throughput = float(measurement_tag.attrib["TP_ports"])
|
||||
else:
|
||||
throughput = min(
|
||||
measurement_tag.attrib.get("TP_loop", float('inf')),
|
||||
measurement_tag.attrib.get("TP_unroll", float('inf')),
|
||||
measurement_tag.attrib.get("TP", float('inf')),
|
||||
measurement_tag.attrib.get("TP_loop", float("inf")),
|
||||
measurement_tag.attrib.get("TP_unroll", float("inf")),
|
||||
measurement_tag.attrib.get("TP", float("inf")),
|
||||
)
|
||||
if throughput == float('inf'):
|
||||
if throughput == float("inf"):
|
||||
throughput = None
|
||||
uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
|
||||
uops = (
|
||||
int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
|
||||
)
|
||||
if "ports" in measurement_tag.attrib:
|
||||
port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
|
||||
latencies = [
|
||||
@@ -202,7 +206,11 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
# Check if all are equal
|
||||
if port_pressure:
|
||||
if port_pressure[1:] != port_pressure[:-1]:
|
||||
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
|
||||
print(
|
||||
"Contradicting port occupancies, using latest IACA:",
|
||||
iform,
|
||||
file=sys.stderr,
|
||||
)
|
||||
port_pressure = port_pressure[-1]
|
||||
else:
|
||||
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
|
||||
@@ -222,10 +230,12 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
port_4 = True
|
||||
# Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
|
||||
if port_23 and not port_4:
|
||||
if arch.upper() in ["SNB", "IVB"] and any(
|
||||
[p.get('name', '') == 'ymm' for p in parameters]) and \
|
||||
not '128' in mnemonic:
|
||||
# x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in
|
||||
if (
|
||||
arch.upper() in ["SNB", "IVB"]
|
||||
and any([p.get("name", "") == "ymm" for p in parameters])
|
||||
and not ("128" in mnemonic)
|
||||
):
|
||||
# x = 2 if SNB or IVB and ymm register in any operand and not '128' in
|
||||
# instruction name
|
||||
port2D3D_pressure = 2
|
||||
else:
|
||||
|
||||
321
osaca/data/pmevo_importer.py
Executable file
321
osaca/data/pmevo_importer.py
Executable file
@@ -0,0 +1,321 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
|
||||
from asmbench import bench, op
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
def build_bench_instruction(name, operands):
    """
    Convert an OSACA model instruction into an asmbench instruction template.

    Every operand is rendered as a ``{direction:shape:constraint}`` placeholder. The first
    operand is emitted with direction ``dst``, all following operands with ``src``.

    :param name: instruction mnemonic, used as the leading token of the template
    :type name: str
    :param operands: operand dictionaries; each needs a ``class`` key plus the
        class-specific keys read below (``prefix``, ``shape``, ``shift_op``, ``shift``)
    :type operands: list
    :returns: the asmbench template string, or `None` if any operand cannot be expressed
    """
    # Scalar register prefix -> (shape, constraint).
    scalar_registers = {
        "x": ("i64", "r"),
        "s": ("float", "w"),
        "d": ("double", "w"),
    }
    # Vector element shape -> vector type of a "v" register.
    vector_shapes = {
        "b": "<16 x i8>",
        "h": "<8 x i16>",
        "s": "<4 x float>",
        "d": "<2 x double>",
    }
    # Different instructions have different ranges for literals,
    # so we need to pick something "reasonable" for each.
    zero_literal_names = (
        "cmeq",
        "cmge",
        "cmgt",
        "cmle",
        "cmlt",
        "fcmeq",
        "fcmge",
        "fcmgt",
        "fcmle",
        "fcmlt",
        "fcmp",
    )
    mask_literal_names = ("and", "ands", "eor", "eors", "orr", "orrs")
    small_literal_names = (
        "bfi",
        "extr",
        "sbfiz",
        "sbfx",
        "shl",
        "sshr",
        "ubfiz",
        "ubfx",
        "ushr",
    )

    def shift_suffix(operand):
        # Render ", <shift_op>[ <amount>]" for a *_shift operand.
        suffix = ", {}".format(operand["shift_op"])
        if operand.get("shift") is not None:
            suffix += " {}".format(operand["shift"])
        return suffix

    asmbench_inst = name
    direction = "dst"
    separator = " "
    for operand in operands:
        # Reset for every operand: a shift belongs to the operand that declares it and
        # must not be repeated on the operands that follow.
        shift = ""
        operand_class = operand["class"]
        if operand_class in ("register", "register_shift"):
            prefix = operand["prefix"]
            if prefix in scalar_registers:
                shape, constraint = scalar_registers[prefix]
            elif prefix == "v":
                constraint = "w"
                shape = vector_shapes.get(operand["shape"])
                if shape is None:
                    return None
            else:
                return None
            if operand_class == "register_shift":
                shift = shift_suffix(operand)
        elif operand_class in ("immediate", "immediate_shift"):
            shape = "i32"
            if name in zero_literal_names:
                constraint = "0"
            elif name in mask_literal_names:
                constraint = "255"
            elif name in small_literal_names:
                constraint = "7"
            else:
                constraint = "42"
            if operand_class == "immediate_shift":
                shift = shift_suffix(operand)
        else:
            # Any other operand class (e.g. memory) is not translated.
            return None
        asmbench_inst += "{}{{{}:{}:{}}}{}".format(separator, direction, shape, constraint, shift)
        # Only the first operand is the destination.
        direction = "src"
        separator = ", "
    return asmbench_inst
|
||||
|
||||
|
||||
def bench_instruction(name, operands):
    """
    Translate an OSACA model instruction to asmbench form and benchmark it.

    :param name: instruction mnemonic
    :param operands: operand dictionaries, passed on to :func:`build_bench_instruction`
    :returns: ``(None, None)`` if the instruction could not be translated, otherwise
        whatever ``bench.bench_instructions`` returns for the single instruction
    """
    template = build_bench_instruction(name, operands)
    if template is not None:
        return bench.bench_instructions([op.Instruction.from_string(template)])
    # Translation failed, so there is nothing to measure.
    return (None, None)
|
||||
|
||||
|
||||
def round_cycles(value):
    """
    Snap a measured cycle count to the value stored in the model.

    :param value: measured number of cycles
    :type value: float
    :returns: 0.5 for measurements below 0.9, otherwise ``value + 0.1`` rounded down
    :rtype: float
    """
    # Sub-cycle results are frequent enough to keep; measurements over-estimate a lot
    # in that range, hence the high 0.9 bound. Everything else is rounded down because
    # measurements usually over-estimate, with 0.1 of slack for slightly smaller values.
    return 0.5 if value < 0.9 else float(math.floor(value + 0.1))
|
||||
|
||||
|
||||
def operand_parse(op, state):
    """
    Parse one operand of a PMEvo instruction and emit an OSACA model operand.

    ``state`` carries context between the operands of one instruction: register operands
    record the floating-point ``type`` used by later float immediates, a ``MEM`` operand
    records the ``memory_base`` used by the following ``MIMM`` operand, and the last
    emitted operand is stored under ``previous``.

    :param op: operand text, starting with ``_``
    :type op: str
    :param state: mutable per-instruction parser state, updated in place
    :type state: dict
    :returns: the operand dictionary, or `None` for a ``MEM`` operand (which only
        updates ``state``)
    :raises ValueError: if the operand is unknown or unsupported, or if it relies on
        ``state`` entries that no earlier operand has provided
    """
    parameter = {}

    if op.startswith("_((REG:"):
        parts = op.split(".")
        # Drop the leading "_((REG:" and the trailing "))".
        register = parts[0][7:-2]
        _read_write, register_type, bits = register.split(":")

        parameter["class"] = "register"
        if register_type == "G":
            if bits == "32":
                parameter["prefix"] = "r"
            elif bits == "64":
                parameter["prefix"] = "x"
            else:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
        elif register_type == "F":
            if bits == "32":
                parameter["prefix"] = "s"
                state["type"] = "float"
            elif bits == "64":
                parameter["prefix"] = "d"
                state["type"] = "double"
            elif bits == "128":
                parameter["prefix"] = "q"
            elif bits == "VEC":
                # The vector shape follows the register after a "."; report a missing
                # shape as ValueError so callers handle it like any other bad operand.
                if len(parts) < 2:
                    raise ValueError("Missing vector shape for register {}".format(op))
                vec_shape = parts[1]
                parameter["prefix"] = "v"
                if vec_shape == "16b":
                    parameter["shape"] = "b"
                elif vec_shape == "8h":
                    parameter["shape"] = "h"
                elif vec_shape == "4s":
                    parameter["shape"] = "s"
                    state["type"] = "float"
                elif vec_shape == "2d":
                    parameter["shape"] = "d"
                    state["type"] = "double"
                else:
                    raise ValueError("Invalid vector shape {}".format(vec_shape))
            else:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
        else:
            raise ValueError("Unknown register type {}".format(register_type))
    elif op.startswith("_[((MEM:"):
        bits = op[8:-2].split(":")[0]
        if bits == "64":
            state["memory_base"] = "x"
        else:
            raise ValueError("Invalid register bits for MEM {}".format(bits))
        # The memory operand itself is emitted by the MIMM operand that follows.
        return None
    elif op.startswith("_((MIMM:"):
        bits = op[8:-3].split(":")[0]
        if bits == "16":
            if "memory_base" not in state:
                raise ValueError("Memory offset {} without preceding MEM operand".format(op))
            parameter["class"] = "memory"
            parameter["base"] = state["memory_base"]
            parameter["offset"] = "imd"
            parameter["index"] = "*"
            parameter["scale"] = "*"
            parameter["post-indexed"] = False
            parameter["pre-indexed"] = False
        else:
            raise ValueError("Invalid register bits for MEM {}".format(bits))
    elif re.fullmatch("_#?-?(0x)?[0-9a-f]+", op):
        parameter["class"] = "immediate"
        parameter["imd"] = "int"
    elif re.fullmatch("_#?-?[0-9]*\\.[0-9]*", op):
        # The literal's type is taken from the float register seen earlier.
        if "type" not in state:
            raise ValueError(
                "Floating-point immediate {} without preceding floating-point register".format(op)
            )
        parameter["class"] = "immediate"
        parameter["imd"] = state["type"]
    elif re.fullmatch("_((sxt|uxt)[bhw]|lsl|lsr|asr|rol|ror)(_[0-9]+)?", op):
        # Shift/extend operands are not supported; the whole instruction is rejected.
        raise ValueError("Skipping instruction with shift operand: {}".format(op))
    else:
        raise ValueError("Unknown operand {}".format(op))

    state["previous"] = parameter
    return parameter
|
||||
|
||||
|
||||
def port_convert(ports):
    """
    Merge consecutive identical port sets and emit them in OSACA's format.

    :param ports: one entry per cycle, each an iterable of port name strings
    :returns: list of ``[cycles, ports]`` pairs, where ``ports`` is the concatenation
        of the entry's port names and ``cycles`` counts how often it repeated in a row
    """
    # FIXME: This does not handle having more than 10 ports.
    merged = []
    for entry in ports:
        joined = "".join(entry)
        if merged and merged[-1][1] == joined:
            # Same port set as the previous cycle: extend the current run.
            merged[-1][0] += 1
        else:
            merged.append([1, joined])
    return merged
|
||||
|
||||
|
||||
def throughput_guess(ports):
    """
    Estimate the throughput of an instruction from its port mapping.

    The entry with the fewest candidate ports is taken as the bottleneck; this is only
    an approximation and is *not* always true.

    :param ports: one entry per cycle, each a sized collection of candidate ports
    :returns: number of entries divided by the size of the smallest entry
    :rtype: float
    """
    narrowest = min(len(candidates) for candidates in ports)
    return float(len(ports)) / narrowest
|
||||
|
||||
|
||||
def latency_guess(ports):
    """
    Estimate the latency of an instruction from its port mapping.

    Each entry in ``ports`` equates to one cycle on any of its ports, so the number of
    entries is about as good an estimate as the mapping allows.

    :param ports: one entry per cycle
    :returns: number of entries
    :rtype: float
    """
    cycle_count = len(ports)
    return float(cycle_count)
|
||||
|
||||
|
||||
def extract_model(mapping, arch, template_model, asmbench):
    """
    Build an OSACA machine model from a PMEvo port mapping.

    :param mapping: parsed mapping; ``mapping["arch"]["ports"]``,
        ``mapping["arch"]["insns"]`` and ``mapping["assignment"]`` are read
    :type mapping: dict
    :param arch: architecture name passed to ``MachineModel.get_isa_for_arch``
    :type arch: str
    :param template_model: existing model to extend, or `None` to start from a new one
    :param asmbench: if true, replace the guessed latency/throughput by measurements
    :type asmbench: bool
    :returns: the filled machine model, or `None` if no ISA is known for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except ValueError:
        print("Skipping...", file=sys.stderr)
        return None

    mm = MachineModel(isa=isa) if template_model is None else template_model

    for port in mapping["arch"]["ports"]:
        mm.add_port(port)

    for insn in mapping["arch"]["insns"]:
        try:
            ports = mapping["assignment"][insn]

            # Parse instruction: the second "_"-separated field is the mnemonic, the
            # remainder is re-joined and split on "," into the operand texts.
            fields = insn.split("_")
            name = fields[1]
            operand_texts = ("_" + "_".join(fields[2:])).split(",")
            state = {}
            operands = []
            for text in operand_texts:
                parsed = operand_parse(text, state)
                if parsed is None:
                    # Operand only updated the parser state.
                    continue
                operands.append(parsed)

            # Port pressures from mapping
            port_pressure = port_convert(ports)

            # Initial guessed throughput and latency
            throughput = throughput_guess(ports)
            latency = latency_guess(ports)

            # Benchmark with asmbench; keep the guess for whatever cannot be measured
            if asmbench:
                bench_latency, bench_throughput = bench_instruction(name, operands)
                if bench_throughput is None:
                    print("Failed to measure throughput for instruction {}.".format(insn))
                else:
                    throughput = round_cycles(bench_throughput)
                if bench_latency is None:
                    print("Failed to measure latency for instruction {}.".format(insn))
                else:
                    latency = round_cycles(bench_latency)

            # Insert instruction if not already found (can happen with template);
            # no u-ops data is available, hence the trailing None.
            if mm.get_instruction(name, operands) is None:
                mm.set_instruction(name, operands, latency, port_pressure, throughput, None)
        except ValueError as e:
            print("Failed to parse instruction {}: {}.".format(insn, e))

    return mm
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point: convert a PMEvo mapping into an OSACA model file.

    Reads the mapping JSON given on the command line, optionally extends a template
    model, optionally benchmarks with asmbench, and writes the result to
    ``<arch>.yml`` in the current directory, where ``<arch>`` is the lower-cased
    architecture name from the mapping. Exits with status 1 if no model can be built.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("json", help="path of mapping.json")
    parser.add_argument("yaml", help="path of template.yml", nargs="?")
    parser.add_argument(
        "--asmbench", help="Benchmark latency and throughput using asmbench.", action="store_true"
    )
    args = parser.parse_args()

    # The context manager closes the file even if the JSON cannot be parsed.
    with open(args.json, "r") as json_file:
        mapping = json.load(json_file)
    arch = mapping["arch"]["name"].lower()

    template_model = None
    if args.yaml is not None:
        template_model = MachineModel(path_to_yaml=args.yaml)

    if args.asmbench:
        bench.setup_llvm()

    model = extract_model(mapping, arch, template_model, args.asmbench)
    if model is None:
        # extract_model() returns None when no ISA is known for the architecture;
        # stop here instead of failing on None.dump() below.
        print("Could not build a machine model for architecture {}.".format(arch), file=sys.stderr)
        sys.exit(1)

    # Serialize first so a failing dump does not leave a truncated output file behind.
    content = model.dump()
    with open("{}.yml".format(arch), "w") as f:
        f.write(content)


if __name__ == "__main__":
    main()
|
||||
@@ -40487,5 +40487,82 @@ instruction_forms:
|
||||
port_pressure: [[4, '0156'], [1, '06']] # uops.info import
|
||||
throughput: 1.5 # uops.info import
|
||||
uops: 51 # uops.info import
|
||||
# uops.info import
|
||||
- name: VPSCATTERDD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: zmm # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
latency: 2 # model_importer.py SKX
|
||||
port_pressure: [[1, '0'], [2, '0156'], [16, '23'], [16, '4'], [1, '5']] # model_importer.py SKX
|
||||
throughput: 16.0 # model_importer.py SKX
|
||||
uops: 42 # model_importer.py SKX
|
||||
- name: VPSCATTERDD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
latency: 3 # model_importer.py SKX
|
||||
port_pressure: [[1, '0'], [2, '0156'], [4, '23'], [4, '4'], [1, '5']] # model_importer.py SKX
|
||||
throughput: 4.0 # model_importer.py SKX
|
||||
uops: 18 # model_importer.py SKX
|
||||
- name: VPSCATTERDD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: ymm # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
latency: 1 # model_importer.py SKX
|
||||
port_pressure: [[1, '0'], [2, '0156'], [8, '23'], [8, '4'], [1, '5']] # model_importer.py SKX
|
||||
throughput: 8.0 # model_importer.py SKX
|
||||
uops: 26 # model_importer.py SKX
|
||||
- name: VPGATHERDD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: zmm # model_importer.py SKX
|
||||
latency: 6 # model_importer.py SKX
|
||||
port_pressure: [[1, '0'], [1, '0156'], [1, '05'], [16, '23'], [1, [2D, 3D]]] # model_importer.py SKX
|
||||
throughput: 8.0 # model_importer.py SKX
|
||||
uops: 4 # model_importer.py SKX
|
||||
- name: VPGATHERDD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: xmm # model_importer.py SKX
|
||||
latency: 3 # model_importer.py SKX
|
||||
port_pressure: [[1, '0'], [4, '23'], [1, '5'], [1, [2D, 3D]]] # model_importer.py SKX
|
||||
throughput: 2.0 # model_importer.py SKX
|
||||
uops: 4 # model_importer.py SKX
|
||||
- name: VPGATHERDD # model_importer.py SKX
|
||||
operands: # model_importer.py SKX
|
||||
- class: memory # model_importer.py SKX
|
||||
base: '*' # model_importer.py SKX
|
||||
offset: '*' # model_importer.py SKX
|
||||
index: '*' # model_importer.py SKX
|
||||
scale: '*' # model_importer.py SKX
|
||||
- class: register # model_importer.py SKX
|
||||
name: ymm # model_importer.py SKX
|
||||
latency: 4 # model_importer.py SKX
|
||||
port_pressure: [[1, '0'], [1, '015'], [1, '0156'], [8, '23'], [1, [2D, 3D]]] # model_importer.py SKX
|
||||
throughput: 4.0 # model_importer.py SKX
|
||||
uops: 4 # model_importer.py SKX
|
||||
|
||||
|
||||
@@ -125,7 +125,10 @@ def _get_asmbench_output(input_data, isa):
|
||||
db_entries = {}
|
||||
for i in range(0, len(input_data), 4):
|
||||
if input_data[i + 3].strip() != "":
|
||||
print("asmbench output not in the correct format! Format must be: ", file=sys.stderr)
|
||||
print(
|
||||
"asmbench output not in the correct format! Format must be: ",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
"-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n"
|
||||
"Throughput: Y cycles\n\n-------------",
|
||||
@@ -540,7 +543,16 @@ def _get_sanity_report(
|
||||
|
||||
|
||||
def _get_sanity_report_verbose(
|
||||
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, bad_operands, colors=False
|
||||
total,
|
||||
m_tp,
|
||||
m_l,
|
||||
m_pp,
|
||||
suspic_instr,
|
||||
dup_arch,
|
||||
dup_isa,
|
||||
only_isa,
|
||||
bad_operands,
|
||||
colors=False,
|
||||
):
|
||||
"""Get the verbose part of the sanity report with all missing instruction forms."""
|
||||
BRIGHT_CYAN = "\033[1;36;1m" if colors else ""
|
||||
|
||||
@@ -163,6 +163,7 @@ class Frontend(object):
|
||||
ignore_unknown=False,
|
||||
arch_warning=False,
|
||||
length_warning=False,
|
||||
lcd_warning=False,
|
||||
verbose=False,
|
||||
):
|
||||
"""
|
||||
@@ -176,17 +177,19 @@ class Frontend(object):
|
||||
:param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
|
||||
`False`
|
||||
:type ignore_unknown: boolean, optional
|
||||
:param print_arch_warning: flag for additional user warning to specify micro-arch
|
||||
:type print_arch_warning: boolean, optional
|
||||
:param print_length_warning: flag for additional user warning to specify kernel length with
|
||||
:param arch_warning: flag for additional user warning to specify micro-arch
|
||||
:type arch_warning: boolean, optional
|
||||
:param length_warning: flag for additional user warning to specify kernel length with
|
||||
--lines
|
||||
:type print_length_warning: boolean, optional
|
||||
:type length_warning: boolean, optional
|
||||
:param lcd_warning: flag for additional user warning due to LCD analysis timed out
|
||||
:type lcd_warning: boolean, optional
|
||||
:param verbose: flag for verbosity level, defaults to False
|
||||
:type verbose: boolean, optional
|
||||
"""
|
||||
return (
|
||||
self._header_report()
|
||||
+ self._user_warnings(arch_warning, length_warning)
|
||||
+ self._user_warnings_header(arch_warning, length_warning)
|
||||
+ self._symbol_map()
|
||||
+ self.combined_view(
|
||||
kernel,
|
||||
@@ -194,11 +197,17 @@ class Frontend(object):
|
||||
kernel_dg.get_loopcarried_dependencies(),
|
||||
ignore_unknown,
|
||||
)
|
||||
+ self._user_warnings_footer(lcd_warning)
|
||||
+ self.loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies())
|
||||
)
|
||||
|
||||
def combined_view(
|
||||
self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True
|
||||
self,
|
||||
kernel,
|
||||
cp_kernel: KernelDG,
|
||||
dep_dict,
|
||||
ignore_unknown=False,
|
||||
show_cmnts=True,
|
||||
):
|
||||
"""
|
||||
Build combined view of kernel including port pressure (TP), a CP column and a
|
||||
@@ -225,7 +234,7 @@ class Frontend(object):
|
||||
separator += "--" + len(str(kernel[-1]["line_number"])) * "-"
|
||||
col_sep = "|"
|
||||
# for LCD/CP column
|
||||
separator += "-" * (2 * 6 + len(col_sep)) + "-" * len(col_sep)
|
||||
separator += "-" * (2 * 6 + len(col_sep)) + "-" * len(col_sep) + "--"
|
||||
sep_list = self._get_separator_list(col_sep)
|
||||
headline = "Port pressure in cycles"
|
||||
headline_str = "{{:^{}}}".format(len(separator))
|
||||
@@ -234,22 +243,20 @@ class Frontend(object):
|
||||
lcd_sum = 0.0
|
||||
lcd_lines = {}
|
||||
if dep_dict:
|
||||
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency'])
|
||||
lcd_sum = dep_dict[longest_lcd]['latency']
|
||||
lcd_lines = {instr["line_number"]: lat
|
||||
for instr, lat in dep_dict[longest_lcd]["dependencies"]}
|
||||
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
|
||||
lcd_sum = dep_dict[longest_lcd]["latency"]
|
||||
lcd_lines = {
|
||||
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
|
||||
}
|
||||
|
||||
s += headline_str.format(headline) + "\n"
|
||||
s += (
|
||||
(
|
||||
lineno_filler
|
||||
+ self._get_port_number_line(port_len, separator=col_sep)
|
||||
+ "{}{:^6}{}{:^6}{}".format(col_sep, "CP", col_sep, "LCD", col_sep)
|
||||
)
|
||||
+ "\n"
|
||||
+ separator
|
||||
+ "\n"
|
||||
port_line = (
|
||||
lineno_filler
|
||||
+ self._get_port_number_line(port_len, separator=col_sep)
|
||||
+ "{}{:^6}{}{:^6}{}".format(col_sep, "CP", col_sep, "LCD", col_sep)
|
||||
)
|
||||
separator = "-" * len(port_line)
|
||||
s += headline_str.format(headline) + "\n"
|
||||
s += port_line + "\n" + separator + "\n"
|
||||
for instruction_form in kernel:
|
||||
if show_cmnts is False and self._is_comment(instruction_form):
|
||||
continue
|
||||
@@ -290,7 +297,7 @@ class Frontend(object):
|
||||
s += (
|
||||
lineno_filler
|
||||
+ self._get_port_pressure(tp_sum, port_len, separator=" ")
|
||||
+ " {:^6} {:^6}\n".format(cp_sum, lcd_sum)
|
||||
+ " {:>5} {:>5} \n".format(cp_sum, lcd_sum)
|
||||
)
|
||||
return s
|
||||
|
||||
@@ -311,18 +318,24 @@ class Frontend(object):
|
||||
).format(amount, "-" * len(str(amount)))
|
||||
return s
|
||||
|
||||
def _user_warnings(self, arch_warning, length_warning):
|
||||
def _user_warnings_header(self, arch_warning, length_warning):
|
||||
"""Returns warning texts for giving the user more insight in what he is doing."""
|
||||
dashed_line = (
|
||||
"-------------------------------------------------------------------------"
|
||||
"------------------------\n"
|
||||
)
|
||||
arch_text = (
|
||||
"WARNING: No micro-architecture was specified and a default uarch was used.\n"
|
||||
" Specify the uarch with --arch. See --help for more information.\n"
|
||||
"-------------------------- WARNING: No micro-architecture was specified "
|
||||
"-------------------------\n"
|
||||
" A default uarch for this particular ISA was used. Specify "
|
||||
"the uarch with --arch.\n See --help for more information.\n" + dashed_line
|
||||
)
|
||||
length_text = (
|
||||
"WARNING: You are analyzing a large amount of instruction forms. Analysis "
|
||||
"across loops/block boundaries often do not make much sense.\n"
|
||||
" Specify the kernel length with --length. See --help for more "
|
||||
"information.\n"
|
||||
" If this is intentional, you can safely ignore this message.\n"
|
||||
"----------------- WARNING: You are analyzing a large amount of instruction forms "
|
||||
"----------------\n Analysis across loops/block boundaries often do not make"
|
||||
" much sense.\n Specify the kernel length with --length. See --help for more "
|
||||
"information.\n If this is intentional, you can safely ignore this message.\n"
|
||||
+ dashed_line
|
||||
)
|
||||
|
||||
warnings = ""
|
||||
@@ -331,6 +344,24 @@ class Frontend(object):
|
||||
warnings += "\n"
|
||||
return warnings
|
||||
|
||||
def _user_warnings_footer(self, lcd_warning):
|
||||
"""Returns warning texts for giving the user more insight in what he is doing."""
|
||||
dashed_line = (
|
||||
"-------------------------------------------------------------------------"
|
||||
"------------------------\n"
|
||||
)
|
||||
lcd_text = (
|
||||
"-------------------------------- WARNING: LCD analysis timed out "
|
||||
"-------------------------------\n While searching for all dependency chains"
|
||||
" the analysis timed out and might be\n incomplete. Decrease the number of "
|
||||
"instructions or set the timeout threshold\n with --lcd-timeout. See --help"
|
||||
" for more information.\n" + dashed_line
|
||||
)
|
||||
warnings = "\n"
|
||||
warnings += lcd_text if lcd_warning else ""
|
||||
warnings += "\n"
|
||||
return warnings
|
||||
|
||||
def _get_separator_list(self, separator, separator_2=" "):
|
||||
"""Creates column view for separators in the TP/combined view."""
|
||||
separator_list = []
|
||||
|
||||
@@ -10,7 +10,13 @@ from functools import lru_cache
|
||||
from osaca.db_interface import import_benchmark_output, sanity_check
|
||||
from osaca.frontend import Frontend
|
||||
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
|
||||
from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section
|
||||
from osaca.semantics import (
|
||||
INSTR_FLAGS,
|
||||
ArchSemantics,
|
||||
KernelDG,
|
||||
MachineModel,
|
||||
reduce_to_section,
|
||||
)
|
||||
|
||||
|
||||
SUPPORTED_ARCHS = [
|
||||
@@ -26,6 +32,7 @@ SUPPORTED_ARCHS = [
|
||||
"TX2",
|
||||
"N1",
|
||||
"A64FX",
|
||||
"A72",
|
||||
]
|
||||
DEFAULT_ARCHS = {
|
||||
"aarch64": "A64FX",
|
||||
@@ -37,7 +44,8 @@ DEFAULT_ARCHS = {
|
||||
def __read(*names, **kwargs):
|
||||
"""Reads in file"""
|
||||
with io.open(
|
||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
||||
os.path.join(os.path.dirname(__file__), *names),
|
||||
encoding=kwargs.get("encoding", "utf8"),
|
||||
) as fp:
|
||||
return fp.read()
|
||||
|
||||
@@ -79,13 +87,16 @@ def create_parser(parser=None):
|
||||
|
||||
# Add arguments
|
||||
parser.add_argument(
|
||||
"-V", "--version", action="version", version="%(prog)s " + __find_version("__init__.py")
|
||||
"-V",
|
||||
"--version",
|
||||
action="version",
|
||||
version="%(prog)s " + __find_version("__init__.py"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--arch",
|
||||
type=str,
|
||||
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, N1, "
|
||||
"A64FX). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.",
|
||||
"A64FX, A72). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fixed",
|
||||
@@ -146,6 +157,16 @@ def create_parser(parser=None):
|
||||
action="store_true",
|
||||
help="Ignore if instructions cannot be found in the data file and print analysis anyway.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lcd-timeout",
|
||||
dest="lcd_timeout",
|
||||
metavar="SECONDS",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Set timeout in seconds for LCD analysis. After timeout, OSACA will continue"
|
||||
" its analysis with the dependency paths found up to this point. Defaults to 10."
|
||||
" Set to -1 for no timeout.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="count", default=0, help="Increases verbosity level."
|
||||
)
|
||||
@@ -157,7 +178,9 @@ def create_parser(parser=None):
|
||||
help="Write analysis to this file (default to stdout).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"file", type=argparse.FileType("r"), help="Path to object (ASM or instruction file)."
|
||||
"file",
|
||||
type=argparse.FileType("r"),
|
||||
help="Path to object (ASM or instruction file).",
|
||||
)
|
||||
|
||||
return parser
|
||||
@@ -172,6 +195,9 @@ def check_arguments(args, parser):
|
||||
"""
|
||||
supported_import_files = ["ibench", "asmbench"]
|
||||
|
||||
# manually set CLX to CSX to support both abbreviations
|
||||
if args.arch and args.arch.upper() == "CLX":
|
||||
args.arch = "CSX"
|
||||
if args.arch is None and (args.check_db or "import_data" in args):
|
||||
parser.error(
|
||||
"DB check and data import cannot work with a default microarchitecture. "
|
||||
@@ -303,7 +329,7 @@ def inspect(args, output_file=sys.stdout):
|
||||
semantics.assign_optimal_throughput(kernel)
|
||||
|
||||
# Create DiGraphs
|
||||
kernel_graph = KernelDG(kernel, parser, machine_model, semantics)
|
||||
kernel_graph = KernelDG(kernel, parser, machine_model, semantics, args.lcd_timeout)
|
||||
if args.dotpath is not None:
|
||||
kernel_graph.export_graph(args.dotpath if args.dotpath != "." else None)
|
||||
# Print analysis
|
||||
@@ -315,6 +341,7 @@ def inspect(args, output_file=sys.stdout):
|
||||
ignore_unknown=ignore_unknown,
|
||||
arch_warning=print_arch_warning,
|
||||
length_warning=print_length_warning,
|
||||
lcd_warning=kernel_graph.timed_out,
|
||||
verbose=verbose,
|
||||
),
|
||||
file=output_file,
|
||||
@@ -333,7 +360,10 @@ def run(args, output_file=sys.stdout):
|
||||
# Sanity check on DB
|
||||
verbose = True if args.verbose > 0 else False
|
||||
sanity_check(
|
||||
args.arch, verbose=verbose, internet_check=args.internet_check, output_file=output_file
|
||||
args.arch,
|
||||
verbose=verbose,
|
||||
internet_check=args.internet_check,
|
||||
output_file=output_file,
|
||||
)
|
||||
elif "import_data" in args:
|
||||
# Import microbench output file into DB
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
from copy import deepcopy
|
||||
import pyparsing as pp
|
||||
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
@@ -27,9 +26,9 @@ class ParserAArch64(BaseParser):
|
||||
pp.ZeroOrMore(pp.Word(pp.printables))
|
||||
).setResultsName(self.COMMENT_ID)
|
||||
# Define ARM assembly identifier
|
||||
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
|
||||
"value"
|
||||
)
|
||||
decimal_number = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
|
||||
).setResultsName("value")
|
||||
hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value")
|
||||
relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":"))
|
||||
first = pp.Word(pp.alphas + "_.", exact=1)
|
||||
@@ -153,7 +152,9 @@ class ParserAArch64(BaseParser):
|
||||
pp.Literal("{")
|
||||
+ (
|
||||
pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list")
|
||||
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName("range")
|
||||
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName(
|
||||
"range"
|
||||
)
|
||||
)
|
||||
+ pp.Literal("}")
|
||||
+ pp.Optional(index)
|
||||
@@ -292,8 +293,9 @@ class ParserAArch64(BaseParser):
|
||||
try:
|
||||
result = self.parse_instruction(line)
|
||||
except (pp.ParseException, KeyError) as e:
|
||||
raise e
|
||||
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
|
||||
raise ValueError(
|
||||
"Unable to parse {!r} on line {}".format(line, line_number)
|
||||
) from e
|
||||
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
|
||||
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
|
||||
instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]
|
||||
@@ -313,19 +315,24 @@ class ParserAArch64(BaseParser):
|
||||
# Add operands to list
|
||||
# Check first operand
|
||||
if "operand1" in result:
|
||||
operands.append(self.process_operand(result["operand1"]))
|
||||
operand = self.process_operand(result["operand1"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check second operand
|
||||
if "operand2" in result:
|
||||
operands.append(self.process_operand(result["operand2"]))
|
||||
operand = self.process_operand(result["operand2"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check third operand
|
||||
if "operand3" in result:
|
||||
operands.append(self.process_operand(result["operand3"]))
|
||||
operand = self.process_operand(result["operand3"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check fourth operand
|
||||
if "operand4" in result:
|
||||
operands.append(self.process_operand(result["operand4"]))
|
||||
operand = self.process_operand(result["operand4"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check fifth operand
|
||||
if "operand5" in result:
|
||||
operands.append(self.process_operand(result["operand5"]))
|
||||
operand = self.process_operand(result["operand5"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
|
||||
return_dict = AttrDict(
|
||||
{
|
||||
@@ -347,8 +354,8 @@ class ParserAArch64(BaseParser):
|
||||
if self.REGISTER_ID in operand and (
|
||||
"list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID]
|
||||
):
|
||||
# TODO: discuss if ranges should be converted to lists
|
||||
return self.process_register_list(operand[self.REGISTER_ID])
|
||||
# resolve ranges and lists
|
||||
return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID]))
|
||||
if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"] == "sp":
|
||||
return self.process_sp_register(operand[self.REGISTER_ID])
|
||||
# add value attribute to floating point immediates without exponent
|
||||
@@ -366,6 +373,8 @@ class ParserAArch64(BaseParser):
|
||||
offset = memory_address.get("offset", None)
|
||||
if isinstance(offset, list) and len(offset) == 1:
|
||||
offset = offset[0]
|
||||
if offset is not None and "value" in offset:
|
||||
offset["value"] = int(offset["value"], 0)
|
||||
base = memory_address.get("base", None)
|
||||
index = memory_address.get("index", None)
|
||||
scale = 1
|
||||
@@ -382,7 +391,12 @@ class ParserAArch64(BaseParser):
|
||||
if "pre_indexed" in memory_address:
|
||||
new_dict["pre_indexed"] = True
|
||||
if "post_indexed" in memory_address:
|
||||
new_dict["post_indexed"] = memory_address["post_indexed"]
|
||||
if "value" in memory_address["post_indexed"]:
|
||||
new_dict["post_indexed"] = {
|
||||
"value": int(memory_address["post_indexed"]["value"], 0)
|
||||
}
|
||||
else:
|
||||
new_dict["post_indexed"] = memory_address["post_indexed"]
|
||||
return AttrDict({self.MEMORY_ID: new_dict})
|
||||
|
||||
def process_sp_register(self, register):
|
||||
@@ -391,6 +405,37 @@ class ParserAArch64(BaseParser):
|
||||
reg["prefix"] = "x"
|
||||
return AttrDict({self.REGISTER_ID: reg})
|
||||
|
||||
def resolve_range_list(self, operand):
|
||||
"""
|
||||
Resolve range or list register operand to list of registers.
|
||||
Returns the operand unmodified if it is neither a list nor a range.
|
||||
"""
|
||||
if "register" in operand:
|
||||
if "list" in operand.register:
|
||||
index = operand.register.get("index")
|
||||
range_list = []
|
||||
for reg in operand.register.list:
|
||||
reg = deepcopy(reg)
|
||||
if index is not None:
|
||||
reg["index"] = int(index, 0)
|
||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||
return range_list
|
||||
elif "range" in operand.register:
|
||||
base_register = operand.register.range[0]
|
||||
index = operand.register.get("index")
|
||||
range_list = []
|
||||
start_name = base_register.name
|
||||
end_name = operand.register.range[1].name
|
||||
for name in range(int(start_name), int(end_name) + 1):
|
||||
reg = deepcopy(base_register)
|
||||
if index is not None:
|
||||
reg["index"] = int(index, 0)
|
||||
reg["name"] = str(name)
|
||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||
return range_list
|
||||
# neither register list nor range, return unmodified
|
||||
return operand
|
||||
|
||||
def process_register_list(self, register_list):
|
||||
"""Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})"""
|
||||
# Remove unnecessarily created dictionary entries during parsing
|
||||
@@ -419,11 +464,13 @@ class ParserAArch64(BaseParser):
|
||||
if "value" in immediate:
|
||||
# normal integer value
|
||||
immediate["type"] = "int"
|
||||
# convert hex/bin immediates to dec
|
||||
immediate["value"] = self.normalize_imd(immediate)
|
||||
return AttrDict({self.IMMEDIATE_ID: immediate})
|
||||
if "base_immediate" in immediate:
|
||||
# arithmetic immediate, add calculated value as value
|
||||
immediate["shift"] = immediate["shift"][0]
|
||||
immediate["value"] = int(immediate["base_immediate"]["value"], 0) << int(
|
||||
immediate["value"] = self.normalize_imd(immediate["base_immediate"]) << int(
|
||||
immediate["shift"]["value"]
|
||||
)
|
||||
immediate["type"] = "int"
|
||||
@@ -437,10 +484,12 @@ class ParserAArch64(BaseParser):
|
||||
return AttrDict({self.IMMEDIATE_ID: immediate})
|
||||
else:
|
||||
# change 'mantissa' key to 'value'
|
||||
return AttrDict({
|
||||
self.IMMEDIATE_ID: AttrDict({
|
||||
"value": immediate[dict_name]["mantissa"],
|
||||
"type": dict_name})}
|
||||
return AttrDict(
|
||||
{
|
||||
self.IMMEDIATE_ID: AttrDict(
|
||||
{"value": immediate[dict_name]["mantissa"], "type": dict_name}
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
def process_label(self, label):
|
||||
@@ -471,10 +520,11 @@ class ParserAArch64(BaseParser):
|
||||
def normalize_imd(self, imd):
|
||||
"""Normalize immediate to decimal based representation"""
|
||||
if "value" in imd:
|
||||
if imd["value"].lower().startswith("0x"):
|
||||
# hex, return decimal
|
||||
return int(imd["value"], 16)
|
||||
return int(imd["value"], 10)
|
||||
if isinstance(imd["value"], str):
|
||||
# hex or bin, return decimal
|
||||
return int(imd["value"], 0)
|
||||
else:
|
||||
return imd["value"]
|
||||
elif "float" in imd:
|
||||
return self.ieee_to_float(imd["float"])
|
||||
elif "double" in imd:
|
||||
|
||||
@@ -22,10 +22,10 @@ class ParserX86ATT(BaseParser):
|
||||
self.isa = "x86"
|
||||
|
||||
def construct_parser(self):
|
||||
"""Create parser for ARM AArch64 ISA."""
|
||||
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
|
||||
"value"
|
||||
)
|
||||
"""Create parser for x86 AT&T ISA."""
|
||||
decimal_number = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
|
||||
).setResultsName("value")
|
||||
hex_number = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
|
||||
).setResultsName("value")
|
||||
@@ -36,12 +36,13 @@ class ParserX86ATT(BaseParser):
|
||||
# Define x86 assembly identifier
|
||||
relocation = pp.Combine(pp.Literal("@") + pp.Word(pp.alphas))
|
||||
id_offset = pp.Word(pp.nums) + pp.Suppress(pp.Literal("+"))
|
||||
first = pp.Word(pp.alphas + "_.", exact=1)
|
||||
first = pp.Word(pp.alphas + "-_.", exact=1)
|
||||
rest = pp.Word(pp.alphanums + "$_.+-")
|
||||
identifier = pp.Group(
|
||||
pp.Optional(id_offset).setResultsName("offset")
|
||||
+ pp.Combine(
|
||||
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), joinString="::"
|
||||
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"),
|
||||
joinString="::",
|
||||
).setResultsName("name")
|
||||
+ pp.Optional(relocation).setResultsName("relocation")
|
||||
).setResultsName("identifier")
|
||||
@@ -88,7 +89,7 @@ class ParserX86ATT(BaseParser):
|
||||
).setResultsName(self.IMMEDIATE_ID)
|
||||
|
||||
# Memory preparations
|
||||
offset = pp.Group(identifier | hex_number | decimal_number).setResultsName(
|
||||
offset = pp.Group(hex_number | decimal_number | identifier).setResultsName(
|
||||
self.IMMEDIATE_ID
|
||||
)
|
||||
scale = pp.Word("1248", exact=1)
|
||||
@@ -108,7 +109,8 @@ class ParserX86ATT(BaseParser):
|
||||
)
|
||||
)
|
||||
memory_segmentation = (
|
||||
self.register.setResultsName("base")
|
||||
pp.Optional(pp.Suppress(pp.Literal("*")))
|
||||
+ self.register.setResultsName("base")
|
||||
+ pp.Literal(":")
|
||||
+ segment_extension.setResultsName(self.SEGMENT_EXT_ID)
|
||||
)
|
||||
@@ -326,9 +328,14 @@ class ParserX86ATT(BaseParser):
|
||||
offset = memory_address.get("offset", None)
|
||||
base = memory_address.get("base", None)
|
||||
index = memory_address.get("index", None)
|
||||
scale = 1 if "scale" not in memory_address else int(memory_address["scale"])
|
||||
scale = 1 if "scale" not in memory_address else int(memory_address["scale"], 0)
|
||||
if isinstance(offset, str) and base is None and index is None:
|
||||
offset = {"value": offset}
|
||||
try:
|
||||
offset = {"value": int(offset, 0)}
|
||||
except ValueError:
|
||||
offset = {"value": offset}
|
||||
elif offset is not None and "value" in offset:
|
||||
offset["value"] = int(offset["value"], 0)
|
||||
new_dict = AttrDict({"offset": offset, "base": base, "index": index, "scale": scale})
|
||||
# Add segmentation extension if existing
|
||||
if self.SEGMENT_EXT_ID in memory_address:
|
||||
@@ -346,7 +353,8 @@ class ParserX86ATT(BaseParser):
|
||||
if "identifier" in immediate:
|
||||
# actually an identifier, change declaration
|
||||
return immediate
|
||||
# otherwise nothing to do
|
||||
# otherwise just make sure the immediate is a decimal
|
||||
immediate["value"] = int(immediate["value"], 0)
|
||||
return AttrDict({self.IMMEDIATE_ID: immediate})
|
||||
|
||||
def get_full_reg_name(self, register):
|
||||
@@ -357,10 +365,11 @@ class ParserX86ATT(BaseParser):
|
||||
def normalize_imd(self, imd):
|
||||
"""Normalize immediate to decimal based representation"""
|
||||
if "value" in imd:
|
||||
if imd["value"].lower().startswith("0x"):
|
||||
# hex, return decimal
|
||||
return int(imd["value"], 16)
|
||||
return int(imd["value"], 10)
|
||||
if isinstance(imd["value"], str):
|
||||
# return decimal
|
||||
return int(imd["value"], 0)
|
||||
else:
|
||||
return imd["value"]
|
||||
# identifier
|
||||
return imd
|
||||
|
||||
@@ -435,7 +444,12 @@ class ParserX86ATT(BaseParser):
|
||||
"""Check if register is a vector register"""
|
||||
if register is None:
|
||||
return False
|
||||
if register["name"].rstrip(string.digits).lower() in ["mm", "xmm", "ymm", "zmm"]:
|
||||
if register["name"].rstrip(string.digits).lower() in [
|
||||
"mm",
|
||||
"xmm",
|
||||
"ymm",
|
||||
"zmm",
|
||||
]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -47,7 +47,9 @@ class ArchSemantics(ISASemantics):
|
||||
indices = [port_list.index(p) for p in ports]
|
||||
# check if port sum of used ports for uop are unbalanced
|
||||
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
|
||||
instr_ports = self._to_list(itemgetter(*indices)(instruction_form["port_pressure"]))
|
||||
instr_ports = self._to_list(
|
||||
itemgetter(*indices)(instruction_form["port_pressure"])
|
||||
)
|
||||
if len(set(port_sums)) > 1:
|
||||
# balance ports
|
||||
# init list for keeping track of the current change
|
||||
@@ -270,7 +272,8 @@ class ArchSemantics(ISASemantics):
|
||||
reg_type
|
||||
]
|
||||
st_data_port_pressure = [
|
||||
pp * multiplier for pp in st_data_port_pressure]
|
||||
pp * multiplier for pp in st_data_port_pressure
|
||||
]
|
||||
data_port_pressure = [
|
||||
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
|
||||
]
|
||||
@@ -343,7 +346,9 @@ class ArchSemantics(ISASemantics):
|
||||
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
|
||||
"""Apply performance data to instruction if it was found in the archDB"""
|
||||
throughput = instruction_data["throughput"]
|
||||
port_pressure = self._machine_model.average_port_pressure(instruction_data["port_pressure"])
|
||||
port_pressure = self._machine_model.average_port_pressure(
|
||||
instruction_data["port_pressure"]
|
||||
)
|
||||
instruction_form["port_uops"] = instruction_data["port_pressure"]
|
||||
try:
|
||||
assert isinstance(port_pressure, list)
|
||||
|
||||
@@ -1,20 +1,19 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
import string
|
||||
from collections import defaultdict
|
||||
from copy import deepcopy
|
||||
from itertools import product
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
import ruamel.yaml
|
||||
from ruamel.yaml.compat import StringIO
|
||||
|
||||
from osaca import __version__, utils
|
||||
from osaca.parser import ParserX86ATT
|
||||
from ruamel.yaml.compat import StringIO
|
||||
|
||||
|
||||
class MachineModel(object):
|
||||
@@ -37,7 +36,13 @@ class MachineModel(object):
|
||||
"hidden_loads": None,
|
||||
"load_latency": {},
|
||||
"load_throughput": [
|
||||
{"base": b, "index": i, "offset": o, "scale": s, "port_pressure": []}
|
||||
{
|
||||
"base": b,
|
||||
"index": i,
|
||||
"offset": o,
|
||||
"scale": s,
|
||||
"port_pressure": [],
|
||||
}
|
||||
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
|
||||
],
|
||||
"load_throughput_default": [],
|
||||
@@ -128,7 +133,8 @@ class MachineModel(object):
|
||||
instruction_form
|
||||
for instruction_form in name_matched_iforms
|
||||
if self._match_operands(
|
||||
instruction_form["operands"] if "operands" in instruction_form else [], operands
|
||||
instruction_form["operands"] if "operands" in instruction_form else [],
|
||||
operands,
|
||||
)
|
||||
)
|
||||
except StopIteration:
|
||||
@@ -150,7 +156,13 @@ class MachineModel(object):
|
||||
return average_pressure
|
||||
|
||||
def set_instruction(
|
||||
self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None
|
||||
self,
|
||||
name,
|
||||
operands=None,
|
||||
latency=None,
|
||||
port_pressure=None,
|
||||
throughput=None,
|
||||
uops=None,
|
||||
):
|
||||
"""Import instruction form information."""
|
||||
# If it already exists. Overwrite information.
|
||||
@@ -254,11 +266,13 @@ class MachineModel(object):
|
||||
"""Return ISA for given micro-arch ``arch``."""
|
||||
arch_dict = {
|
||||
"a64fx": "aarch64",
|
||||
"a72": "aarch64",
|
||||
"tx2": "aarch64",
|
||||
"n1": "aarch64",
|
||||
"zen1": "x86",
|
||||
"zen+": "x86",
|
||||
"zen2": "x86",
|
||||
"icl": "x86",
|
||||
"con": "x86", # Intel Conroe
|
||||
"wol": "x86", # Intel Wolfdale
|
||||
"snb": "x86",
|
||||
@@ -500,7 +514,11 @@ class MachineModel(object):
|
||||
"""Check if the types of operand ``i_operand`` and ``operand`` match."""
|
||||
# check for wildcard
|
||||
if self.WILDCARD in operand:
|
||||
if "class" in i_operand and i_operand["class"] == "register" or "register" in i_operand:
|
||||
if (
|
||||
"class" in i_operand
|
||||
and i_operand["class"] == "register"
|
||||
or "register" in i_operand
|
||||
):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
@@ -527,20 +545,27 @@ class MachineModel(object):
|
||||
return self._is_AArch64_mem_type(i_operand, operand["memory"])
|
||||
# immediate
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD:
|
||||
return "value" in operand or \
|
||||
("immediate" in operand and "value" in operand["immediate"])
|
||||
return "value" in operand or (
|
||||
"immediate" in operand and "value" in operand["immediate"]
|
||||
)
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "int":
|
||||
return ("value" in operand and operand.get("type", None) == "int") or \
|
||||
("immediate" in operand and "value" in operand["immediate"] and
|
||||
operand["immediate"].get("type", None) == "int")
|
||||
return ("value" in operand and operand.get("type", None) == "int") or (
|
||||
"immediate" in operand
|
||||
and "value" in operand["immediate"]
|
||||
and operand["immediate"].get("type", None) == "int"
|
||||
)
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "float":
|
||||
return ("float" in operand and operand.get("type", None) == "float") or \
|
||||
("immediate" in operand and "float" in operand["immediate"] and
|
||||
operand["immediate"].get("type", None) == "float")
|
||||
return ("float" in operand and operand.get("type", None) == "float") or (
|
||||
"immediate" in operand
|
||||
and "float" in operand["immediate"]
|
||||
and operand["immediate"].get("type", None) == "float"
|
||||
)
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "double":
|
||||
return ("double" in operand and operand.get("type", None) == "double") or \
|
||||
("immediate" in operand and "double" in operand["immediate"] and
|
||||
operand["immediate"].get("type", None) == "double")
|
||||
return ("double" in operand and operand.get("type", None) == "double") or (
|
||||
"immediate" in operand
|
||||
and "double" in operand["immediate"]
|
||||
and operand["immediate"].get("type", None) == "double"
|
||||
)
|
||||
# identifier
|
||||
if "identifier" in operand or (
|
||||
"immediate" in operand and "identifier" in operand["immediate"]
|
||||
@@ -577,7 +602,10 @@ class MachineModel(object):
|
||||
def _compare_db_entries(self, operand_1, operand_2):
|
||||
"""Check if operand types in DB format (i.e., not parsed) match."""
|
||||
operand_attributes = list(
|
||||
filter(lambda x: True if x != "source" and x != "destination" else False, operand_1)
|
||||
filter(
|
||||
lambda x: True if x != "source" and x != "destination" else False,
|
||||
operand_1,
|
||||
)
|
||||
)
|
||||
for key in operand_attributes:
|
||||
try:
|
||||
|
||||
@@ -99,33 +99,46 @@ class ISASemantics(object):
|
||||
# post-process pre- and post-indexing for aarch64 memory operands
|
||||
if self._isa == "aarch64":
|
||||
for operand in [op for op in op_dict["source"] if "memory" in op]:
|
||||
post_indexed = ("post_indexed" in operand["memory"] and
|
||||
operand["memory"]["post_indexed"])
|
||||
pre_indexed = ("pre_indexed" in operand["memory"] and
|
||||
operand["memory"]["pre_indexed"])
|
||||
post_indexed = (
|
||||
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||
)
|
||||
pre_indexed = (
|
||||
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||
)
|
||||
if post_indexed or pre_indexed:
|
||||
op_dict["src_dst"].append(
|
||||
AttrDict.convert_dict({
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed})
|
||||
AttrDict.convert_dict(
|
||||
{
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed,
|
||||
}
|
||||
)
|
||||
)
|
||||
for operand in [op for op in op_dict["destination"] if "memory" in op]:
|
||||
post_indexed = ("post_indexed" in operand["memory"] and
|
||||
operand["memory"]["post_indexed"])
|
||||
pre_indexed = ("pre_indexed" in operand["memory"] and
|
||||
operand["memory"]["pre_indexed"])
|
||||
post_indexed = (
|
||||
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||
)
|
||||
pre_indexed = (
|
||||
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||
)
|
||||
if post_indexed or pre_indexed:
|
||||
op_dict["src_dst"].append(
|
||||
AttrDict.convert_dict({
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed})
|
||||
AttrDict.convert_dict(
|
||||
{
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
# store operand list in dict and reassign operand key/value pair
|
||||
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
|
||||
# assign LD/ST flags
|
||||
instruction_form["flags"] = instruction_form["flags"] if "flags" in instruction_form else []
|
||||
instruction_form["flags"] = (
|
||||
instruction_form["flags"] if "flags" in instruction_form else []
|
||||
)
|
||||
if self._has_load(instruction_form):
|
||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
|
||||
if self._has_store(instruction_form):
|
||||
@@ -134,16 +147,20 @@ class ISASemantics(object):
|
||||
def get_reg_changes(self, instruction_form, only_postindexed=False):
|
||||
"""
|
||||
Returns register changes, as dict, for instruction_form, based on operation defined in isa.
|
||||
|
||||
|
||||
Empty dict if no changes of registers occurred. None for registers with unknown changes.
|
||||
If only_postindexed is True, only considers changes due to post_indexed memory references.
|
||||
"""
|
||||
if instruction_form.get('instruction') is None:
|
||||
if instruction_form.get("instruction") is None:
|
||||
return {}
|
||||
dest_reg_names = [op.register.get('prefix', '') + op.register.name
|
||||
for op in chain(instruction_form.semantic_operands.destination,
|
||||
instruction_form.semantic_operands.src_dst)
|
||||
if 'register' in op]
|
||||
dest_reg_names = [
|
||||
op.register.get("prefix", "") + op.register.name
|
||||
for op in chain(
|
||||
instruction_form.semantic_operands.destination,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
)
|
||||
if "register" in op
|
||||
]
|
||||
isa_data = self._isa_model.get_instruction(
|
||||
instruction_form["instruction"], instruction_form["operands"]
|
||||
)
|
||||
@@ -159,58 +176,58 @@ class ISASemantics(object):
|
||||
|
||||
if only_postindexed:
|
||||
for o in instruction_form.operands:
|
||||
if 'post_indexed' in o.get('memory', {}):
|
||||
base_name = o.memory.base.get('prefix', '')+o.memory.base.name
|
||||
return {base_name: {
|
||||
'name': o.memory.base.get('prefix', '')+o.memory.base.name,
|
||||
'value': int(o.memory.post_indexed.value)
|
||||
}}
|
||||
if "post_indexed" in o.get("memory", {}):
|
||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||
return {
|
||||
base_name: {
|
||||
"name": o.memory.base.get("prefix", "") + o.memory.base.name,
|
||||
"value": o.memory.post_indexed.value,
|
||||
}
|
||||
}
|
||||
return {}
|
||||
|
||||
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
||||
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
||||
|
||||
|
||||
for o in instruction_form.operands:
|
||||
if 'pre_indexed' in o.get('memory', {}):
|
||||
if "pre_indexed" in o.get("memory", {}):
|
||||
# Assuming no isa_data.operation
|
||||
if isa_data.get("operation", None) is not None:
|
||||
raise ValueError(
|
||||
"ISA information for pre-indexed instruction {!r} has operation set."
|
||||
"This is currently not supprted.".format(instruction_form.line))
|
||||
base_name = o.memory.base.get('prefix', '')+o.memory.base.name
|
||||
reg_operand_names = {base_name: 'op1'}
|
||||
operand_state = {'op1': {
|
||||
'name': base_name,
|
||||
'value': int(o.memory.offset.value)
|
||||
}}
|
||||
"This is currently not supprted.".format(instruction_form.line)
|
||||
)
|
||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||
reg_operand_names = {base_name: "op1"}
|
||||
operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
|
||||
|
||||
if isa_data is not None and 'operation' in isa_data:
|
||||
if isa_data is not None and "operation" in isa_data:
|
||||
for i, o in enumerate(instruction_form.operands):
|
||||
operand_name = "op{}".format(i+1)
|
||||
operand_name = "op{}".format(i + 1)
|
||||
if "register" in o:
|
||||
o_reg_name = o["register"].get('prefix', '')+o["register"]["name"]
|
||||
o_reg_name = o["register"].get("prefix", "") + o["register"]["name"]
|
||||
reg_operand_names[o_reg_name] = operand_name
|
||||
operand_state[operand_name] = {
|
||||
'name': o_reg_name,
|
||||
'value': 0}
|
||||
operand_state[operand_name] = {"name": o_reg_name, "value": 0}
|
||||
elif "immediate" in o:
|
||||
operand_state[operand_name] = {'value': int(o["immediate"]["value"])}
|
||||
operand_state[operand_name] = {"value": o["immediate"]["value"]}
|
||||
elif "memory" in o:
|
||||
# TODO lea needs some thinking about
|
||||
pass
|
||||
|
||||
operand_changes = exec(isa_data['operation'], {}, operand_state)
|
||||
exec(isa_data["operation"], {}, operand_state)
|
||||
|
||||
change_dict = {reg_name: operand_state.get(reg_operand_names.get(reg_name))
|
||||
for reg_name in dest_reg_names}
|
||||
change_dict = {
|
||||
reg_name: operand_state.get(reg_operand_names.get(reg_name))
|
||||
for reg_name in dest_reg_names
|
||||
}
|
||||
return change_dict
|
||||
|
||||
def _apply_found_ISA_data(self, isa_data, operands):
|
||||
"""
|
||||
Create operand dictionary containing src/dst operands out of the ISA data entry and
|
||||
the operands of an instruction form
|
||||
|
||||
If breaks_pedendency_on_equal_operands is True (configuted per instruction in ISA db)
|
||||
|
||||
If breaks_dependency_on_equal_operands is True (configured per instruction in ISA db)
|
||||
and all operands are equal, place operand into destination only.
|
||||
|
||||
:param dict isa_data: ISA DB entry
|
||||
@@ -223,13 +240,19 @@ class ISASemantics(object):
|
||||
op_dict["src_dst"] = []
|
||||
|
||||
# handle dependency breaking instructions
|
||||
if "breaks_pedendency_on_equal_operands" in isa_data and operands[1:] == operands[:-1]:
|
||||
if "breaks_dependency_on_equal_operands" in isa_data and operands[1:] == operands[:-1]:
|
||||
op_dict["destination"] += operands
|
||||
if "hidden_operands" in isa_data:
|
||||
op_dict["destination"] += [
|
||||
AttrDict.convert_dict(
|
||||
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}})
|
||||
for hop in isa_data["hidden_operands"]]
|
||||
{
|
||||
hop["class"]: {
|
||||
k: hop[k] for k in ["name", "class", "source", "destination"]
|
||||
}
|
||||
}
|
||||
)
|
||||
for hop in isa_data["hidden_operands"]
|
||||
]
|
||||
return op_dict
|
||||
|
||||
for i, op in enumerate(isa_data["operands"]):
|
||||
|
||||
@@ -1,22 +1,44 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import copy
|
||||
from itertools import chain, product
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from itertools import chain
|
||||
from multiprocessing import Manager, Process, cpu_count
|
||||
|
||||
import networkx as nx
|
||||
from osaca.semantics import INSTR_FLAGS, ArchSemantics, MachineModel
|
||||
|
||||
from osaca.parser import AttrDict
|
||||
from osaca.semantics import INSTR_FLAGS, MachineModel, ArchSemantics
|
||||
|
||||
class KernelDG(nx.DiGraph):
|
||||
def __init__(self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics):
|
||||
# threshold for checking dependency graph sequential or in parallel
|
||||
INSTRUCTION_THRESHOLD = 50
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
parsed_kernel,
|
||||
parser,
|
||||
hw_model: MachineModel,
|
||||
semantics: ArchSemantics,
|
||||
timeout=10,
|
||||
):
|
||||
self.timed_out = False
|
||||
self.kernel = parsed_kernel
|
||||
self.parser = parser
|
||||
self.model = hw_model
|
||||
self.arch_sem = semantics
|
||||
self.dg = self.create_DG(self.kernel)
|
||||
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
|
||||
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel, timeout)
|
||||
|
||||
def _extend_path(self, dst_list, kernel, dg, offset):
|
||||
for instr in kernel:
|
||||
generator_path = nx.algorithms.simple_paths.all_simple_paths(
|
||||
dg, instr.line_number, instr.line_number + offset
|
||||
)
|
||||
tmp_list = list(generator_path)
|
||||
dst_list.extend(tmp_list)
|
||||
# print('Thread [{}-{}] done'.format(kernel[0]['line_number'], kernel[-1]['line_number']))
|
||||
|
||||
def create_DG(self, kernel):
|
||||
"""
|
||||
@@ -56,7 +78,7 @@ class KernelDG(nx.DiGraph):
|
||||
else instruction_form["latency_wo_load"]
|
||||
)
|
||||
if "storeload_dep" in dep_flags:
|
||||
edge_weight += self.model.get('store_to_load_forward_latency', 0)
|
||||
edge_weight += self.model.get("store_to_load_forward_latency", 0)
|
||||
dg.add_edge(
|
||||
instruction_form["line_number"],
|
||||
dep["line_number"],
|
||||
@@ -65,39 +87,90 @@ class KernelDG(nx.DiGraph):
|
||||
dg.nodes[dep["line_number"]]["instruction_form"] = dep
|
||||
return dg
|
||||
|
||||
def check_for_loopcarried_dep(self, kernel):
|
||||
def check_for_loopcarried_dep(self, kernel, timeout=10):
|
||||
"""
|
||||
Try to find loop-carried dependencies in given kernel.
|
||||
|
||||
:param kernel: Parsed asm kernel with assigned semantic information
|
||||
:type kernel: list
|
||||
:param timeout: Timeout in seconds for parallel execution, defaults
|
||||
to `10`. Set to `-1` for no timeout
|
||||
:type timeout: int
|
||||
:returns: `dict` -- dependency dictionary with all cyclic LCDs
|
||||
"""
|
||||
# increase line number for second kernel loop
|
||||
offset = max(1000, max([i.line_number for i in kernel]))
|
||||
first_line_no = kernel[0].line_number
|
||||
tmp_kernel = [] + kernel
|
||||
for orig_iform in kernel:
|
||||
temp_iform = copy.copy(orig_iform)
|
||||
temp_iform['line_number'] += offset
|
||||
temp_iform["line_number"] += offset
|
||||
tmp_kernel.append(temp_iform)
|
||||
# get dependency graph
|
||||
dg = self.create_DG(tmp_kernel)
|
||||
|
||||
# build cyclic loop-carried dependencies
|
||||
loopcarried_deps = []
|
||||
paths = []
|
||||
for instr in kernel:
|
||||
paths += list(nx.algorithms.simple_paths.all_simple_paths(
|
||||
dg, instr.line_number, instr.line_number + offset))
|
||||
all_paths = []
|
||||
|
||||
klen = len(kernel)
|
||||
if klen >= self.INSTRUCTION_THRESHOLD:
|
||||
# parallel execution with static scheduling
|
||||
num_cores = cpu_count()
|
||||
workload = int((klen - 1) / num_cores) + 1
|
||||
starts = [tid * workload for tid in range(num_cores)]
|
||||
ends = [min((tid + 1) * workload, klen) for tid in range(num_cores)]
|
||||
instrs = [kernel[s:e] for s, e in zip(starts, ends)]
|
||||
with Manager() as manager:
|
||||
all_paths = manager.list()
|
||||
processes = [
|
||||
Process(
|
||||
target=self._extend_path,
|
||||
args=(all_paths, instr_section, dg, offset),
|
||||
)
|
||||
for instr_section in instrs
|
||||
]
|
||||
for p in processes:
|
||||
p.start()
|
||||
if timeout == -1:
|
||||
# no timeout
|
||||
for p in processes:
|
||||
p.join()
|
||||
else:
|
||||
start_time = time.time()
|
||||
while time.time() - start_time <= timeout:
|
||||
if any(p.is_alive() for p in processes):
|
||||
time.sleep(0.2)
|
||||
else:
|
||||
# all procs done
|
||||
for p in processes:
|
||||
p.join()
|
||||
break
|
||||
else:
|
||||
self.timed_out = True
|
||||
# terminate running processes
|
||||
for p in processes:
|
||||
if p.is_alive():
|
||||
# Python 3.6 does not support Process.kill().
|
||||
# Can be changed to `p.kill()` after EoL (01/22) of Py3.6
|
||||
os.kill(p.pid, signal.SIGKILL)
|
||||
p.join()
|
||||
all_paths = list(all_paths)
|
||||
else:
|
||||
# sequential execution to avoid overhead when analyzing smaller kernels
|
||||
for instr in kernel:
|
||||
all_paths.extend(
|
||||
nx.algorithms.simple_paths.all_simple_paths(
|
||||
dg, instr.line_number, instr.line_number + offset
|
||||
)
|
||||
)
|
||||
|
||||
paths_set = set()
|
||||
for path in paths:
|
||||
for path in all_paths:
|
||||
lat_sum = 0.0
|
||||
# extend path by edge bound latencies (e.g., store-to-load latency)
|
||||
lat_path = []
|
||||
for s, d in nx.utils.pairwise(path):
|
||||
edge_lat = dg.edges[s, d]['latency']
|
||||
edge_lat = dg.edges[s, d]["latency"]
|
||||
# map source node back to original line numbers
|
||||
if s >= offset:
|
||||
s -= offset
|
||||
@@ -120,8 +193,10 @@ class KernelDG(nx.DiGraph):
|
||||
for lat_sum, involved_lines in loopcarried_deps:
|
||||
loopcarried_deps_dict[involved_lines[0][0]] = {
|
||||
"root": self._get_node_by_lineno(involved_lines[0][0]),
|
||||
"dependencies": [(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines],
|
||||
"latency": lat_sum
|
||||
"dependencies": [
|
||||
(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines
|
||||
],
|
||||
"latency": lat_sum,
|
||||
}
|
||||
return loopcarried_deps_dict
|
||||
|
||||
@@ -140,6 +215,7 @@ class KernelDG(nx.DiGraph):
|
||||
max_latency_instr = max(self.kernel, key=lambda k: k["latency"])
|
||||
if nx.algorithms.dag.is_directed_acyclic_graph(self.dg):
|
||||
longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight="latency")
|
||||
# TODO verify that we can remove the next two lince due to earlier initialization
|
||||
for line_number in longest_path:
|
||||
self._get_node_by_lineno(int(line_number))["latency_cp"] = 0
|
||||
# set cp latency to instruction
|
||||
@@ -148,6 +224,9 @@ class KernelDG(nx.DiGraph):
|
||||
node = self._get_node_by_lineno(int(s))
|
||||
node["latency_cp"] = self.dg.edges[(s, d)]["latency"]
|
||||
path_latency += node["latency_cp"]
|
||||
# add latency for last instruction
|
||||
node = self._get_node_by_lineno(longest_path[-1])
|
||||
node["latency_cp"] = node["latency"]
|
||||
if max_latency_instr["latency"] > path_latency:
|
||||
max_latency_instr["latency_cp"] = float(max_latency_instr["latency"])
|
||||
return [max_latency_instr]
|
||||
@@ -167,9 +246,7 @@ class KernelDG(nx.DiGraph):
|
||||
# split to DAG
|
||||
raise NotImplementedError("Kernel is cyclic.")
|
||||
|
||||
def find_depending(
|
||||
self, instruction_form, instructions, flag_dependencies=False
|
||||
):
|
||||
def find_depending(self, instruction_form, instructions, flag_dependencies=False):
|
||||
"""
|
||||
Find instructions in `instructions` depending on a given instruction form's results.
|
||||
|
||||
@@ -189,15 +266,15 @@ class KernelDG(nx.DiGraph):
|
||||
# TODO instructions before must be considered as well, if they update registers
|
||||
# not used by insruction_form. E.g., validation/build/A64FX/gcc/O1/gs-2d-5pt.marked.s
|
||||
register_changes = self._update_reg_changes(instruction_form)
|
||||
#print("FROM", instruction_form.line, register_changes)
|
||||
# print("FROM", instruction_form.line, register_changes)
|
||||
for i, instr_form in enumerate(instructions):
|
||||
self._update_reg_changes(instr_form, register_changes)
|
||||
#print(" TO", instr_form.line, register_changes)
|
||||
# print(" TO", instr_form.line, register_changes)
|
||||
if "register" in dst:
|
||||
# read of register
|
||||
if self.is_read(dst.register, instr_form) and not (
|
||||
dst.get("pre_indexed", False) or
|
||||
dst.get("post_indexed", False)):
|
||||
dst.get("pre_indexed", False) or dst.get("post_indexed", False)
|
||||
):
|
||||
yield instr_form, []
|
||||
# write to register -> abort
|
||||
if self.is_written(dst.register, instr_form):
|
||||
@@ -214,10 +291,10 @@ class KernelDG(nx.DiGraph):
|
||||
if "pre_indexed" in dst.memory:
|
||||
if self.is_written(dst.memory.base, instr_form):
|
||||
break
|
||||
#if dst.memory.base:
|
||||
# if dst.memory.base:
|
||||
# if self.is_read(dst.memory.base, instr_form):
|
||||
# yield instr_form, []
|
||||
#if dst.memory.index:
|
||||
# if dst.memory.index:
|
||||
# if self.is_read(dst.memory.index, instr_form):
|
||||
# yield instr_form, []
|
||||
if "post_indexed" in dst.memory:
|
||||
@@ -225,7 +302,7 @@ class KernelDG(nx.DiGraph):
|
||||
if self.is_written(dst.memory.base, instr_form):
|
||||
break
|
||||
# TODO record register changes
|
||||
# (e.g., mov, leaadd, sub, inc, dec) in instructions[:i]
|
||||
# (e.g., mov, leaadd, sub, inc, dec) in instructions[:i]
|
||||
# and pass to is_memload and is_memstore to consider relevance.
|
||||
# load from same location (presumed)
|
||||
if self.is_memload(dst.memory, instr_form, register_changes):
|
||||
@@ -245,17 +322,17 @@ class KernelDG(nx.DiGraph):
|
||||
if change is None or reg_state.get(reg, {}) is None:
|
||||
reg_state[reg] = None
|
||||
else:
|
||||
reg_state.setdefault(reg, {'name': reg, 'value': 0})
|
||||
if change['name'] != reg:
|
||||
reg_state.setdefault(reg, {"name": reg, "value": 0})
|
||||
if change["name"] != reg:
|
||||
# renaming occured, ovrwrite value with up-to-now change of source register
|
||||
reg_state[reg]['name'] = change['name']
|
||||
src_reg_state = reg_state.get(change['name'], {'value': 0})
|
||||
reg_state[reg]["name"] = change["name"]
|
||||
src_reg_state = reg_state.get(change["name"], {"value": 0})
|
||||
if src_reg_state is None:
|
||||
# original register's state was changed beyond reconstruction
|
||||
reg_state[reg] = None
|
||||
continue
|
||||
reg_state[reg]['value'] = src_reg_state['value']
|
||||
reg_state[reg]['value'] += change['value']
|
||||
reg_state[reg]["value"] = src_reg_state["value"]
|
||||
reg_state[reg]["value"] += change["value"]
|
||||
return reg_state
|
||||
|
||||
def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
|
||||
@@ -275,7 +352,8 @@ class KernelDG(nx.DiGraph):
|
||||
if instruction_form.semantic_operands is None:
|
||||
return is_read
|
||||
for src in chain(
|
||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
||||
instruction_form.semantic_operands.source,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
if "register" in src:
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
|
||||
@@ -285,7 +363,9 @@ class KernelDG(nx.DiGraph):
|
||||
if src.memory.base is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read
|
||||
if src.memory.index is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
|
||||
)
|
||||
# Check also if read in destination memory address
|
||||
for dst in chain(
|
||||
instruction_form.semantic_operands.destination,
|
||||
@@ -295,7 +375,9 @@ class KernelDG(nx.DiGraph):
|
||||
if dst.memory.base is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read
|
||||
if dst.memory.index is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
|
||||
)
|
||||
return is_read
|
||||
|
||||
def is_memload(self, mem, instruction_form, register_changes={}):
|
||||
@@ -303,7 +385,8 @@ class KernelDG(nx.DiGraph):
|
||||
if instruction_form.semantic_operands is None:
|
||||
return False
|
||||
for src in chain(
|
||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
||||
instruction_form.semantic_operands.source,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
# Here we check for mem dependecies only
|
||||
if "memory" not in src:
|
||||
@@ -313,41 +396,43 @@ class KernelDG(nx.DiGraph):
|
||||
# determine absolute address change
|
||||
addr_change = 0
|
||||
if src.offset and "value" in src.offset:
|
||||
addr_change += int(src.offset.value)
|
||||
addr_change += src.offset.value
|
||||
if mem.offset:
|
||||
addr_change -= int(mem.offset.value)
|
||||
addr_change -= mem.offset.value
|
||||
if mem.base and src.base:
|
||||
base_change = register_changes.get(
|
||||
src.base.get('prefix', '')+src.base.name,
|
||||
{'name': src.base.get('prefix', '')+src.base.name, 'value': 0})
|
||||
src.base.get("prefix", "") + src.base.name,
|
||||
{"name": src.base.get("prefix", "") + src.base.name, "value": 0},
|
||||
)
|
||||
if base_change is None:
|
||||
# Unknown change occurred
|
||||
continue
|
||||
if mem.base.get('prefix', '')+mem.base['name'] != base_change['name']:
|
||||
if mem.base.get("prefix", "") + mem.base["name"] != base_change["name"]:
|
||||
# base registers do not match
|
||||
continue
|
||||
addr_change += base_change['value']
|
||||
addr_change += base_change["value"]
|
||||
elif mem.base or src.base:
|
||||
# base registers do not match
|
||||
continue
|
||||
# base registers do not match
|
||||
continue
|
||||
if mem.index and src.index:
|
||||
index_change = register_changes.get(
|
||||
src.index.get('prefix', '')+src.index.name,
|
||||
{'name': src.index.get('prefix', '')+src.index.name, 'value': 0})
|
||||
src.index.get("prefix", "") + src.index.name,
|
||||
{"name": src.index.get("prefix", "") + src.index.name, "value": 0},
|
||||
)
|
||||
if index_change is None:
|
||||
# Unknown change occurred
|
||||
continue
|
||||
if mem.scale != src.scale:
|
||||
# scale factors do not match
|
||||
continue
|
||||
if mem.index.get('prefix', '')+mem.index['name'] != index_change['name']:
|
||||
if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
|
||||
# index registers do not match
|
||||
continue
|
||||
addr_change += index_change['value'] * src.scale
|
||||
addr_change += index_change["value"] * src.scale
|
||||
elif mem.index or src.index:
|
||||
# index registers do not match
|
||||
continue
|
||||
#if instruction_form.line_number == 3:
|
||||
# index registers do not match
|
||||
continue
|
||||
# if instruction_form.line_number == 3:
|
||||
if addr_change == 0:
|
||||
return True
|
||||
return False
|
||||
@@ -372,7 +457,8 @@ class KernelDG(nx.DiGraph):
|
||||
)
|
||||
# Check also for possible pre- or post-indexing in memory addresses
|
||||
for src in chain(
|
||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
||||
instruction_form.semantic_operands.source,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
if "memory" in src:
|
||||
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
import os.path
|
||||
|
||||
DATA_DIRS = [os.path.expanduser("~/.osaca/data"), os.path.join(os.path.dirname(__file__), "data")]
|
||||
DATA_DIRS = [
|
||||
os.path.expanduser("~/.osaca/data"),
|
||||
os.path.join(os.path.dirname(__file__), "data"),
|
||||
]
|
||||
CACHE_DIR = os.path.expanduser("~/.osaca/cache")
|
||||
|
||||
|
||||
|
||||
17
setup.py
17
setup.py
@@ -18,7 +18,8 @@ here = os.path.abspath(os.path.dirname(__file__))
|
||||
# Stolen from pip
|
||||
def read(*names, **kwargs):
|
||||
with io.open(
|
||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
||||
os.path.join(os.path.dirname(__file__), *names),
|
||||
encoding=kwargs.get("encoding", "utf8"),
|
||||
) as fp:
|
||||
return fp.read()
|
||||
|
||||
@@ -38,13 +39,20 @@ def _run_build_cache(dir):
|
||||
# This is run inside the install staging directory (that had no .pyc files)
|
||||
# We don't want to generate any.
|
||||
# https://github.com/eliben/pycparser/pull/135
|
||||
check_call([sys.executable, "-B", "_build_cache.py"], cwd=os.path.join(dir, "osaca", "data"))
|
||||
check_call(
|
||||
[sys.executable, "-B", "_build_cache.py"],
|
||||
cwd=os.path.join(dir, "osaca", "data"),
|
||||
)
|
||||
|
||||
|
||||
class install(_install):
|
||||
def run(self):
|
||||
_install.run(self)
|
||||
self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache")
|
||||
self.execute(
|
||||
_run_build_cache,
|
||||
(self.install_lib,),
|
||||
msg="Build ISA and architecture cache",
|
||||
)
|
||||
|
||||
|
||||
class sdist(_sdist):
|
||||
@@ -91,7 +99,6 @@ setup(
|
||||
# Specify the Python versions you support here. In particular, ensure
|
||||
# that you indicate wheter you support Python2, Python 3 or both.
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
@@ -107,7 +114,7 @@ setup(
|
||||
# requirements files see:
|
||||
# https://packaging.python.org/en/latest/requirements.html
|
||||
install_requires=["networkx", "pyparsing>=2.3.1", "ruamel.yaml>=0.15.71"],
|
||||
python_requires=">=3.5",
|
||||
python_requires=">=3.6",
|
||||
# List additional groups of dependencies here (e.g. development
|
||||
# dependencies). You can install these using the following syntax,
|
||||
# for example:
|
||||
|
||||
@@ -33,7 +33,13 @@ class TestCLI(unittest.TestCase):
|
||||
with self.assertRaises(ValueError):
|
||||
osaca.check_arguments(args, parser)
|
||||
args = parser.parse_args(
|
||||
["--arch", "csx", "--import", "WRONG_BENCH", self._find_file("gs", "csx", "gcc")]
|
||||
[
|
||||
"--arch",
|
||||
"csx",
|
||||
"--import",
|
||||
"WRONG_BENCH",
|
||||
self._find_file("gs", "csx", "gcc"),
|
||||
]
|
||||
)
|
||||
with self.assertRaises(ValueError):
|
||||
osaca.check_arguments(args, parser)
|
||||
@@ -65,7 +71,13 @@ class TestCLI(unittest.TestCase):
|
||||
def test_check_db(self):
|
||||
parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
|
||||
args = parser.parse_args(
|
||||
["--arch", "tx2", "--db-check", "--verbose", self._find_test_file("triad_x86_iaca.s")]
|
||||
[
|
||||
"--arch",
|
||||
"tx2",
|
||||
"--db-check",
|
||||
"--verbose",
|
||||
self._find_test_file("triad_x86_iaca.s"),
|
||||
]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
@@ -134,7 +146,13 @@ class TestCLI(unittest.TestCase):
|
||||
for c in comps[a]:
|
||||
with self.subTest(kernel=k, arch=a, comp=c):
|
||||
args = parser.parse_args(
|
||||
["--arch", a, self._find_file(k, a, c), "--export-graph", "/dev/null"]
|
||||
[
|
||||
"--arch",
|
||||
a,
|
||||
self._find_file(k, a, c),
|
||||
"--export-graph",
|
||||
"/dev/null",
|
||||
]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
@@ -183,14 +201,34 @@ class TestCLI(unittest.TestCase):
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
# WARNING for length
|
||||
self.assertTrue(output.getvalue().count("WARNING") == 1)
|
||||
self.assertTrue(
|
||||
output.getvalue().count(
|
||||
"WARNING: You are analyzing a large amount of instruction forms"
|
||||
)
|
||||
== 1
|
||||
)
|
||||
# WARNING for arch
|
||||
args = parser.parse_args(
|
||||
["--lines", "100-199", "--ignore-unknown", self._find_test_file(kernel)]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
# WARNING for arch
|
||||
self.assertTrue(output.getvalue().count("WARNING") == 1)
|
||||
self.assertTrue(
|
||||
output.getvalue().count("WARNING: No micro-architecture was specified") == 1
|
||||
)
|
||||
# WARNING for timeout
|
||||
args = parser.parse_args(
|
||||
["--ignore-unknown", "--lcd-timeout", "0", self._find_test_file(kernel)]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 1)
|
||||
args = parser.parse_args(
|
||||
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 0)
|
||||
|
||||
def test_lines_arg(self):
|
||||
# Run tests with --lines option
|
||||
@@ -203,12 +241,24 @@ class TestCLI(unittest.TestCase):
|
||||
args = []
|
||||
args.append(
|
||||
parser.parse_args(
|
||||
["--lines", "146-154", "--arch", "csx", self._find_test_file(kernel_x86)]
|
||||
[
|
||||
"--lines",
|
||||
"146-154",
|
||||
"--arch",
|
||||
"csx",
|
||||
self._find_test_file(kernel_x86),
|
||||
]
|
||||
)
|
||||
)
|
||||
args.append(
|
||||
parser.parse_args(
|
||||
["--lines", "146:154", "--arch", "csx", self._find_test_file(kernel_x86)]
|
||||
[
|
||||
"--lines",
|
||||
"146:154",
|
||||
"--arch",
|
||||
"csx",
|
||||
self._find_test_file(kernel_x86),
|
||||
]
|
||||
)
|
||||
)
|
||||
args.append(
|
||||
|
||||
@@ -17,7 +17,13 @@ class TestDBInterface(unittest.TestCase):
|
||||
sample_entry = {
|
||||
"name": "DoItRightAndDoItFast",
|
||||
"operands": [
|
||||
{"class": "memory", "offset": "imd", "base": "gpr", "index": "gpr", "scale": 8},
|
||||
{
|
||||
"class": "memory",
|
||||
"offset": "imd",
|
||||
"base": "gpr",
|
||||
"index": "gpr",
|
||||
"scale": 8,
|
||||
},
|
||||
{"class": "register", "name": "xmm"},
|
||||
],
|
||||
"throughput": 1.25,
|
||||
@@ -35,7 +41,12 @@ class TestDBInterface(unittest.TestCase):
|
||||
del self.entry_tx2["operands"][1]["name"]
|
||||
self.entry_tx2["operands"][1]["prefix"] = "x"
|
||||
# self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
|
||||
self.entry_zen1["port_pressure"] = [[4, "0123"], [1, "4"], [1, "89"], [2, ["8D", "9D"]]]
|
||||
self.entry_zen1["port_pressure"] = [
|
||||
[4, "0123"],
|
||||
[1, "4"],
|
||||
[1, "89"],
|
||||
[2, ["8D", "9D"]],
|
||||
]
|
||||
|
||||
###########
|
||||
# Tests
|
||||
|
||||
32
tests/test_files/kernel_aarch64_sve.s
Normal file
32
tests/test_files/kernel_aarch64_sve.s
Normal file
@@ -0,0 +1,32 @@
|
||||
// OSACA-BEGIN
|
||||
.L5:
|
||||
add x10, x1, x11
|
||||
add x6, x1, x8
|
||||
ld2d {z0.d - z1.d}, p1/z, [x10]
|
||||
ld2d {z2.d - z3.d}, p1/z, [x6]
|
||||
mov z5.d, z1.d
|
||||
fadd z20.d, z3.d, z3.d
|
||||
mov z1.d, z0.d
|
||||
add x6, x1, x7
|
||||
fadd z2.d, z2.d, z2.d
|
||||
ld2d {z6.d - z7.d}, p1/z, [x6]
|
||||
fmul z4.d, z5.d, z20.d
|
||||
add x10, x1, x12
|
||||
mov z0.d, z7.d
|
||||
ld2d {z16.d - z17.d}, p1/z, [x10]
|
||||
mov z3.d, z4.d
|
||||
fmls z3.d, p0/m, z0.d, z17.d
|
||||
fmul z0.d, z0.d, z16.d
|
||||
fmla z3.d, p0/m, z6.d, z16.d
|
||||
fmla z0.d, p0/m, z6.d, z17.d
|
||||
fmls z3.d, p0/m, z1.d, z2.d
|
||||
fmls z0.d, p0/m, z1.d, z20.d
|
||||
mov z18.d, z3.d
|
||||
fmsb z5.d, p0/m, z2.d, z0.d
|
||||
mov z19.d, z5.d
|
||||
st2d {z18.d - z19.d}, p1, [x6]
|
||||
add x5, x5, 8
|
||||
add x1, x1, 128
|
||||
whilelo p1.d, x5, x9
|
||||
bne .L5
|
||||
// OSACA-END
|
||||
192
tests/test_files/kernel_x86_long_LCD.s
Normal file
192
tests/test_files/kernel_x86_long_LCD.s
Normal file
@@ -0,0 +1,192 @@
|
||||
# OSACA-BEGIN
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
push %rbp
|
||||
mov %ecx,%r12d
|
||||
mov %esi,%r14d
|
||||
mov %r12d,%ecx
|
||||
mov %r14d,%esi
|
||||
mov %rdx,%r13
|
||||
mov %rdi,%rbp
|
||||
callq 0x4210d0
|
||||
mov %rdx,%r8
|
||||
movzbl (%rdi),%r9d
|
||||
movslq %esi,%rsi
|
||||
movslq %ecx,%rcx
|
||||
movzbl (%r8),%r10d
|
||||
vmovd %r9d,%xmm13
|
||||
movzbl 0x4(%r8),%r9d
|
||||
vpinsrb $0x1,(%rsi,%rdi,1),%xmm13,%xmm14
|
||||
lea (%rsi,%rsi,2),%rdx
|
||||
vmovd %r10d,%xmm1
|
||||
vpinsrb $0x1,(%rcx,%r8,1),%xmm1,%xmm0
|
||||
vmovd %r9d,%xmm7
|
||||
vpinsrb $0x1,0x4(%rcx,%r8,1),%xmm7,%xmm5
|
||||
vpinsrb $0x2,(%rdi,%rsi,2),%xmm14,%xmm15
|
||||
vpinsrb $0x2,(%r8,%rcx,2),%xmm0,%xmm6
|
||||
vpinsrb $0x2,0x4(%r8,%rcx,2),%xmm5,%xmm9
|
||||
vpinsrb $0x3,(%rdx,%rdi,1),%xmm15,%xmm4
|
||||
movzbl 0x4(%rdi),%r11d
|
||||
lea (%rcx,%rcx,2),%rax
|
||||
vpinsrb $0x3,(%rax,%r8,1),%xmm6,%xmm10
|
||||
vpinsrb $0x3,0x4(%rax,%r8,1),%xmm9,%xmm11
|
||||
vmovd %r11d,%xmm2
|
||||
vpinsrb $0x1,0x4(%rsi,%rdi,1),%xmm2,%xmm8
|
||||
vpinsrb $0x2,0x4(%rdi,%rsi,2),%xmm8,%xmm3
|
||||
movzbl 0x1(%rdi),%r10d
|
||||
movzbl 0x5(%rdi),%r9d
|
||||
movzbl 0x1(%r8),%r11d
|
||||
vmovd %r10d,%xmm1
|
||||
movzbl 0x5(%r8),%r10d
|
||||
vmovd %r9d,%xmm7
|
||||
vpmovzxbd %xmm4,%xmm4
|
||||
vmovd %r11d,%xmm2
|
||||
vpmovzxbd %xmm10,%xmm10
|
||||
vpinsrb $0x3,0x4(%rdx,%rdi,1),%xmm3,%xmm12
|
||||
vpsubd %xmm10,%xmm4,%xmm14
|
||||
vpinsrb $0x1,0x5(%rsi,%rdi,1),%xmm7,%xmm5
|
||||
vmovd %r10d,%xmm4
|
||||
vpinsrb $0x1,0x5(%rcx,%r8,1),%xmm4,%xmm10
|
||||
vpinsrb $0x1,0x1(%rcx,%r8,1),%xmm2,%xmm8
|
||||
vpinsrb $0x1,0x1(%rsi,%rdi,1),%xmm1,%xmm0
|
||||
vpinsrb $0x2,0x5(%rdi,%rsi,2),%xmm5,%xmm9
|
||||
vpinsrb $0x2,0x1(%r8,%rcx,2),%xmm8,%xmm3
|
||||
vpinsrb $0x2,0x1(%rdi,%rsi,2),%xmm0,%xmm6
|
||||
vpmovzxbd %xmm12,%xmm12
|
||||
vpmovzxbd %xmm11,%xmm11
|
||||
vpsubd %xmm11,%xmm12,%xmm13
|
||||
vpinsrb $0x2,0x5(%r8,%rcx,2),%xmm10,%xmm11
|
||||
vpslld $0x10,%xmm13,%xmm15
|
||||
vpinsrb $0x3,0x1(%rdx,%rdi,1),%xmm6,%xmm13
|
||||
vpaddd %xmm15,%xmm14,%xmm12
|
||||
vpinsrb $0x3,0x5(%rdx,%rdi,1),%xmm9,%xmm15
|
||||
vpinsrb $0x3,0x1(%rax,%r8,1),%xmm3,%xmm14
|
||||
vpinsrb $0x3,0x5(%rax,%r8,1),%xmm11,%xmm1
|
||||
movzbl 0x2(%rdi),%r11d
|
||||
movzbl 0x2(%r8),%r9d
|
||||
vpmovzxbd %xmm15,%xmm15
|
||||
vmovd %r11d,%xmm8
|
||||
vmovd %r9d,%xmm5
|
||||
vpinsrb $0x1,0x2(%rsi,%rdi,1),%xmm8,%xmm3
|
||||
vpinsrb $0x1,0x2(%rcx,%r8,1),%xmm5,%xmm9
|
||||
vpinsrb $0x2,0x2(%rdi,%rsi,2),%xmm3,%xmm7
|
||||
vpinsrb $0x2,0x2(%r8,%rcx,2),%xmm9,%xmm4
|
||||
vpinsrb $0x3,0x2(%rdx,%rdi,1),%xmm7,%xmm3
|
||||
vpinsrb $0x3,0x2(%rax,%r8,1),%xmm4,%xmm7
|
||||
vpmovzxbd %xmm1,%xmm1
|
||||
movzbl 0x6(%r8),%r11d
|
||||
vpsubd %xmm1,%xmm15,%xmm0
|
||||
vpmovzxbd %xmm13,%xmm13
|
||||
vpslld $0x10,%xmm0,%xmm2
|
||||
vpmovzxbd %xmm14,%xmm14
|
||||
vpsubd %xmm14,%xmm13,%xmm6
|
||||
vpaddd %xmm2,%xmm6,%xmm11
|
||||
vmovd %r11d,%xmm6
|
||||
vpinsrb $0x1,0x6(%rcx,%r8,1),%xmm6,%xmm2
|
||||
movzbl 0x6(%rdi),%r10d
|
||||
vpinsrb $0x2,0x6(%r8,%rcx,2),%xmm2,%xmm8
|
||||
vmovd %r10d,%xmm10
|
||||
vpinsrb $0x1,0x6(%rsi,%rdi,1),%xmm10,%xmm1
|
||||
vpinsrb $0x3,0x6(%rax,%r8,1),%xmm8,%xmm9
|
||||
vpinsrb $0x2,0x6(%rdi,%rsi,2),%xmm1,%xmm0
|
||||
movzbl 0x3(%rdi),%r9d
|
||||
movzbl 0x7(%rdi),%r11d
|
||||
vpmovzxbd %xmm3,%xmm3
|
||||
vpmovzxbd %xmm7,%xmm7
|
||||
vmovd %r9d,%xmm14
|
||||
vmovd %r11d,%xmm8
|
||||
vpsubd %xmm7,%xmm3,%xmm10
|
||||
vpinsrb $0x1,0x3(%rsi,%rdi,1),%xmm14,%xmm15
|
||||
vpinsrb $0x1,0x7(%rsi,%rdi,1),%xmm8,%xmm3
|
||||
vpinsrb $0x3,0x6(%rdx,%rdi,1),%xmm0,%xmm5
|
||||
vpinsrb $0x2,0x3(%rdi,%rsi,2),%xmm15,%xmm1
|
||||
vpinsrb $0x2,0x7(%rdi,%rsi,2),%xmm3,%xmm7
|
||||
vpaddd %xmm11,%xmm12,%xmm3
|
||||
vpmovzxbd %xmm5,%xmm5
|
||||
vpmovzxbd %xmm9,%xmm9
|
||||
vpsubd %xmm9,%xmm5,%xmm4
|
||||
vpslld $0x10,%xmm4,%xmm13
|
||||
vpinsrb $0x3,0x7(%rdx,%rdi,1),%xmm7,%xmm15
|
||||
vpaddd %xmm13,%xmm10,%xmm10
|
||||
vpinsrb $0x3,0x3(%rdx,%rdi,1),%xmm1,%xmm13
|
||||
movzbl 0x7(%r8),%edx
|
||||
movzbl 0x3(%r8),%r10d
|
||||
vpmovzxbd %xmm15,%xmm15
|
||||
vmovd %edx,%xmm5
|
||||
vpinsrb $0x1,0x7(%rcx,%r8,1),%xmm5,%xmm9
|
||||
vmovd %r10d,%xmm0
|
||||
vpinsrb $0x1,0x3(%rcx,%r8,1),%xmm0,%xmm6
|
||||
vpinsrb $0x2,0x7(%r8,%rcx,2),%xmm9,%xmm4
|
||||
vpinsrb $0x2,0x3(%r8,%rcx,2),%xmm6,%xmm2
|
||||
vpinsrb $0x3,0x7(%rax,%r8,1),%xmm4,%xmm1
|
||||
vpinsrb $0x3,0x3(%rax,%r8,1),%xmm2,%xmm14
|
||||
vpmovzxbd %xmm1,%xmm1
|
||||
vpmovzxbd %xmm13,%xmm13
|
||||
vpsubd %xmm1,%xmm15,%xmm0
|
||||
vpmovzxbd %xmm14,%xmm14
|
||||
vpslld $0x10,%xmm0,%xmm2
|
||||
vpsubd %xmm14,%xmm13,%xmm6
|
||||
vpsubd %xmm11,%xmm12,%xmm1
|
||||
vpaddd %xmm2,%xmm6,%xmm8
|
||||
vpaddd %xmm8,%xmm10,%xmm12
|
||||
vpsubd %xmm8,%xmm10,%xmm0
|
||||
vpaddd %xmm12,%xmm3,%xmm8
|
||||
vpaddd %xmm0,%xmm1,%xmm7
|
||||
vpsubd %xmm12,%xmm3,%xmm3
|
||||
vpsubd %xmm0,%xmm1,%xmm5
|
||||
vunpcklps %xmm7,%xmm8,%xmm6
|
||||
vunpcklps %xmm5,%xmm3,%xmm2
|
||||
vunpckhps %xmm7,%xmm8,%xmm9
|
||||
vunpckhps %xmm5,%xmm3,%xmm4
|
||||
vunpcklpd %xmm2,%xmm6,%xmm10
|
||||
vunpckhpd %xmm2,%xmm6,%xmm11
|
||||
vunpcklpd %xmm4,%xmm9,%xmm12
|
||||
vpaddd %xmm11,%xmm10,%xmm14
|
||||
vunpckhpd %xmm4,%xmm9,%xmm13
|
||||
vpsubd %xmm11,%xmm10,%xmm1
|
||||
vpaddd %xmm13,%xmm12,%xmm15
|
||||
vpsubd %xmm13,%xmm12,%xmm0
|
||||
vpaddd %xmm15,%xmm14,%xmm9
|
||||
vpaddd %xmm0,%xmm1,%xmm7
|
||||
vpsubd %xmm15,%xmm14,%xmm8
|
||||
vpsubd %xmm0,%xmm1,%xmm6
|
||||
vmovdqu 0x279d68(%rip),%xmm15
|
||||
vpsrld $0xf,%xmm9,%xmm2
|
||||
vpsrld $0xf,%xmm7,%xmm10
|
||||
vpand %xmm15,%xmm2,%xmm3
|
||||
vmovdqu 0x279d40(%rip),%xmm4
|
||||
vpand %xmm15,%xmm10,%xmm11
|
||||
vpsrld $0xf,%xmm8,%xmm12
|
||||
vpsrld $0xf,%xmm6,%xmm14
|
||||
vpmulld %xmm3,%xmm4,%xmm5
|
||||
vpand %xmm15,%xmm12,%xmm13
|
||||
vpmulld %xmm11,%xmm4,%xmm3
|
||||
vpand %xmm15,%xmm14,%xmm1
|
||||
vpmulld %xmm13,%xmm4,%xmm2
|
||||
vpaddd %xmm3,%xmm7,%xmm7
|
||||
vpmulld %xmm1,%xmm4,%xmm0
|
||||
vpaddd %xmm5,%xmm9,%xmm4
|
||||
vpxor %xmm5,%xmm4,%xmm5
|
||||
vpxor %xmm3,%xmm7,%xmm9
|
||||
vpaddd %xmm2,%xmm8,%xmm8
|
||||
vpaddd %xmm9,%xmm5,%xmm3
|
||||
vpxor %xmm2,%xmm8,%xmm2
|
||||
vpaddd %xmm0,%xmm6,%xmm6
|
||||
vpaddd %xmm2,%xmm3,%xmm4
|
||||
vpxor %xmm0,%xmm6,%xmm0
|
||||
vpaddd %xmm0,%xmm4,%xmm2
|
||||
vpxor %xmm1,%xmm1,%xmm1
|
||||
vpaddd %xmm2,%xmm1,%xmm1
|
||||
vpsrldq $0x8,%xmm1,%xmm3
|
||||
vpaddd %xmm3,%xmm1,%xmm5
|
||||
vpsrlq $0x20,%xmm5,%xmm6
|
||||
vpaddd %xmm6,%xmm5,%xmm7
|
||||
vmovd %xmm7,%ecx
|
||||
movzwl %cx,%eax
|
||||
shr $0x10,%ecx
|
||||
add %ecx,%eax
|
||||
shr %eax
|
||||
retq
|
||||
# OSACA-END
|
||||
@@ -1,15 +1,15 @@
|
||||
# OSACA-BEGIN
|
||||
.L4:
|
||||
vmovsd %xmm0, 8(%rax)
|
||||
addq $8, %rax
|
||||
vmovsd %xmm0, 8(%rax,%rcx,8)
|
||||
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8)
|
||||
subq $-8, %rax
|
||||
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16)
|
||||
dec %rcx
|
||||
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
|
||||
movq %rcx, %rdx
|
||||
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
|
||||
vmovsd %xmm0, 8(%rax) # line 3 <----------------------------------+
|
||||
addq $8, %rax # |
|
||||
vmovsd %xmm0, 8(%rax,%rcx,8) # line 5 <-----------------------------------------------+
|
||||
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) ---+ |
|
||||
subq $-8, %rax # | |
|
||||
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) ---+ |
|
||||
dec %rcx # |
|
||||
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
|
||||
movq %rcx, %rdx # |
|
||||
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
|
||||
vmulsd %xmm1, %xmm0, %xmm0
|
||||
addq $8, %rax
|
||||
cmpq %rsi, %rax
|
||||
|
||||
@@ -34,7 +34,8 @@ class TestFrontend(unittest.TestCase):
|
||||
)
|
||||
self.machine_model_tx2 = MachineModel(arch="tx2")
|
||||
self.semantics_csx = ArchSemantics(
|
||||
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml")
|
||||
self.machine_model_csx,
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||
)
|
||||
self.semantics_tx2 = ArchSemantics(
|
||||
self.machine_model_tx2,
|
||||
@@ -71,7 +72,11 @@ class TestFrontend(unittest.TestCase):
|
||||
|
||||
def test_frontend_AArch64(self):
|
||||
dg = KernelDG(
|
||||
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2)
|
||||
self.kernel_AArch64,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_tx2,
|
||||
self.semantics_tx2,
|
||||
)
|
||||
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
|
||||
fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
|
||||
# TODO compare output with checked string
|
||||
|
||||
@@ -109,7 +109,8 @@ class TestMarkerUtils(unittest.TestCase):
|
||||
kernel_start = len(
|
||||
list(
|
||||
filter(
|
||||
None, (prologue + mov_start_var + bytes_var_1).split("\n")
|
||||
None,
|
||||
(prologue + mov_start_var + bytes_var_1).split("\n"),
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -142,7 +143,12 @@ class TestMarkerUtils(unittest.TestCase):
|
||||
epilogue = ".LE9:\t\t#12.2\n" "call dummy\n"
|
||||
kernel_length = len(list(filter(None, kernel.split("\n"))))
|
||||
|
||||
bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines]
|
||||
bytes_variations = [
|
||||
bytes_1_line,
|
||||
bytes_2_lines_1,
|
||||
bytes_2_lines_2,
|
||||
bytes_3_lines,
|
||||
]
|
||||
mov_start_variations = [mov_start_1, mov_start_2]
|
||||
mov_end_variations = [mov_end_1, mov_end_2]
|
||||
# actual tests
|
||||
@@ -171,7 +177,8 @@ class TestMarkerUtils(unittest.TestCase):
|
||||
kernel_start = len(
|
||||
list(
|
||||
filter(
|
||||
None, (prologue + mov_start_var + bytes_var_1).split("\n")
|
||||
None,
|
||||
(prologue + mov_start_var + bytes_var_1).split("\n"),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -24,7 +24,9 @@ class TestParserAArch64(unittest.TestCase):
|
||||
|
||||
def test_comment_parser(self):
|
||||
self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments")
|
||||
self.assertEqual(self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end")
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end"
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, "\t//// comment //// comment"),
|
||||
"// comment //// comment",
|
||||
@@ -36,7 +38,8 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
||||
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1")
|
||||
self.assertEqual(
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), "label1"
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment),
|
||||
"label1",
|
||||
)
|
||||
with self.assertRaises(ParseException):
|
||||
self._get_label(self.parser, "\t.cfi_startproc")
|
||||
@@ -102,7 +105,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(parsed_3.instruction, "mov")
|
||||
self.assertEqual(parsed_3.operands[0].register.name, "2")
|
||||
self.assertEqual(parsed_3.operands[0].register.prefix, "x")
|
||||
self.assertEqual(parsed_3.operands[1].immediate.value, "0x222")
|
||||
self.assertEqual(parsed_3.operands[1].immediate.value, int("0x222", 0))
|
||||
self.assertEqual(parsed_3.comment, "NOT IACA END")
|
||||
|
||||
self.assertEqual(parsed_4.instruction, "str")
|
||||
@@ -208,7 +211,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
{"prfop": {"type": ["PLD"], "target": ["L1"], "policy": ["KEEP"]}},
|
||||
{
|
||||
"memory": {
|
||||
"offset": {"value": "2048"},
|
||||
"offset": {"value": 2048},
|
||||
"base": {"prefix": "x", "name": "26"},
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
@@ -228,7 +231,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
{"register": {"prefix": "x", "name": "30"}},
|
||||
{
|
||||
"memory": {
|
||||
"offset": {"value": "-16"},
|
||||
"offset": {"value": -16},
|
||||
"base": {"name": "sp", "prefix": "x"},
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
@@ -253,7 +256,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
"base": {"prefix": "x", "name": "11"},
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
"post_indexed": {"value": "64"},
|
||||
"post_indexed": {"value": 64},
|
||||
}
|
||||
},
|
||||
],
|
||||
@@ -270,7 +273,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
{"register": {"prefix": "p", "name": "0", "predication": "m"}},
|
||||
{"register": {"prefix": "z", "name": "29", "shape": "d"}},
|
||||
{"register": {"prefix": "z", "name": "21", "shape": "d"}},
|
||||
{"immediate": {"value": "90", "type": "int"}},
|
||||
{"immediate": {"value": 90, "type": "int"}},
|
||||
],
|
||||
"directive": None,
|
||||
"comment": None,
|
||||
@@ -316,7 +319,8 @@ class TestParserAArch64(unittest.TestCase):
|
||||
value1 = self.parser.normalize_imd(imd_decimal_1)
|
||||
self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
||||
self.parser.normalize_imd(imd_decimal_2),
|
||||
self.parser.normalize_imd(imd_hex_2),
|
||||
)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)
|
||||
@@ -326,32 +330,34 @@ class TestParserAArch64(unittest.TestCase):
|
||||
|
||||
def test_multiple_regs(self):
|
||||
instr_range = "PUSH {x5-x7}"
|
||||
reg_range = AttrDict(
|
||||
{
|
||||
"register": {
|
||||
"range": [{"prefix": "x", "name": "5"}, {"prefix": "x", "name": "7"}],
|
||||
"index": None,
|
||||
}
|
||||
}
|
||||
)
|
||||
instr_list = "POP {x5, x7, x9}"
|
||||
reg_list = AttrDict(
|
||||
{
|
||||
"register": {
|
||||
"list": [
|
||||
{"prefix": "x", "name": "5"},
|
||||
{"prefix": "x", "name": "7"},
|
||||
{"prefix": "x", "name": "9"},
|
||||
],
|
||||
"index": None,
|
||||
}
|
||||
}
|
||||
)
|
||||
instr_list = "POP {x5, x6, x7}"
|
||||
instr_range_with_index = "ld4 {v0.S - v3.S}[2]"
|
||||
instr_list_with_index = "ld4 {v0.S, v1.S, v2.S, v3.S}[2]"
|
||||
instr_range_single = "dummy { z1.d }"
|
||||
reg_list = [
|
||||
AttrDict({"register": {"prefix": "x", "name": "5"}}),
|
||||
AttrDict({"register": {"prefix": "x", "name": "6"}}),
|
||||
AttrDict({"register": {"prefix": "x", "name": "7"}}),
|
||||
]
|
||||
reg_list_idx = [
|
||||
AttrDict({"register": {"prefix": "v", "name": "0", "shape": "S", "index": 2}}),
|
||||
AttrDict({"register": {"prefix": "v", "name": "1", "shape": "S", "index": 2}}),
|
||||
AttrDict({"register": {"prefix": "v", "name": "2", "shape": "S", "index": 2}}),
|
||||
AttrDict({"register": {"prefix": "v", "name": "3", "shape": "S", "index": 2}}),
|
||||
]
|
||||
reg_list_single = [AttrDict({"register": {"prefix": "z", "name": "1", "shape": "d"}})]
|
||||
|
||||
prange = self.parser.parse_line(instr_range)
|
||||
plist = self.parser.parse_line(instr_list)
|
||||
p_idx_range = self.parser.parse_line(instr_range_with_index)
|
||||
p_idx_list = self.parser.parse_line(instr_list_with_index)
|
||||
p_single = self.parser.parse_line(instr_range_single)
|
||||
|
||||
self.assertEqual(prange.operands[0], reg_range)
|
||||
self.assertEqual(plist.operands[0], reg_list)
|
||||
self.assertEqual(prange.operands, reg_list)
|
||||
self.assertEqual(plist.operands, reg_list)
|
||||
self.assertEqual(p_idx_range.operands, reg_list_idx)
|
||||
self.assertEqual(p_idx_list.operands, reg_list_idx)
|
||||
self.assertEqual(p_single.operands, reg_list_single)
|
||||
|
||||
def test_reg_dependency(self):
|
||||
reg_1_1 = AttrDict({"prefix": "b", "name": "1"})
|
||||
|
||||
@@ -26,7 +26,8 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments")
|
||||
self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end")
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, "\t## comment ## comment"), "# comment ## comment"
|
||||
self._get_comment(self.parser, "\t## comment ## comment"),
|
||||
"# comment ## comment",
|
||||
)
|
||||
|
||||
def test_label_parser(self):
|
||||
@@ -35,7 +36,8 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
||||
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1")
|
||||
self.assertEqual(
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), "label1"
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment),
|
||||
"label1",
|
||||
)
|
||||
with self.assertRaises(ParseException):
|
||||
self._get_label(self.parser, "\t.cfi_startproc")
|
||||
@@ -47,7 +49,8 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2)
|
||||
self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0)
|
||||
self.assertEqual(
|
||||
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2
|
||||
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters),
|
||||
2,
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
|
||||
@@ -62,7 +65,12 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.parser,
|
||||
"\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support",
|
||||
).parameters,
|
||||
["__TEXT", "__eh_frame", "coalesced", "no_toc+strip_static_syms+live_support"],
|
||||
[
|
||||
"__TEXT",
|
||||
"__eh_frame",
|
||||
"coalesced",
|
||||
"no_toc+strip_static_syms+live_support",
|
||||
],
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(
|
||||
@@ -74,7 +82,9 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90"
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")["name"],
|
||||
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")[
|
||||
"name"
|
||||
],
|
||||
"byte",
|
||||
)
|
||||
self.assertEqual(
|
||||
@@ -120,12 +130,12 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertIsNone(parsed_2.comment)
|
||||
|
||||
self.assertEqual(parsed_3.instruction, "movl")
|
||||
self.assertEqual(parsed_3.operands[0].immediate.value, "222")
|
||||
self.assertEqual(parsed_3.operands[0].immediate.value, 222)
|
||||
self.assertEqual(parsed_3.operands[1].register.name, "ebx")
|
||||
self.assertEqual(parsed_3.comment, "IACA END")
|
||||
|
||||
self.assertEqual(parsed_4.instruction, "vmovss")
|
||||
self.assertEqual(parsed_4.operands[1].memory.offset.value, "-4")
|
||||
self.assertEqual(parsed_4.operands[1].memory.offset.value, -4)
|
||||
self.assertEqual(parsed_4.operands[1].memory.base.name, "rsp")
|
||||
self.assertEqual(parsed_4.operands[1].memory.index.name, "rax")
|
||||
self.assertEqual(parsed_4.operands[1].memory.scale, 8)
|
||||
@@ -146,7 +156,7 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(parsed_6.operands[0].memory.scale, 8)
|
||||
self.assertEqual(parsed_6.operands[1].register.name, "rbx")
|
||||
|
||||
self.assertEqual(parsed_7.operands[0].immediate.value, "0x1")
|
||||
self.assertEqual(parsed_7.operands[0].immediate.value, 0x1)
|
||||
self.assertEqual(parsed_7.operands[1].register.name, "xmm0")
|
||||
self.assertEqual(parsed_7.operands[2].register.name, "ymm1")
|
||||
self.assertEqual(parsed_7.operands[3].register.name, "ymm1")
|
||||
@@ -189,7 +199,7 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
"operands": [
|
||||
{
|
||||
"memory": {
|
||||
"offset": {"value": "2"},
|
||||
"offset": {"value": 2},
|
||||
"base": {"name": "rax"},
|
||||
"index": {"name": "rax"},
|
||||
"scale": 1,
|
||||
@@ -240,12 +250,14 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
imd_decimal_1 = {"value": "79"}
|
||||
imd_hex_1 = {"value": "0x4f"}
|
||||
imd_decimal_2 = {"value": "8"}
|
||||
imd_hex_2 = {"value": "0x8"}
|
||||
imd_hex_2 = {"value": "8"}
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
|
||||
self.parser.normalize_imd(imd_decimal_1),
|
||||
self.parser.normalize_imd(imd_hex_1),
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
||||
self.parser.normalize_imd(imd_decimal_2),
|
||||
self.parser.normalize_imd(imd_hex_2),
|
||||
)
|
||||
|
||||
def test_reg_dependency(self):
|
||||
|
||||
@@ -5,14 +5,19 @@ Unit tests for Semantic Analysis
|
||||
|
||||
import os
|
||||
import unittest
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from osaca.osaca import get_unmatched_instruction_ratio
|
||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||
from osaca.semantics import (
|
||||
INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section, ISASemantics
|
||||
INSTR_FLAGS,
|
||||
ArchSemantics,
|
||||
ISASemantics,
|
||||
KernelDG,
|
||||
MachineModel,
|
||||
reduce_to_section,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,17 +35,30 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.code_x86 = f.read()
|
||||
with open(cls._find_file("kernel_x86_memdep.s")) as f:
|
||||
cls.code_x86_memdep = f.read()
|
||||
with open(cls._find_file("kernel_x86_long_LCD.s")) as f:
|
||||
cls.code_x86_long_LCD = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_memdep.s")) as f:
|
||||
cls.code_aarch64_memdep = f.read()
|
||||
with open(cls._find_file("kernel_aarch64.s")) as f:
|
||||
cls.code_AArch64 = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
|
||||
cls.code_AArch64_SVE = f.read()
|
||||
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
|
||||
cls.kernel_x86_memdep = reduce_to_section(
|
||||
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86")
|
||||
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
|
||||
)
|
||||
cls.kernel_x86_long_LCD = reduce_to_section(
|
||||
cls.parser_x86.parse_file(cls.code_x86_long_LCD), "x86"
|
||||
)
|
||||
cls.kernel_AArch64 = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64")
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64"
|
||||
)
|
||||
cls.kernel_aarch64_memdep = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64")
|
||||
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64"
|
||||
)
|
||||
cls.kernel_aarch64_SVE = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
|
||||
)
|
||||
|
||||
# set up machine models
|
||||
cls.machine_model_csx = MachineModel(
|
||||
@@ -49,15 +67,23 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.machine_model_tx2 = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
|
||||
)
|
||||
cls.machine_model_a64fx = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml")
|
||||
)
|
||||
cls.semantics_x86 = ISASemantics("x86")
|
||||
cls.semantics_csx = ArchSemantics(
|
||||
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
|
||||
cls.machine_model_csx,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||
)
|
||||
cls.semantics_aarch64 = ISASemantics("aarch64")
|
||||
cls.semantics_tx2 = ArchSemantics(
|
||||
cls.machine_model_tx2,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
|
||||
)
|
||||
cls.semantics_a64fx = ArchSemantics(
|
||||
cls.machine_model_a64fx,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
|
||||
)
|
||||
cls.machine_model_zen = MachineModel(arch="zen1")
|
||||
|
||||
for i in range(len(cls.kernel_x86)):
|
||||
@@ -66,12 +92,18 @@ class TestSemanticTools(unittest.TestCase):
|
||||
for i in range(len(cls.kernel_x86_memdep)):
|
||||
cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i])
|
||||
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i])
|
||||
for i in range(len(cls.kernel_x86_long_LCD)):
|
||||
cls.semantics_csx.assign_src_dst(cls.kernel_x86_long_LCD[i])
|
||||
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_long_LCD[i])
|
||||
for i in range(len(cls.kernel_AArch64)):
|
||||
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
|
||||
cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i])
|
||||
for i in range(len(cls.kernel_aarch64_memdep)):
|
||||
cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i])
|
||||
cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i])
|
||||
for i in range(len(cls.kernel_aarch64_SVE)):
|
||||
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
|
||||
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
|
||||
|
||||
###########
|
||||
# Tests
|
||||
@@ -148,7 +180,12 @@ class TestSemanticTools(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(
|
||||
test_mm_x86.get_store_throughput(
|
||||
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": "NOT_NONE", "scale": 1}
|
||||
{
|
||||
"base": {"prefix": "NOT_IN_DB"},
|
||||
"offset": None,
|
||||
"index": "NOT_NONE",
|
||||
"scale": 1,
|
||||
}
|
||||
),
|
||||
[[1, "23"], [1, "4"]],
|
||||
)
|
||||
@@ -160,7 +197,12 @@ class TestSemanticTools(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(
|
||||
test_mm_arm.get_store_throughput(
|
||||
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": None, "scale": 1}
|
||||
{
|
||||
"base": {"prefix": "NOT_IN_DB"},
|
||||
"offset": None,
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
}
|
||||
),
|
||||
[[1, "34"], [1, "5"]],
|
||||
)
|
||||
@@ -284,8 +326,12 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.export_graph(filepath="/dev/null")
|
||||
|
||||
def test_memdependency_x86(self):
|
||||
dg = KernelDG(self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx,
|
||||
self.semantics_csx)
|
||||
dg = KernelDG(
|
||||
self.kernel_x86_memdep,
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_csx,
|
||||
)
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12})
|
||||
@@ -295,8 +341,12 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.export_graph(filepath="/dev/null")
|
||||
|
||||
def test_kernelDG_AArch64(self):
|
||||
dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2,
|
||||
self.semantics_tx2)
|
||||
dg = KernelDG(
|
||||
self.kernel_AArch64,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_tx2,
|
||||
self.semantics_tx2,
|
||||
)
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10})
|
||||
@@ -321,6 +371,15 @@ class TestSemanticTools(unittest.TestCase):
|
||||
# test dot creation
|
||||
dg.export_graph(filepath="/dev/null")
|
||||
|
||||
def test_kernelDG_SVE(self):
|
||||
KernelDG(
|
||||
self.kernel_aarch64_SVE,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_a64fx,
|
||||
self.semantics_a64fx,
|
||||
)
|
||||
# TODO check for correct analysis
|
||||
|
||||
def test_hidden_load(self):
|
||||
machine_model_hld = MachineModel(
|
||||
path_to_yaml=self._find_file("hidden_load_machine_model.yml")
|
||||
@@ -353,14 +412,20 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.get_loopcarried_dependencies()
|
||||
|
||||
def test_loop_carried_dependency_aarch64(self):
|
||||
dg = KernelDG(self.kernel_aarch64_memdep, self.parser_AArch64, self.machine_model_tx2,
|
||||
self.semantics_tx2)
|
||||
dg = KernelDG(
|
||||
self.kernel_aarch64_memdep,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_tx2,
|
||||
self.semantics_tx2,
|
||||
)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 2)
|
||||
# based on line 6
|
||||
self.assertEqual(lc_deps[6]["latency"], 28.0)
|
||||
self.assertEqual([(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)])
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
|
||||
)
|
||||
|
||||
def test_loop_carried_dependency_x86(self):
|
||||
lcd_id = 8
|
||||
@@ -375,13 +440,14 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id]["dependencies"][0][0],
|
||||
dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
|
||||
dg.dg.nodes(data=True)[lcd_id]["instruction_form"],
|
||||
)
|
||||
# w/ flag dependencies: ID 9 w/ len=2
|
||||
# w/o flag dependencies: ID 5 w/ len=1
|
||||
# TODO discuss
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id2]["root"], dg.dg.nodes(data=True)[lcd_id2]["instruction_form"]
|
||||
lc_deps[lcd_id2]["root"],
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
)
|
||||
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
@@ -389,6 +455,31 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
)
|
||||
|
||||
def test_timeout_during_loop_carried_dependency(self):
|
||||
start_time = time.perf_counter()
|
||||
KernelDG(
|
||||
self.kernel_x86_long_LCD,
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_x86,
|
||||
timeout=10,
|
||||
)
|
||||
end_time = time.perf_counter()
|
||||
time_10 = end_time - start_time
|
||||
start_time = time.perf_counter()
|
||||
KernelDG(
|
||||
self.kernel_x86_long_LCD,
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_x86,
|
||||
timeout=2,
|
||||
)
|
||||
end_time = time.perf_counter()
|
||||
time_2 = end_time - start_time
|
||||
self.assertTrue(time_10 > 10)
|
||||
self.assertTrue(2 < time_2)
|
||||
self.assertTrue(time_2 < (time_10 - 7))
|
||||
|
||||
def test_is_read_is_written_x86(self):
|
||||
# independent form HW model
|
||||
dag = KernelDG(self.kernel_x86, self.parser_x86, None, None)
|
||||
|
||||
@@ -232,7 +232,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -284,8 +284,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -296,8 +298,672 @@
|
||||
"ZEN has 156 tests, compiled to 126 unique assembly representations.\n",
|
||||
"ZEN2 has 156 tests, compiled to 126 unique assembly representations.\n",
|
||||
"TX2 has 104 tests, compiled to 78 unique assembly representations.\n",
|
||||
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n"
|
||||
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n",
|
||||
"High-level iterations in assembly block: 16\n",
|
||||
"Measured: 1.1903856655856655\n",
|
||||
"IACA Predicted: 1.96875 TP: 1.875 LCD: None CP: None\n",
|
||||
"Ithemal Predicted: nan TP: None LCD: None CP: None\n",
|
||||
"LLVM-MCA Predicted: 2.240625 TP: 1.948125 LCD: 2.240625 CP: 3.8125\n",
|
||||
"OSACA Predicted: 1.875 TP: 1.875 LCD: 0.5 CP: 2.75\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Open Source Architecture Code Analyzer (OSACA) - 0.3.14\n",
|
||||
"Analyzed file: build/SKX/icc/O3/pi.marked.s\n",
|
||||
"Architecture: SKX\n",
|
||||
"Timestamp: 2021-04-15 12:15:40\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction\n",
|
||||
" * - Instruction micro-ops not bound to a port\n",
|
||||
" X - No throughput/latency information for this instruction in data file\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Combined Analysis Report\n",
|
||||
"------------------------\n",
|
||||
" Port pressure in cycles \n",
|
||||
" | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |\n",
|
||||
"-------------------------------------------------------------------------------------------------\n",
|
||||
" 62 | | | | | | | | || | | # pointer_increment=128 fa3c665ee18e1e5f704c8a6026891c36\n",
|
||||
" 63 | | | | | | | | || | | ..B1.4: # Preds ..B1.4 ..B1.3\n",
|
||||
" 64 | | | | | | | | || | | # Execution count [5.00e+00]\n",
|
||||
" 65 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $32, %ecx #16.5\n",
|
||||
" 66 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm5, %ymm9, %ymm14 #17.9\n",
|
||||
" 67 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm9, %zmm8 #17.14\n",
|
||||
" 68 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm8, %zmm1, %zmm10 #17.18\n",
|
||||
" 69 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm2, %zmm11 #17.25\n",
|
||||
" 70 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm11, %zmm11 #18.38\n",
|
||||
" 71 | | | | | | | | || | | * vmovaps %zmm0, %zmm29 #18.38\n",
|
||||
" 72 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm11, %zmm13 #18.38\n",
|
||||
" 73 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11 #18.38\n",
|
||||
" 74 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm13, %k0 #18.38\n",
|
||||
" 75 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm11, %zmm11, %zmm12 #18.38\n",
|
||||
" 76 | 1.00 | | | | | | | || | | knotw %k0, %k1 #18.38\n",
|
||||
" 77 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm11, %zmm13{%k1} #18.38\n",
|
||||
" 78 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm12, %zmm13{%k1} #18.38\n",
|
||||
" 79 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38\n",
|
||||
" 80 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm14, %ymm20 #17.9\n",
|
||||
" 81 | 0.50 | | | | | 1.50 | | || 7.0 | | vcvtdq2pd %ymm14, %zmm15 #17.14\n",
|
||||
" 82 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddpd %zmm15, %zmm1, %zmm16 #17.18\n",
|
||||
" 83 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm16, %zmm2, %zmm17 #17.25\n",
|
||||
" 84 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm0, %zmm17, %zmm17 #18.38\n",
|
||||
" 85 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm17, %zmm19 #18.38\n",
|
||||
" 86 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17 #18.38\n",
|
||||
" 87 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm19, %k2 #18.38\n",
|
||||
" 88 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm17, %zmm18 #18.38\n",
|
||||
" 89 | 1.00 | | | | | | | || | | knotw %k2, %k3 #18.38\n",
|
||||
" 90 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm19, %zmm17, %zmm19{%k3} #18.38\n",
|
||||
" 91 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm19, %zmm18, %zmm19{%k3} #18.38\n",
|
||||
" 92 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38\n",
|
||||
" 93 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm20, %ymm26 #17.9\n",
|
||||
" 94 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm20, %zmm21 #17.14\n",
|
||||
" 95 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm21, %zmm1, %zmm22 #17.18\n",
|
||||
" 96 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm22, %zmm2, %zmm23 #17.25\n",
|
||||
" 97 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm23, %zmm23 #18.38\n",
|
||||
" 98 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm23, %zmm25 #18.38\n",
|
||||
" 99 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23 #18.38\n",
|
||||
" 100 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm25, %k4 #18.38\n",
|
||||
" 101 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm23, %zmm23, %zmm24 #18.38\n",
|
||||
" 102 | 1.00 | | | | | | | || | | knotw %k4, %k5 #18.38\n",
|
||||
" 103 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm23, %zmm25{%k5} #18.38\n",
|
||||
" 104 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm24, %zmm25{%k5} #18.38\n",
|
||||
" 105 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm25, %zmm6 #18.38\n",
|
||||
" 106 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm26, %zmm27 #17.14\n",
|
||||
" 107 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm26, %ymm9 #17.9\n",
|
||||
" 108 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm27, %zmm1, %zmm28 #17.18\n",
|
||||
" 109 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm28, %zmm2, %zmm8 #17.25\n",
|
||||
" 110 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm8, %zmm8, %zmm29 #18.38\n",
|
||||
" 111 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm29, %zmm31 #18.38\n",
|
||||
" 112 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29 #18.38\n",
|
||||
" 113 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm31, %k6 #18.38\n",
|
||||
" 114 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm29, %zmm29, %zmm30 #18.38\n",
|
||||
" 115 | 1.00 | | | | | | | || | | knotw %k6, %k7 #18.38\n",
|
||||
" 116 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm29, %zmm31{%k7} #18.38\n",
|
||||
" 117 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm30, %zmm31{%k7} #18.38\n",
|
||||
" 118 | 0.00 | | | | | 1.00 | | || 0.0 | 4.0 | vfmadd231pd %zmm4, %zmm31, %zmm3 #18.38\n",
|
||||
" 119 | 0.00 | 0.34 | | | | 0.00 | 0.66 | || | | cmpl %edx, %ecx #16.5\n",
|
||||
" 120 | 0.00 | | | | | | 1.00 | || | | jb ..B1.4 # Prob 82% #16.5\n",
|
||||
"\n",
|
||||
" 30.0 4.34 2.00 2.00 2.00 2.00 30.0 2.66 44 8.0 \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Loop-Carried Dependencies Analysis Report\n",
|
||||
"-----------------------------------------\n",
|
||||
" 92 | 8.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38| [92, 118]\n",
|
||||
" 79 | 8.0 | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38| [79, 105]\n",
|
||||
" 66 | 4.0 | vpaddd %ymm5, %ymm9, %ymm14 #17.9| [66, 80, 93, 107]\n",
|
||||
" 65 | 1.0 | addl $32, %ecx #16.5| [65]\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Iterations: 100\n",
|
||||
"Instructions: 5600\n",
|
||||
"Total Cycles: 3585\n",
|
||||
"Total uOps: 7200\n",
|
||||
"\n",
|
||||
"Dispatch Width: 6\n",
|
||||
"uOps Per Cycle: 2.01\n",
|
||||
"IPC: 1.56\n",
|
||||
"Block RThroughput: 18.0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Instruction Info:\n",
|
||||
"[1]: #uOps\n",
|
||||
"[2]: Latency\n",
|
||||
"[3]: RThroughput\n",
|
||||
"[4]: MayLoad\n",
|
||||
"[5]: MayStore\n",
|
||||
"[6]: HasSideEffects (U)\n",
|
||||
"\n",
|
||||
"[1] [2] [3] [4] [5] [6] Instructions:\n",
|
||||
" 1 1 0.25 addl\t$32, %ecx\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
" 1 1 0.33 vmovaps\t%zmm0, %zmm29\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm11, %zmm13\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
" 1 1 1.00 knotw\t%k0, %k1\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm17, %zmm19\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
" 1 1 1.00 knotw\t%k2, %k3\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm23, %zmm25\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
" 1 1 1.00 knotw\t%k4, %k5\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm29, %zmm31\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
" 1 1 1.00 knotw\t%k6, %k7\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
" 1 1 0.25 cmpl\t%edx, %ecx\n",
|
||||
" 1 1 0.50 jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
"[0] - SKXDivider\n",
|
||||
"[1] - SKXFPDivider\n",
|
||||
"[2] - SKXPort0\n",
|
||||
"[3] - SKXPort1\n",
|
||||
"[4] - SKXPort2\n",
|
||||
"[5] - SKXPort3\n",
|
||||
"[6] - SKXPort4\n",
|
||||
"[7] - SKXPort5\n",
|
||||
"[8] - SKXPort6\n",
|
||||
"[9] - SKXPort7\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Resource pressure per iteration:\n",
|
||||
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] \n",
|
||||
" - - 31.17 5.72 2.00 2.00 - 29.10 2.01 - \n",
|
||||
"\n",
|
||||
"Resource pressure by instruction:\n",
|
||||
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:\n",
|
||||
" - - - 0.80 - - - 0.19 0.01 - addl\t$32, %ecx\n",
|
||||
" - - 0.07 0.92 - - - 0.01 - - vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
" - - 0.42 - - - - 0.58 - - vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
" - - 0.51 - - - - 0.49 - - vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
" - - 0.45 - - - - 0.55 - - vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
" - - - 1.00 - - - - - - vmovaps\t%zmm0, %zmm29\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm11, %zmm13\n",
|
||||
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
" - - 0.49 - - - - 0.51 - - vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k0, %k1\n",
|
||||
" - - 0.44 - - - - 0.56 - - vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
" - - 0.70 - - - - 0.30 - - vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
" - - 0.48 - - - - 0.52 - - vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
" - - 0.42 - - - - 0.58 - - vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
" - - 0.32 - - - - 0.68 - - vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm17, %zmm19\n",
|
||||
" - - 0.32 - 1.00 - - 0.68 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k2, %k3\n",
|
||||
" - - 0.53 - - - - 0.47 - - vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
" - - 0.57 - - - - 0.43 - - vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
" - - 0.52 - - - - 0.48 - - vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm23, %zmm25\n",
|
||||
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
" - - 0.53 - - - - 0.47 - - vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k4, %k5\n",
|
||||
" - - 0.42 - - - - 0.58 - - vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
" - - 0.60 - - - - 0.40 - - vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
" - - 0.26 - - - - 0.74 - - vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
" - - 0.34 - - - - 0.66 - - vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm29, %zmm31\n",
|
||||
" - - 0.34 - 1.00 - - 0.66 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
" - - 0.52 - - - - 0.48 - - vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k6, %k7\n",
|
||||
" - - 0.47 - - - - 0.53 - - vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
" - - 0.66 - - - - 0.34 - - vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
" - - - - - - - - 1.00 - cmpl\t%edx, %ecx\n",
|
||||
" - - - - - - - - 1.00 - jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Timeline view:\n",
|
||||
" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 \n",
|
||||
"Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678\n",
|
||||
"\n",
|
||||
"[0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[0,2] D=eeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[0,3] D========eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[0,4] D============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[0,5] .D===============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[0,6] .DeE------------------R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[0,7] .D===================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[0,8] . D======================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[0,9] . D======================eeeeE-------R . . . . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[0,10] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[0,11] . D==========================eE----------R . . . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[0,12] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[0,13] . D====================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[0,14] . D========================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[0,15] . DeE-------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[0,16] . DeeeeeeeE-------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[0,17] . D=======eeeeE---------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[0,18] . D==========eeeeE-----------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[0,19] . D==============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[0,20] . D==================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[0,21] . D=====================eeeeeeeeeeeE----------R. . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[0,22] . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[0,23] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[0,24] . D==========================eE---------------R. . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[0,25] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[0,26] . .D===================================eeeeE--R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[0,27] . .D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[0,28] . .DeE------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[0,29] . .DeeeeeeeE------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[0,30] . .D=======eeeeE--------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[0,31] . . D==========eeeeE----------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[0,32] . . D==============eeeeE------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[0,33] . . D==================eeeeE--------------------R . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[0,34] . . D=====================eeeeeeeeeeeE---------R . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[0,35] . . D=====================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[0,36] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[0,37] . . D==========================eE--------------R . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[0,38] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[0,39] . . D===================================eeeeE-R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[0,40] . . D=======================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[0,41] . . DeeeeeeeE------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[0,42] . . DeE------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[0,43] . . D=======eeeeE--------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[0,44] . . D=============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[0,45] . . D=================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[0,46] . . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[0,47] . . .D=========================eeeeeeeeeeeE-----R. . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[0,48] . . .D=========================eeeeE------------R. . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[0,49] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[0,50] . . .D==============================eE----------R. . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[0,51] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[0,52] . . . D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[0,53] . . . D===========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[0,54] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[0,55] . . . D=eE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . jb\t..B1.4\n",
|
||||
"[1,0] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[1,1] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[1,2] . . . D==eeeeeeeE-------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[1,3] . . . D===============eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[1,4] . . . D====================eeeeE----------------------R . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[1,5] . . . D=========================eeeeE-----------------R . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[1,6] . . . DeE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[1,7] . . . D============================eeeeE-------------R . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[1,8] . . . D================================eeeeeeeeeeeE--R . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[1,9] . . . D================================eeeeE---------R . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[1,10] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[1,11] . . . D====================================eE---------R . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[1,12] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[1,13] . . . D==============================================eeeeER . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[1,14] . . . D==================================================eeeeER . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[1,15] . . . DeE-----------------------------------------------------R . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[1,16] . . . .D===eeeeeeeE-------------------------------------------R . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[1,17] . . . .D==============eeeeE-----------------------------------R . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[1,18] . . . .D==================eeeeE-------------------------------R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[1,19] . . . .D======================eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[1,20] . . . . D================================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[1,21] . . . . D====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[1,22] . . . . D=====================================eeeeE-----------R . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[1,23] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[1,24] . . . . D========================================eE----------R . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[1,25] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[1,26] . . . . D==================================================eeeeER. . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[1,27] . . . . D======================================================eeeeER . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[1,28] . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[1,29] . . . . D=================================eeeeeeeE-----------------R . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[1,30] . . . . D========================================eeeeE-------------R . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[1,31] . . . . D===========================================eeeeE---------R . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[1,32] . . . . .D==============================================eeeeE-----R . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[1,33] . . . . . D=================================================eeeeE-R . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[1,34] . . . . . D====================================================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[1,35] . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[1,36] . . . . . D=============================================================eeeeER . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[1,37] . . . . . D======================================================eE----------R . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[1,38] . . . . . .D============================================================eeeeER . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[1,39] . . . . . . D===============================================================eeeeER . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[1,40] . . . . . . D==================================================================eeeeER . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[1,41] . . . . . . D============================eeeeeeeE-----------------------------------R . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[1,42] . . . . . . DeE--------------------------------------------------------------------R . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[1,43] . . . . . . D==================================eeeeE-------------------------------R . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[1,44] . . . . . . D=====================================eeeeE---------------------------R . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[1,45] . . . . . . D===========================================eeeeE---------------------R . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[1,46] . . . . . . D===============================================eeeeE-----------------R . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[1,47] . . . . . . .D==================================================eeeeeeeeeeeE------R . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[1,48] . . . . . . . D=================================================eeeeE-------------R . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[1,49] . . . . . . . D===========================================================eeeeE--R . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[1,50] . . . . . . . D=====================================================eE----------R . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[1,51] . . . . . . . D==========================================================eeeeE-R . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[1,52] . . . . . . . D==============================================================eeeeER . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[1,53] . . . . . . . .D=================================================================eeeeER . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[1,54] . . . . . . . .DeE--------------------------------------------------------------------R . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[1,55] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . jb\t..B1.4\n",
|
||||
"[2,0] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[2,1] . . . . . . . . D=eE------------------------------------------------------------------R . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[2,2] . . . . . . . . D======================eeeeeeeE--------------------------------------R . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[2,3] . . . . . . . . D==============================eeeeE---------------------------------R . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[2,4] . . . . . . . . D===================================eeeeE----------------------------R . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[2,5] . . . . . . . . D========================================eeeeE-----------------------R . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[2,6] . . . . . . . . DeE-----------------------------------------------------------------R . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[2,7] . . . . . . . . D===========================================eeeeE-------------------R . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[2,8] . . . . . . . . D================================================eeeeeeeeeeeE-------R . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[2,9] . . . . . . . . D================================================eeeeE-------------R . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[2,10] . . . . . . . . D==========================================================eeeeE---R . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[2,11] . . . . . . . . .D======================================================eE---------R . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[2,12] . . . . . . . . .D=========================================================eeeeE---R . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[2,13] . . . . . . . . . D============================================================eeeeER . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[2,14] . . . . . . . . . D================================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[2,15] . . . . . . . . . DeE------------------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[2,16] . . . . . . . . . D==================eeeeeeeE-----------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[2,17] . . . . . . . . . D=========================eeeeE-------------------------------------R . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[2,18] . . . . . . . . . D=============================eeeeE--------------------------------R . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[2,19] . . . . . . . . . .D=================================eeeeE---------------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[2,20] . . . . . . . . . . D=====================================eeeeE----------------------R . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[2,21] . . . . . . . . . . D=========================================eeeeeeeeeeeE-----------R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[2,22] . . . . . . . . . . D=========================================eeeeE-----------------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[2,23] . . . . . . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[2,24] . . . . . . . . . . D===============================================eE-------------R . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[2,25] . . . . . . . . . . D=================================================eeeeE-------R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[2,26] . . . . . . . . . . . D===================================================eeeeE---R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[2,27] . . . . . . . . . . . D=======================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[2,28] . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[2,29] . . . . . . . . . . . D============eeeeeeeE--------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[2,30] . . . . . . . . . . . D====================eeeeE---------------------------------R . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[2,31] . . . . . . . . . . . D=========================eeeeE---------------------------R . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[2,32] . . . . . . . . . . . .D=============================eeeeE----------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[2,33] . . . . . . . . . . . . D==================================eeeeE----------------R . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[2,34] . . . . . . . . . . . . D=====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[2,35] . . . . . . . . . . . . D======================================eeeeE-----------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[2,36] . . . . . . . . . . . . D===============================================eeeeE-R . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[2,37] . . . . . . . . . . . . D========================================eE----------R . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[2,38] . . . . . . . . . . . . .D==============================================eeeeER . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[2,39] . . . . . . . . . . . . . D=================================================eeeeER . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[2,40] . . . . . . . . . . . . . D====================================================eeeeER . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[2,41] . . . . . . . . . . . . . D======eeeeeeeE------------------------------------------R . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[2,42] . . . . . . . . . . . . . DeE------------------------------------------------------R . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[2,43] . . . . . . . . . . . . . D===============eeeeE-----------------------------------R . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[2,44] . . . . . . . . . . . . . D========================eeeeE--------------------------R . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[2,45] . . . . . . . . . . . . . D============================eeeeE----------------------R . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[2,46] . . . . . . . . . . . . . .D======================================eeeeE-----------R . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[2,47] . . . . . . . . . . . . . . D=========================================eeeeeeeeeeeER . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[2,48] . . . . . . . . . . . . . . D=========================================eeeeE------R . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[2,49] . . . . . . . . . . . . . . D===================================================eeeeER . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[2,50] . . . . . . . . . . . . . . D============================================eE---------R . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[2,51] . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[2,52] . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[2,53] . . . . . . . . . . . . . . .D========================================================eeeeER . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[2,54] . . . . . . . . . . . . . . . DeE----------------------------------------------------------R . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[2,55] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . jb\t..B1.4\n",
|
||||
"[3,0] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[3,1] . . . . . . . . . . . . . . . DeE--------------------------------------------------------R . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[3,2] . . . . . . . . . . . . . . . D==eeeeeeeE------------------------------------------------R . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[3,3] . . . . . . . . . . . . . . . D=========eeeeE--------------------------------------------R . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[3,4] . . . . . . . . . . . . . . . D================eeeeE-------------------------------------R . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[3,5] . . . . . . . . . . . . . . . D===================eeeeE---------------------------------R . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[3,6] . . . . . . . . . . . . . . . DeE-------------------------------------------------------R . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[3,7] . . . . . . . . . . . . . . . D===================================eeeeE-----------------R . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[3,8] . . . . . . . . . . . . . . . .D======================================eeeeeeeeeeeE------R . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[3,9] . . . . . . . . . . . . . . . .D=======================================eeeeE------------R . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[3,10] . . . . . . . . . . . . . . . .D=================================================eeeeE--R . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[3,11] . . . . . . . . . . . . . . . . D===========================================eE----------R . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[3,12] . . . . . . . . . . . . . . . . D===============================================eeeeE--R . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[3,13] . . . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[3,14] . . . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[3,15] . . . . . . . . . . . . . . . . DeE--------------------------------------------------------R. . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[3,16] . . . . . . . . . . . . . . . . .D===============================eeeeeeeE------------------R. . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[3,17] . . . . . . . . . . . . . . . . .D=======================================eeeeE-------------R. . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[3,18] . . . . . . . . . . . . . . . . .D===========================================eeeeE---------R. . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[3,19] . . . . . . . . . . . . . . . . . D==============================================eeeeE-----R. . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[3,20] . . . . . . . . . . . . . . . . . D==================================================eeeeE-R. . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[3,21] . . . . . . . . . . . . . . . . . D=====================================================eeeeeeeeeeeER. . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[3,22] . . . . . . . . . . . . . . . . . D=====================================================eeeeE------R. . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[3,23] . . . . . . . . . . . . . . . . . D==============================================================eeeeER . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[3,24] . . . . . . . . . . . . . . . . . .D=======================================================eE---------R . . . . knotw\t%k2, %k3\n",
|
||||
"[3,25] . . . . . . . . . . . . . . . . . . D============================================================eeeeER . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[3,26] . . . . . . . . . . . . . . . . . . D================================================================eeeeER . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[3,27] . . . . . . . . . . . . . . . . . . D===================================================================eeeeER . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[3,28] . . . . . . . . . . . . . . . . . . DeE----------------------------------------------------------------------R . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[3,29] . . . . . . . . . . . . . . . . . . D===========================eeeeeeeE------------------------------------R . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[3,30] . . . . . . . . . . . . . . . . . . D==================================eeeeE--------------------------------R . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[3,31] . . . . . . . . . . . . . . . . . . D======================================eeeeE----------------------------R . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[3,32] . . . . . . . . . . . . . . . . . . D=========================================eeeeE------------------------R . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[3,33] . . . . . . . . . . . . . . . . . . D=============================================eeeeE--------------------R . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[3,34] . . . . . . . . . . . . . . . . . . .D================================================eeeeeeeeeeeE---------R . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[3,35] . . . . . . . . . . . . . . . . . . .D=================================================eeeeE---------------R . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[3,36] . . . . . . . . . . . . . . . . . . . D==========================================================eeeeE-----R . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[3,37] . . . . . . . . . . . . . . . . . . . D====================================================eE-------------R . . knotw\t%k4, %k5\n",
|
||||
"[3,38] . . . . . . . . . . . . . . . . . . . D========================================================eeeeE-----R . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[3,39] . . . . . . . . . . . . . . . . . . . D============================================================eeeeE-R . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[3,40] . . . . . . . . . . . . . . . . . . . D===============================================================eeeeER. . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[3,41] . . . . . . . . . . . . . . . . . . . D======================eeeeeeeE--------------------------------------R. . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[3,42] . . . . . . . . . . . . . . . . . . . .DeE-----------------------------------------------------------------R. . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[3,43] . . . . . . . . . . . . . . . . . . . .D============================eeeeE----------------------------------R. . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[3,44] . . . . . . . . . . . . . . . . . . . . D===============================eeeeE------------------------------R. . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[3,45] . . . . . . . . . . . . . . . . . . . . D=====================================eeeeE------------------------R. . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[3,46] . . . . . . . . . . . . . . . . . . . . D=========================================eeeeE--------------------R. . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[3,47] . . . . . . . . . . . . . . . . . . . . D============================================eeeeeeeeeeeE---------R. . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[3,48] . . . . . . . . . . . . . . . . . . . . D===========================================eeeeE----------------R. . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[3,49] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE-----R. . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[3,50] . . . . . . . . . . . . . . . . . . . . D==============================================eE---------------R. . knotw\t%k6, %k7\n",
|
||||
"[3,51] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE----R. . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[3,52] . . . . . . . . . . . . . . . . . . . . .D=========================================================eeeeER. . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[3,53] . . . . . . . . . . . . . . . . . . . . . D============================================================eeeeER vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[3,54] . . . . . . . . . . . . . . . . . . . . . DeE--------------------------------------------------------------R cmpl\t%edx, %ecx\n",
|
||||
"[3,55] . . . . . . . . . . . . . . . . . . . . . DeE-------------------------------------------------------------R jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Average Wait times (based on the timeline view):\n",
|
||||
"[0]: Executions\n",
|
||||
"[1]: Average time spent waiting in a scheduler's queue\n",
|
||||
"[2]: Average time spent waiting in a scheduler's queue while ready\n",
|
||||
"[3]: Average time elapsed from WB until retire stage\n",
|
||||
"\n",
|
||||
" [0] [1] [2] [3]\n",
|
||||
"0. 4 1.0 1.0 42.5 addl\t$32, %ecx\n",
|
||||
"1. 4 1.3 1.3 42.0 vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"2. 4 7.8 7.8 30.8 vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"3. 4 16.5 1.8 26.0 vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"4. 4 21.8 1.3 21.8 vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"5. 4 25.8 0.5 18.3 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"6. 4 1.0 1.0 45.8 vmovaps\t%zmm0, %zmm29\n",
|
||||
"7. 4 32.3 3.0 12.3 vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"8. 4 36.0 0.3 3.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"9. 4 36.3 0.8 10.3 vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"10. 4 46.5 0.0 1.3 vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"11. 4 40.8 1.3 9.5 knotw\t%k0, %k1\n",
|
||||
"12. 4 45.8 0.0 1.3 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"13. 4 49.0 0.0 0.0 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"14. 4 52.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"15. 4 1.0 1.0 54.5 vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"16. 4 14.0 14.0 34.8 vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"17. 4 22.3 1.3 29.5 vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"18. 4 26.0 0.3 25.3 vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"19. 4 29.8 0.3 21.0 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"20. 4 35.3 2.0 15.0 vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"21. 4 38.8 0.0 6.5 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"22. 4 39.3 1.0 12.5 vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"23. 4 48.8 0.0 3.5 vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"24. 4 43.0 0.8 11.8 knotw\t%k2, %k3\n",
|
||||
"25. 4 47.8 0.0 3.5 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"26. 4 51.0 0.0 1.3 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"27. 4 54.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"28. 4 1.0 1.0 56.5 vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"29. 4 19.0 19.0 31.8 vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"30. 4 26.3 0.3 27.5 vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"31. 4 30.0 0.5 23.0 vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"32. 4 33.5 0.3 18.8 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"33. 4 37.5 0.5 14.3 vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"34. 4 40.5 0.0 5.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"35. 4 40.8 0.5 12.3 vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"36. 4 50.5 0.0 2.8 vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"37. 4 44.0 0.5 11.8 knotw\t%k4, %k5\n",
|
||||
"38. 4 49.5 0.3 2.5 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"39. 4 52.8 0.0 0.5 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"40. 4 56.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"41. 4 15.0 15.0 37.8 vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"42. 4 1.0 1.0 57.3 vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"43. 4 22.0 0.8 33.0 vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"44. 4 27.3 2.0 27.0 vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"45. 4 32.3 1.0 22.0 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"46. 4 38.0 2.0 16.0 vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"47. 4 41.0 0.0 5.0 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"48. 4 40.5 0.3 11.8 vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"49. 4 51.0 0.0 2.0 vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"50. 4 44.3 0.8 11.0 knotw\t%k6, %k7\n",
|
||||
"51. 4 50.5 0.5 1.5 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"52. 4 53.8 0.0 0.0 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"53. 4 57.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"54. 4 1.0 1.0 58.5 cmpl\t%edx, %ecx\n",
|
||||
"55. 4 1.3 0.0 57.5 jb\t..B1.4\n",
|
||||
" 4 32.5 1.6 18.4 <total>\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-30;16:57:45\n",
|
||||
"Analyzed File - build/SKX/icc/O3/pi.marked.o\n",
|
||||
"Binary Format - 64Bit\n",
|
||||
"Architecture - SKX\n",
|
||||
"Analysis Type - Throughput\n",
|
||||
"\n",
|
||||
"Throughput Analysis Report\n",
|
||||
"--------------------------\n",
|
||||
"Block Throughput: 31.50 Cycles Throughput Bottleneck: Backend\n",
|
||||
"Loop Count: 103\n",
|
||||
"Port Binding In Cycles Per Iteration:\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"| Cycles | 30.0 0.0 | 4.0 | 2.0 2.0 | 2.0 2.0 | 0.0 | 30.0 | 1.0 | 0.0 |\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"\n",
|
||||
"DV - Divider pipe (on port 0)\n",
|
||||
"D - Data fetch pipe (on ports 2 and 3)\n",
|
||||
"F - Macro Fusion with the previous instruction occurred\n",
|
||||
"* - instruction micro-ops not bound to a port\n",
|
||||
"^ - Micro Fusion occurred\n",
|
||||
"# - ESP Tracking sync uop was issued\n",
|
||||
"@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected\n",
|
||||
"X - instruction not supported, was not accounted in Analysis\n",
|
||||
"\n",
|
||||
"| Num Of | Ports pressure in cycles | |\n",
|
||||
"| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
|
||||
"-----------------------------------------------------------------------------------------\n",
|
||||
"| 1 | | | | | | | 1.0 | | add ecx, 0x20\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm14, ymm9, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm8, ymm9\n",
|
||||
"| 1 | | | | | | 1.0 | | | vaddpd zmm10, zmm1, zmm8\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm11, zmm2, zmm10\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm11, zmm11, zmm0\n",
|
||||
"| 1* | | | | | | | | | vmovaps zmm29, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm13, zmm11\n",
|
||||
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm11, zmm13, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k0, zmm13, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm11, zmm11\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k1, k0\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm13{k1}, zmm11, zmm13\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm13{k1}, zmm12, zmm13\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm13, zmm4\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm20, ymm14, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm15, ymm14\n",
|
||||
"| 1 | 1.0 | | | | | | | | vaddpd zmm16, zmm1, zmm15\n",
|
||||
"| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm2, zmm16\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm17, zmm17, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm19, zmm17\n",
|
||||
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm17, zmm19, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k2, zmm19, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm17, zmm17\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k3, k2\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm17, zmm19\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm18, zmm19\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm4\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm26, ymm20, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm21, ymm20\n",
|
||||
"| 1 | | | | | | 1.0 | | | vaddpd zmm22, zmm1, zmm21\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm23, zmm2, zmm22\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm23, zmm23, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm25, zmm23\n",
|
||||
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm23, zmm25, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k4, zmm25, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm24, zmm23, zmm23\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k5, k4\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm25{k5}, zmm23, zmm25\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm25{k5}, zmm24, zmm25\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm25, zmm4\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm27, ymm26\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm9, ymm26, ymm5\n",
|
||||
"| 1 | 1.0 | | | | | | | | vaddpd zmm28, zmm1, zmm27\n",
|
||||
"| 1 | | | | | | 1.0 | | | vmulpd zmm8, zmm2, zmm28\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm29, zmm8, zmm8\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm31, zmm29\n",
|
||||
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm29, zmm31, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k6, zmm31, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm30, zmm29, zmm29\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k7, k6\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm29, zmm31\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm30, zmm31\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm31, zmm4\n",
|
||||
"| 1* | | | | | | | | | cmp ecx, edx\n",
|
||||
"| 0*F | | | | | | | | | jb 0xfffffffffffffeb3\n",
|
||||
"Total Num Of Uops: 71\n",
|
||||
"Analysis Notes:\n",
|
||||
"Backend allocation was stalled due to unavailable allocation resources.\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -307,7 +973,8 @@
|
||||
" for l in r['analyzed kernel']\n",
|
||||
" if l['instruction']]))\n",
|
||||
"for a in archs:\n",
|
||||
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')"
|
||||
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')\n",
|
||||
"get_info((\"SKX\", \"icc\", \"O3\", \"pi\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -343,7 +1010,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 25,
|
||||
"metadata": {
|
||||
"hideCode": false,
|
||||
"hidePrompt": false,
|
||||
|
||||
@@ -1,33 +1,26 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
from subprocess import check_call, check_output, CalledProcessError, STDOUT
|
||||
from itertools import chain
|
||||
import shutil
|
||||
from functools import lru_cache
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
import socket
|
||||
import pickle
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from glob import glob
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from subprocess import STDOUT, CalledProcessError, check_call, check_output
|
||||
|
||||
import requests
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from osaca.osaca import reduce_to_section
|
||||
|
||||
from kerncraft.models import benchmark
|
||||
from kerncraft.incore_model import (
|
||||
parse_asm,
|
||||
asm_instrumentation,
|
||||
iaca_analyse_instrumented_binary,
|
||||
llvm_mca_analyse_instrumented_assembly,
|
||||
osaca_analyse_instrumented_assembly,
|
||||
llvm_mca_analyse_instrumented_assembly
|
||||
parse_asm,
|
||||
)
|
||||
|
||||
from kerncraft.models import benchmark
|
||||
from osaca.osaca import reduce_to_section
|
||||
|
||||
# Scaling of inner dimension for 1D, 2D and 3D kernels
|
||||
# * consider kernels to be compiled with multiple compilers and different options
|
||||
@@ -39,37 +32,50 @@ from kerncraft.incore_model import (
|
||||
# Collect inner loop body assembly for each kernel/compiler/options combination
|
||||
# * analyze with OSACA, IACA and LLVM-MCA
|
||||
|
||||
hosts_arch_map = {r"skylakesp2": "SKX",
|
||||
r"ivyep1": "IVB",
|
||||
r"naples1": "ZEN",
|
||||
r"rome1": "ZEN2",
|
||||
r"warmup": "TX2",
|
||||
r"qp4-node-[0-9]+": "A64FX"}
|
||||
hosts_arch_map = {
|
||||
r"skylakesp2": "SKX",
|
||||
r"ivyep1": "IVB",
|
||||
r"naples1": "ZEN",
|
||||
r"rome1": "ZEN2",
|
||||
r"warmup": "TX2",
|
||||
r"qp4-node-[0-9]+": "A64FX",
|
||||
}
|
||||
|
||||
arch_info = {
|
||||
'SKX': {
|
||||
'prepare': ['likwid-setFrequencies -f 2.4 -t 0'.split()],
|
||||
'IACA': 'SKX',
|
||||
'OSACA': 'SKX',
|
||||
'LLVM-MCA': '-mcpu=skylake-avx512',
|
||||
'Ithemal': 'skl',
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"SKX": {
|
||||
"prepare": ["likwid-setFrequencies -f 2.4 -t 0".split()],
|
||||
"IACA": "SKX",
|
||||
"OSACA": "SKX",
|
||||
"LLVM-MCA": "-mcpu=skylake-avx512",
|
||||
"Ithemal": "skl",
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
'icc': {
|
||||
"Ofast": "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O3": "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"icc": {
|
||||
"Ofast": (
|
||||
"-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": (
|
||||
"-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O2": (
|
||||
"-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O1": (
|
||||
"-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
},
|
||||
'clang': {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(),
|
||||
"O3": "-O3 -march=skylake-avx512 -ffreestanding".split(),
|
||||
"O2": "-O2 -march=skylake-avx512 -ffreestanding".split(),
|
||||
"O1": "-O1 -march=skylake-avx512 -ffreestanding".split(),
|
||||
|
||||
},
|
||||
'gcc': {
|
||||
"gcc": {
|
||||
"Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||
"O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||
"O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||
@@ -77,17 +83,19 @@ arch_info = {
|
||||
},
|
||||
},
|
||||
},
|
||||
'IVB': {
|
||||
'prepare': ['likwid-setFrequencies -f 3.0 -t 0'.split()],
|
||||
'IACA': 'IVB',
|
||||
'OSACA': 'IVB',
|
||||
'LLVM-MCA': '-mcpu=ivybridge',
|
||||
'Ithemal': 'ivb',
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"IVB": {
|
||||
"prepare": ["likwid-setFrequencies -f 3.0 -t 0".split()],
|
||||
"IACA": "IVB",
|
||||
"OSACA": "IVB",
|
||||
"LLVM-MCA": "-mcpu=ivybridge",
|
||||
"Ithemal": "ivb",
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"icc": {
|
||||
"Ofast": "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"Ofast": (
|
||||
"-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
@@ -106,14 +114,14 @@ arch_info = {
|
||||
},
|
||||
},
|
||||
},
|
||||
'ZEN': {
|
||||
'prepare': ['likwid-setFrequencies -f 2.3 -t 0'.split()],
|
||||
'IACA': None,
|
||||
'OSACA': 'ZEN1',
|
||||
'LLVM-MCA': '-mcpu=znver1',
|
||||
'Ithemal': None,
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"ZEN": {
|
||||
"prepare": ["likwid-setFrequencies -f 2.3 -t 0".split()],
|
||||
"IACA": None,
|
||||
"OSACA": "ZEN1",
|
||||
"LLVM-MCA": "-mcpu=znver1",
|
||||
"Ithemal": None,
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -march=znver1 -ffreestanding".split(),
|
||||
@@ -128,21 +136,23 @@ arch_info = {
|
||||
"O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(),
|
||||
},
|
||||
"icc": {
|
||||
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"Ofast": (
|
||||
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
},
|
||||
},
|
||||
},
|
||||
'ZEN2': {
|
||||
'prepare': ['likwid-setFrequencies -f 2.35 -t 0'.split()],
|
||||
'IACA': None,
|
||||
'OSACA': 'ZEN2',
|
||||
'LLVM-MCA': '-mcpu=znver2',
|
||||
'Ithemal': None,
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"ZEN2": {
|
||||
"prepare": ["likwid-setFrequencies -f 2.35 -t 0".split()],
|
||||
"IACA": None,
|
||||
"OSACA": "ZEN2",
|
||||
"LLVM-MCA": "-mcpu=znver2",
|
||||
"Ithemal": None,
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -march=znver2 -ffreestanding".split(),
|
||||
@@ -157,22 +167,24 @@ arch_info = {
|
||||
"O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(),
|
||||
},
|
||||
"icc": {
|
||||
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"Ofast": (
|
||||
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
},
|
||||
},
|
||||
},
|
||||
'TX2': {
|
||||
'Clock [MHz]': 2200, # reading out via perf. counters is not supported
|
||||
'IACA': None,
|
||||
'OSACA': 'TX2',
|
||||
'assign_optimal_throughput': True,
|
||||
'LLVM-MCA': '-mcpu=thunderx2t99 -march=aarch64',
|
||||
'Ithemal': None,
|
||||
'isa': 'aarch64',
|
||||
'perfevents': [],
|
||||
"TX2": {
|
||||
"Clock [MHz]": 2200, # reading out via perf. counters is not supported
|
||||
"IACA": None,
|
||||
"OSACA": "TX2",
|
||||
"assign_optimal_throughput": True,
|
||||
"LLVM-MCA": "-mcpu=thunderx2t99 -march=aarch64",
|
||||
"Ithemal": None,
|
||||
"isa": "aarch64",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||
@@ -188,16 +200,16 @@ arch_info = {
|
||||
},
|
||||
},
|
||||
},
|
||||
'A64FX': {
|
||||
'Clock [MHz]': 1800, # reading out via perf. counters is not supported
|
||||
'L2_volume_metric': 'L1<->L2 data volume [GBytes]',
|
||||
'IACA': None,
|
||||
'OSACA': 'A64FX',
|
||||
'assign_optimal_throughput': False,
|
||||
'LLVM-MCA': '-mcpu=a64fx -march=aarch64',
|
||||
'Ithemal': None,
|
||||
'isa': 'aarch64',
|
||||
'perfevents': [],
|
||||
"A64FX": {
|
||||
"Clock [MHz]": 1800, # reading out via perf. counters is not supported
|
||||
"L2_volume_metric": "L1<->L2 data volume [GBytes]",
|
||||
"IACA": None,
|
||||
"OSACA": "A64FX",
|
||||
"assign_optimal_throughput": False,
|
||||
"LLVM-MCA": "-mcpu=a64fx -march=aarch64",
|
||||
"Ithemal": None,
|
||||
"isa": "aarch64",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"gcc": {
|
||||
"Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(),
|
||||
@@ -211,7 +223,7 @@ arch_info = {
|
||||
"O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||
"O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -231,12 +243,13 @@ def get_kernels(kernels=None):
|
||||
if kernels is None:
|
||||
kernels = []
|
||||
for f in glob("kernels/*.c"):
|
||||
f = f.rsplit('.', 1)[0].split('/', 1)[1]
|
||||
f = f.rsplit(".", 1)[0].split("/", 1)[1]
|
||||
if f == "dummy":
|
||||
continue
|
||||
kernels.append(f)
|
||||
return kernels
|
||||
|
||||
|
||||
# Columns:
|
||||
# arch
|
||||
# kernel
|
||||
@@ -259,6 +272,7 @@ def get_kernels(kernels=None):
|
||||
# allruns [list (length, repetitions, cy/it, L2 B/it)]
|
||||
# perfevents [dict event: counter/it]
|
||||
|
||||
|
||||
def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True):
|
||||
arch = get_current_arch()
|
||||
if arch is None:
|
||||
@@ -268,90 +282,132 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
||||
islocal = True
|
||||
arches = [arch]
|
||||
ainfo = arch_info.get(arch)
|
||||
if 'prepare' in ainfo:
|
||||
for cmd in ainfo['prepare']:
|
||||
if "prepare" in ainfo:
|
||||
for cmd in ainfo["prepare"]:
|
||||
check_call(cmd)
|
||||
for arch in arches:
|
||||
ainfo = arch_info.get(arch)
|
||||
print(arch)
|
||||
data_path = Path(f"build/{arch}/data.pkl")
|
||||
if data_path.exists():
|
||||
with data_path.open('rb') as f:
|
||||
with data_path.open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
else:
|
||||
data = []
|
||||
data_lastsaved = deepcopy(data)
|
||||
for compiler, compiler_cflags in ainfo['cflags'].items():
|
||||
for compiler, compiler_cflags in ainfo["cflags"].items():
|
||||
if not shutil.which(compiler) and islocal:
|
||||
print(compiler, "not found in path! Skipping...")
|
||||
continue
|
||||
for cflags_name, cflags in compiler_cflags.items():
|
||||
for kernel in get_kernels():
|
||||
print(f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
|
||||
end=": ", flush=True)
|
||||
row = list([r for r in data
|
||||
if r['arch'] == arch and r['kernel'] == kernel and
|
||||
r['compiler'] == compiler and r['cflags_name'] == cflags_name])
|
||||
print(
|
||||
f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
|
||||
end=": ",
|
||||
flush=True,
|
||||
)
|
||||
row = list(
|
||||
[
|
||||
r
|
||||
for r in data
|
||||
if r["arch"] == arch
|
||||
and r["kernel"] == kernel
|
||||
and r["compiler"] == compiler
|
||||
and r["cflags_name"] == cflags_name
|
||||
]
|
||||
)
|
||||
if row:
|
||||
row = row[0]
|
||||
else:
|
||||
orig_row = None
|
||||
row = {
|
||||
'arch': arch,
|
||||
'kernel': kernel,
|
||||
'compiler': compiler,
|
||||
'cflags_name': cflags_name,
|
||||
'element_size': 8,
|
||||
"arch": arch,
|
||||
"kernel": kernel,
|
||||
"compiler": compiler,
|
||||
"cflags_name": cflags_name,
|
||||
"element_size": 8,
|
||||
}
|
||||
data.append(row)
|
||||
|
||||
# Build
|
||||
print("build", end="", flush=True)
|
||||
asm_path, exec_path, overwrite = build_kernel(
|
||||
kernel, arch, compiler, cflags, cflags_name, dontbuild=not islocal)
|
||||
kernel,
|
||||
arch,
|
||||
compiler,
|
||||
cflags,
|
||||
cflags_name,
|
||||
dontbuild=not islocal,
|
||||
)
|
||||
|
||||
if overwrite:
|
||||
# clear all measurement information
|
||||
row['best_length'] = None
|
||||
row['best_runtime'] = None
|
||||
row['L2_traffic'] = None
|
||||
row['allruns'] = None
|
||||
row['perfevents'] = None
|
||||
row["best_length"] = None
|
||||
row["best_runtime"] = None
|
||||
row["L2_traffic"] = None
|
||||
row["allruns"] = None
|
||||
row["perfevents"] = None
|
||||
|
||||
# Mark for IACA, OSACA and LLVM-MCA
|
||||
print("mark", end="", flush=True)
|
||||
try:
|
||||
marked_asmfile, marked_objfile, row['pointer_increment'], overwrite = mark(
|
||||
asm_path, compiler, cflags, isa=ainfo['isa'], overwrite=overwrite)
|
||||
row['marking_error'] = None
|
||||
(
|
||||
marked_asmfile,
|
||||
marked_objfile,
|
||||
row["pointer_increment"],
|
||||
overwrite,
|
||||
) = mark(
|
||||
asm_path,
|
||||
compiler,
|
||||
cflags,
|
||||
isa=ainfo["isa"],
|
||||
overwrite=overwrite,
|
||||
)
|
||||
row["marking_error"] = None
|
||||
except ValueError as e:
|
||||
row['marking_error'] = str(e)
|
||||
row["marking_error"] = str(e)
|
||||
print(":", e)
|
||||
continue
|
||||
|
||||
if overwrite:
|
||||
# clear all model generated information
|
||||
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
|
||||
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
|
||||
row[model+'_'+k] = None
|
||||
|
||||
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
|
||||
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
|
||||
if model+'_'+k not in row:
|
||||
row[model+'_'+k] = None
|
||||
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
|
||||
for k in [
|
||||
"ports",
|
||||
"prediction",
|
||||
"throughput",
|
||||
"cp",
|
||||
"lcd",
|
||||
"raw",
|
||||
]:
|
||||
row[model + "_" + k] = None
|
||||
|
||||
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
|
||||
for k in [
|
||||
"ports",
|
||||
"prediction",
|
||||
"throughput",
|
||||
"cp",
|
||||
"lcd",
|
||||
"raw",
|
||||
]:
|
||||
if model + "_" + k not in row:
|
||||
row[model + "_" + k] = None
|
||||
|
||||
# Analyze with IACA, if requested and configured
|
||||
if iaca and ainfo['IACA'] is not None:
|
||||
if iaca and ainfo["IACA"] is not None:
|
||||
print("IACA", end="", flush=True)
|
||||
if not row.get('IACA_ports'):
|
||||
row['IACA_raw'] = iaca_analyse_instrumented_binary(
|
||||
marked_objfile, micro_architecture=ainfo['IACA'])
|
||||
row['IACA_ports'] = \
|
||||
{k: v/(row['pointer_increment']/row['element_size'])
|
||||
for k,v in row['IACA_raw']['port cycles'].items()}
|
||||
row['IACA_prediction'] = row['IACA_raw']['throughput']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['IACA_throughput'] = max(row['IACA_ports'].values())
|
||||
if not row.get("IACA_ports"):
|
||||
row["IACA_raw"] = iaca_analyse_instrumented_binary(
|
||||
marked_objfile, micro_architecture=ainfo["IACA"]
|
||||
)
|
||||
row["IACA_ports"] = {
|
||||
k: v / (row["pointer_increment"] / row["element_size"])
|
||||
for k, v in row["IACA_raw"]["port cycles"].items()
|
||||
}
|
||||
row["IACA_prediction"] = row["IACA_raw"]["throughput"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["IACA_throughput"] = max(row["IACA_ports"].values())
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
@@ -359,56 +415,70 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
||||
# Analyze with OSACA, if requested
|
||||
if osaca:
|
||||
print("OSACA", end="", flush=True)
|
||||
if not row.get('OSACA_ports'):
|
||||
row['OSACA_raw'] = osaca_analyse_instrumented_assembly(
|
||||
marked_asmfile, micro_architecture=ainfo['OSACA'],
|
||||
assign_optimal_throughput=ainfo.get('assign_optimal_throughput',
|
||||
True))
|
||||
row['OSACA_ports'] = \
|
||||
{k: v/(row['pointer_increment']/row['element_size'])
|
||||
for k,v in row['OSACA_raw']['port cycles'].items()}
|
||||
row['OSACA_prediction'] = row['OSACA_raw']['throughput']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['OSACA_throughput'] = max(row['OSACA_ports'].values())
|
||||
row['OSACA_cp'] = row['OSACA_raw']['cp_latency']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['OSACA_lcd'] = row['OSACA_raw']['lcd']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
if not row.get("OSACA_ports"):
|
||||
row["OSACA_raw"] = osaca_analyse_instrumented_assembly(
|
||||
marked_asmfile,
|
||||
micro_architecture=ainfo["OSACA"],
|
||||
assign_optimal_throughput=ainfo.get(
|
||||
"assign_optimal_throughput", True
|
||||
),
|
||||
)
|
||||
row["OSACA_ports"] = {
|
||||
k: v / (row["pointer_increment"] / row["element_size"])
|
||||
for k, v in row["OSACA_raw"]["port cycles"].items()
|
||||
}
|
||||
row["OSACA_prediction"] = row["OSACA_raw"]["throughput"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["OSACA_throughput"] = max(row["OSACA_ports"].values())
|
||||
row["OSACA_cp"] = row["OSACA_raw"]["cp_latency"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["OSACA_lcd"] = row["OSACA_raw"]["lcd"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
# Analyze with LLVM-MCA, if requested and configured
|
||||
if llvm_mca and ainfo['LLVM-MCA'] is not None:
|
||||
if llvm_mca and ainfo["LLVM-MCA"] is not None:
|
||||
print("LLVM-MCA", end="", flush=True)
|
||||
if not row.get('LLVM-MCA_ports'):
|
||||
row['LLVM-MCA_raw'] = llvm_mca_analyse_instrumented_assembly(
|
||||
if not row.get("LLVM-MCA_ports"):
|
||||
row["LLVM-MCA_raw"] = llvm_mca_analyse_instrumented_assembly(
|
||||
marked_asmfile,
|
||||
micro_architecture=ainfo['LLVM-MCA'],
|
||||
isa=ainfo['isa'])
|
||||
row['LLVM-MCA_ports'] = \
|
||||
{k: v/(row['pointer_increment']/row['element_size'])
|
||||
for k,v in row['LLVM-MCA_raw']['port cycles'].items()}
|
||||
row['LLVM-MCA_prediction'] =row['LLVM-MCA_raw']['throughput']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['LLVM-MCA_throughput'] = max(row['LLVM-MCA_ports'].values())
|
||||
row['LLVM-MCA_cp'] = row['LLVM-MCA_raw']['cp_latency']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['LLVM-MCA_lcd'] = row['LLVM-MCA_raw']['lcd']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
micro_architecture=ainfo["LLVM-MCA"],
|
||||
isa=ainfo["isa"],
|
||||
)
|
||||
row["LLVM-MCA_ports"] = {
|
||||
k: v / (row["pointer_increment"] / row["element_size"])
|
||||
for k, v in row["LLVM-MCA_raw"]["port cycles"].items()
|
||||
}
|
||||
row["LLVM-MCA_prediction"] = row["LLVM-MCA_raw"]["throughput"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["LLVM-MCA_throughput"] = max(row["LLVM-MCA_ports"].values())
|
||||
row["LLVM-MCA_cp"] = row["LLVM-MCA_raw"]["cp_latency"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["LLVM-MCA_lcd"] = row["LLVM-MCA_raw"]["lcd"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
|
||||
# Analyze with Ithemal, if not running local and configured
|
||||
if ainfo['Ithemal'] is not None and not islocal:
|
||||
if ainfo["Ithemal"] is not None and not islocal:
|
||||
print("Ithemal", end="", flush=True)
|
||||
if not row.get('Ithemal_prediction'):
|
||||
if not row.get("Ithemal_prediction"):
|
||||
with open(marked_asmfile) as f:
|
||||
parsed_code = parse_asm(f.read(), ainfo['isa'])
|
||||
kernel = reduce_to_section(parsed_code, ainfo['isa'])
|
||||
row['Ithemal_prediction'] = get_ithemal_prediction(
|
||||
get_intel_style_code(marked_objfile), model=ainfo['Ithemal'])
|
||||
parsed_code = parse_asm(f.read(), ainfo["isa"])
|
||||
kernel = reduce_to_section(parsed_code, ainfo["isa"])
|
||||
row["Ithemal_prediction"] = get_ithemal_prediction(
|
||||
get_intel_style_code(marked_objfile),
|
||||
model=ainfo["Ithemal"],
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
@@ -416,43 +486,45 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
||||
if measurements and islocal:
|
||||
# run measurements if on same hardware
|
||||
print("scale", end="", flush=True)
|
||||
if not row.get('allruns'):
|
||||
if not row.get("allruns"):
|
||||
# find best length with concurrent L2 measurement
|
||||
scaling_runs, best = scalingrun(exec_path)
|
||||
row['best_length'] = best[0]
|
||||
row['best_runtime'] = best[2]
|
||||
row['L2_traffic'] = best[3]
|
||||
row['allruns'] = scaling_runs
|
||||
row["best_length"] = best[0]
|
||||
row["best_runtime"] = best[2]
|
||||
row["L2_traffic"] = best[3]
|
||||
row["allruns"] = scaling_runs
|
||||
print(f"({best[0]}). ", end="", flush=True)
|
||||
else:
|
||||
print(f"({row.get('best_length', None)})! ", end="", flush=True)
|
||||
print(
|
||||
f"({row.get('best_length', None)})! ",
|
||||
end="",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
print()
|
||||
|
||||
# dump to file
|
||||
if data != data_lastsaved:
|
||||
print('saving... ', end="", flush=True)
|
||||
with data_path.open('wb') as f:
|
||||
print("saving... ", end="", flush=True)
|
||||
with data_path.open("wb") as f:
|
||||
try:
|
||||
pickle.dump(data, f)
|
||||
data_lastsaved = deepcopy(data)
|
||||
print('saved!')
|
||||
print("saved!")
|
||||
except KeyboardInterrupt:
|
||||
f.seek(0)
|
||||
pickle.dump(data, f)
|
||||
print('saved!')
|
||||
print("saved!")
|
||||
sys.exit()
|
||||
|
||||
|
||||
|
||||
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1)):
|
||||
#print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
|
||||
parameters = chain(*[[total_iterations//i, i] for i in lengths])
|
||||
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1 * 1024 + 1)):
|
||||
# print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
|
||||
parameters = chain(*[[total_iterations // i, i] for i in lengths])
|
||||
# TODO use arch specific events and grooup
|
||||
r, o = perfctr(chain([kernel_exec], map(str, parameters)),
|
||||
1, group="L2")
|
||||
r, o = perfctr(chain([kernel_exec], map(str, parameters)), 1, group="L2")
|
||||
global_infos = {}
|
||||
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", l) for l in o]:
|
||||
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", line) for line in o]:
|
||||
if m is not None:
|
||||
try:
|
||||
v = int(m.group(4))
|
||||
@@ -464,37 +536,45 @@ def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1
|
||||
r[m.group(2)][m.group(3)] = v
|
||||
|
||||
results = []
|
||||
best = (float('inf'), None)
|
||||
best = (float("inf"), None)
|
||||
for markername, mmetrics in r.items():
|
||||
kernelname, repetitions, *_, xlength = markername.split('_')
|
||||
kernelname, repetitions, *_, xlength = markername.split("_")
|
||||
repetitions = int(repetitions)
|
||||
xlength = int(xlength)
|
||||
total_iterations = mmetrics['repetitions'] * mmetrics['iterations']
|
||||
if 'Clock [MHz]' in mmetrics:
|
||||
clock_hz = mmetrics['Clock [MHz]']*1e6
|
||||
total_iterations = mmetrics["repetitions"] * mmetrics["iterations"]
|
||||
if "Clock [MHz]" in mmetrics:
|
||||
clock_hz = mmetrics["Clock [MHz]"] * 1e6
|
||||
else:
|
||||
clock_hz = arch_info[get_current_arch()]['Clock [MHz]']*1e6
|
||||
cyperit = mmetrics['Runtime (RDTSC) [s]'] * clock_hz / total_iterations
|
||||
clock_hz = arch_info[get_current_arch()]["Clock [MHz]"] * 1e6
|
||||
cyperit = mmetrics["Runtime (RDTSC) [s]"] * clock_hz / total_iterations
|
||||
# TODO use arch specific events and grooup
|
||||
if 'L2D load data volume [GBytes]' in mmetrics:
|
||||
l2perit = (mmetrics['L2D load data volume [GBytes]'] +
|
||||
mmetrics.get('L2D evict data volume [GBytes]', 0))*1e9 / total_iterations
|
||||
if "L2D load data volume [GBytes]" in mmetrics:
|
||||
l2perit = (
|
||||
(
|
||||
mmetrics["L2D load data volume [GBytes]"]
|
||||
+ mmetrics.get("L2D evict data volume [GBytes]", 0)
|
||||
)
|
||||
* 1e9
|
||||
/ total_iterations
|
||||
)
|
||||
else:
|
||||
l2perit = \
|
||||
mmetrics[arch_info[get_current_arch()]['L2_volume_metric']]*1e9 / total_iterations
|
||||
results.append(
|
||||
(xlength, repetitions, cyperit, l2perit)
|
||||
)
|
||||
l2perit = (
|
||||
mmetrics[arch_info[get_current_arch()]["L2_volume_metric"]]
|
||||
* 1e9
|
||||
/ total_iterations
|
||||
)
|
||||
results.append((xlength, repetitions, cyperit, l2perit))
|
||||
if cyperit < best[0]:
|
||||
best = cyperit, results[-1]
|
||||
return results, best[1]
|
||||
|
||||
|
||||
def mark(asm_path, compiler, cflags, isa, overwrite=False):
|
||||
# Mark assembly for IACA, OSACA and LLVM-MCA
|
||||
marked_asm_path = Path(asm_path).with_suffix(".marked.s")
|
||||
if not marked_asm_path.exists() or overwrite:
|
||||
overwrite = True
|
||||
with open(asm_path) as fa, open(marked_asm_path, 'w') as fm:
|
||||
with open(asm_path) as fa, open(marked_asm_path, "w") as fm:
|
||||
try:
|
||||
_, pointer_increment = asm_instrumentation(fa, fm, isa=isa)
|
||||
except KeyboardInterrupt:
|
||||
@@ -505,37 +585,46 @@ def mark(asm_path, compiler, cflags, isa, overwrite=False):
|
||||
# use maked assembly and extract asm_block and pointer_increment
|
||||
with open(marked_asm_path) as f:
|
||||
marked_asm = f.read()
|
||||
m = re.search(r'pointer_increment=([0-9]+)', marked_asm)
|
||||
m = re.search(r"pointer_increment=([0-9]+)", marked_asm)
|
||||
if m:
|
||||
pointer_increment = int(m.group(1))
|
||||
else:
|
||||
os.unlink(marked_asm_path)
|
||||
raise ValueError(
|
||||
"Could not find `pointer_increment=<byte increment>`. Plase place into file.")
|
||||
"Could not find `pointer_increment=<byte increment>`. Plase place into file."
|
||||
)
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
# Compile marked assembly to object for IACA
|
||||
marked_obj = Path(asm_path).with_suffix(".marked.o")
|
||||
if not marked_obj.exists():
|
||||
check_call([compiler] + ['-c', str(marked_asm_path), '-o', str(marked_obj)])
|
||||
|
||||
check_call([compiler] + ["-c", str(marked_asm_path), "-o", str(marked_obj)])
|
||||
|
||||
return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite
|
||||
|
||||
|
||||
def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=False,
|
||||
dontbuild=False):
|
||||
def build_kernel(
|
||||
kernel,
|
||||
architecture,
|
||||
compiler,
|
||||
cflags,
|
||||
cflags_name,
|
||||
overwrite=False,
|
||||
dontbuild=False,
|
||||
):
|
||||
build_path = f"build/{architecture}/{compiler}/{cflags_name}"
|
||||
kernel_assembly = f"{build_path}/{kernel}.s"
|
||||
kernel_object= f"{build_path}/{kernel}.o"
|
||||
kernel_object = f"{build_path}/{kernel}.o"
|
||||
executable = f"{build_path}/{kernel}"
|
||||
Path(build_path).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if not overwrite:
|
||||
# Overwrite if any kernel specific file is missing
|
||||
overwrite = (
|
||||
not os.path.exists(kernel_object) or
|
||||
not os.path.exists(kernel_assembly) or
|
||||
not os.path.exists(executable))
|
||||
not os.path.exists(kernel_object)
|
||||
or not os.path.exists(kernel_assembly)
|
||||
or not os.path.exists(executable)
|
||||
)
|
||||
|
||||
if dontbuild and overwrite:
|
||||
raise ValueError("Must build, but not allowed.")
|
||||
@@ -545,39 +634,43 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
|
||||
|
||||
if not Path(f"{build_path}/compiler_version").exists():
|
||||
# Document compiler version
|
||||
with open(f"{build_path}/compiler_version", 'w') as f:
|
||||
f.write(check_output([compiler, "-v"], encoding='utf8', stderr=STDOUT))
|
||||
with open(f"{build_path}/compiler_version", "w") as f:
|
||||
f.write(check_output([compiler, "-v"], encoding="utf8", stderr=STDOUT))
|
||||
|
||||
if overwrite:
|
||||
# build object + assembly
|
||||
check_call([compiler] +
|
||||
cflags +
|
||||
["-c", f"kernels/{kernel}.c", "-o", kernel_object])
|
||||
check_call([compiler] +
|
||||
cflags +
|
||||
["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly])
|
||||
check_call([compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-o", kernel_object])
|
||||
check_call(
|
||||
[compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly]
|
||||
)
|
||||
|
||||
# build main and link executable
|
||||
executable_cflags = [
|
||||
os.environ["LIKWID_DEFINES"],
|
||||
os.environ["LIKWID_INC"],
|
||||
os.environ["LIKWID_LIB"]
|
||||
] + ['-Ofast']
|
||||
check_call([compiler] + executable_cflags + [
|
||||
f"{build_path}/dummy.o",
|
||||
kernel_object,
|
||||
"-DMAIN",
|
||||
f"kernels/{kernel}.c",
|
||||
"-llikwid",
|
||||
"-o", executable])
|
||||
os.environ["LIKWID_LIB"],
|
||||
] + ["-Ofast"]
|
||||
check_call(
|
||||
[compiler]
|
||||
+ executable_cflags
|
||||
+ [
|
||||
f"{build_path}/dummy.o",
|
||||
kernel_object,
|
||||
"-DMAIN",
|
||||
f"kernels/{kernel}.c",
|
||||
"-llikwid",
|
||||
"-o",
|
||||
executable,
|
||||
]
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
|
||||
return kernel_assembly, executable, overwrite
|
||||
|
||||
|
||||
def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
def perfctr(cmd, cores, group="MEM", code_markers=True, verbose=0):
|
||||
"""
|
||||
Run *cmd* with likwid-perfctr and returns result as dict.
|
||||
|
||||
@@ -586,30 +679,32 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
if CLI argument cores > 1, running with multi-core, otherwise single-core
|
||||
"""
|
||||
# Making sure likwid-perfctr is available:
|
||||
if benchmark.find_executable('likwid-perfctr') is None:
|
||||
print("likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
|
||||
file=sys.stderr)
|
||||
if benchmark.find_executable("likwid-perfctr") is None:
|
||||
print(
|
||||
"likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# FIXME currently only single core measurements support!
|
||||
perf_cmd = ['likwid-perfctr', '-f', '-O', '-g', group]
|
||||
perf_cmd = ["likwid-perfctr", "-f", "-O", "-g", group]
|
||||
|
||||
cpu = 'S0:0'
|
||||
cpu = "S0:0"
|
||||
if cores > 1:
|
||||
cpu += '-'+str(cores-1)
|
||||
cpu += "-" + str(cores - 1)
|
||||
|
||||
# Pinned and measured on cpu
|
||||
perf_cmd += ['-C', cpu]
|
||||
perf_cmd += ["-C", cpu]
|
||||
|
||||
# code must be marked using likwid markers
|
||||
perf_cmd.append('-m')
|
||||
perf_cmd.append("-m")
|
||||
|
||||
perf_cmd += cmd
|
||||
if verbose > 1:
|
||||
print(' '.join(perf_cmd))
|
||||
print(" ".join(perf_cmd))
|
||||
try:
|
||||
with benchmark.fix_env_variable('OMP_NUM_THREADS', None):
|
||||
output = check_output(perf_cmd).decode('utf-8').split('\n')
|
||||
with benchmark.fix_env_variable("OMP_NUM_THREADS", None):
|
||||
output = check_output(perf_cmd).decode("utf-8").split("\n")
|
||||
except CalledProcessError as e:
|
||||
print("Executing benchmark failed: {!s}".format(e), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
@@ -626,7 +721,7 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line)
|
||||
if m:
|
||||
cur_region_name = m.group(1)
|
||||
line = line.split(',')
|
||||
line = line.split(",")
|
||||
try:
|
||||
# Metrics
|
||||
cur_region_data[line[0]] = float(line[1])
|
||||
@@ -639,12 +734,13 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
continue
|
||||
try:
|
||||
# Event counters
|
||||
if line[2] == '-' or line[2] == 'nan':
|
||||
if line[2] == "-" or line[2] == "nan":
|
||||
counter_value = 0
|
||||
else:
|
||||
counter_value = int(line[2])
|
||||
if re.fullmatch(r'[A-Z0-9_]+', line[0]) and \
|
||||
re.fullmatch(r'[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*', line[1]):
|
||||
if re.fullmatch(r"[A-Z0-9_]+", line[0]) and re.fullmatch(
|
||||
r"[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*", line[1]
|
||||
):
|
||||
cur_region_data.setdefault(line[0], {})
|
||||
cur_region_data[line[0]][line[1]] = counter_value
|
||||
continue
|
||||
@@ -659,49 +755,52 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
|
||||
|
||||
def remove_html_tags(text):
|
||||
return re.sub('<.*?>', '', text)
|
||||
return re.sub("<.*?>", "", text)
|
||||
|
||||
|
||||
def get_intel_style_code(marked_objfile):
|
||||
# Disassembl with Intel syntax
|
||||
cmd = ("objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
|
||||
"--x86-asm-syntax=intel").split(" ") + [marked_objfile]
|
||||
cmd = (
|
||||
"objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
|
||||
"--x86-asm-syntax=intel"
|
||||
).split(" ") + [marked_objfile]
|
||||
asm_raw = check_output(cmd).decode()
|
||||
asm_raw = '\n'.join([l.strip() for l in asm_raw.split('\n')])
|
||||
asm_raw = "\n".join([line.strip() for line in asm_raw.split("\n")])
|
||||
kernel_raw = asm_raw[
|
||||
asm_raw.index('mov\tebx, 111\nnop')+len('mov\tebx, 111\nnop') :
|
||||
asm_raw.index('mov\tebx, 222\nnop')
|
||||
asm_raw.index("mov\tebx, 111\nnop")
|
||||
+ len("mov\tebx, 111\nnop") : asm_raw.index("mov\tebx, 222\nnop")
|
||||
]
|
||||
kernel_lines = kernel_raw.split('\n')
|
||||
kernel_lines = kernel_raw.split("\n")
|
||||
# Ignore label and jump
|
||||
return '\n'.join(kernel_lines[:-2])
|
||||
return "\n".join(kernel_lines[:-2])
|
||||
|
||||
|
||||
def get_ithemal_prediction(code, model='skl'):
|
||||
def get_ithemal_prediction(code, model="skl"):
|
||||
url = "http://3.18.198.23/predict"
|
||||
assert model in ['skl', 'hsw', 'ivb']
|
||||
r = requests.post(url, {'code': code, 'model': model})
|
||||
assert model in ["skl", "hsw", "ivb"]
|
||||
r = requests.post(url, {"code": code, "model": model})
|
||||
raw_text = remove_html_tags(r.text)
|
||||
m = re.search("Could not generate a prediction: (.*)", raw_text)
|
||||
if m:
|
||||
print(" error:", m.group(1).strip(), end=' ')
|
||||
return float('nan')
|
||||
m = re.search("Prediction: ([0-9\.]+) cycles per iteration", raw_text)
|
||||
print(" error:", m.group(1).strip(), end=" ")
|
||||
return float("nan")
|
||||
m = re.search("Prediction: ([0-9.]+) cycles per iteration", raw_text)
|
||||
if m:
|
||||
return float(m.group(1))
|
||||
else:
|
||||
return float('nan')
|
||||
return float("nan")
|
||||
|
||||
|
||||
def main():
|
||||
# Check for correct LLVM-MCA version
|
||||
try:
|
||||
llvm_mca = 'LLVM version 12.0.0' in check_output(['llvm-mca', '-version']).decode()
|
||||
llvm_mca = "LLVM version 12.0.0" in check_output(["llvm-mca", "-version"]).decode()
|
||||
except FileNotFoundError:
|
||||
llvm_mca = False
|
||||
|
||||
build_mark_run_all_kernels(measurements='--no-measurements' not in sys.argv, llvm_mca=llvm_mca)
|
||||
|
||||
build_mark_run_all_kernels(measurements="--no-measurements" not in sys.argv, llvm_mca=llvm_mca)
|
||||
sys.exit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user