mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-11 13:37:07 +01:00
Resolved merge conflicts
This commit is contained in:
28
.travis.yml
28
.travis.yml
@@ -3,6 +3,28 @@ language: python
|
||||
python:
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "3.7"
|
||||
install: pip install tox-travis
|
||||
script: tox
|
||||
# Python 3.7 not working yet
|
||||
# - "3.7"
|
||||
before_install:
|
||||
# - pip install tox-travis
|
||||
- pip install codecov
|
||||
install:
|
||||
- pip install -e .
|
||||
cache: pip
|
||||
script:
|
||||
# - tox
|
||||
- coverage run -p tests/all_tests.py
|
||||
after_success:
|
||||
- coverage combine
|
||||
- codecov
|
||||
deploy:
|
||||
provider: pypi
|
||||
user: "__token__"
|
||||
password:
|
||||
secure: "fRRCETOwDkJ4pFacYZghPfCQ9mSsV4PlD3sTDp8rDHoCnebPjvFYc1tIdv+Wds0ae162KNUaj9GbxjK0MTGiRcy4pD08n7ufv8snmBQ2rtOLkj7RCRg1hw30WcMHjzqScFJgQcBrpjdPmR5AlesUufh6OadGvF1NspmVRWKr8ir3KQhmNV+itAliYoqaSTRTg1zC/znm+49l5gkzlLxd+mPj5/dtcc8vZ/i2M2+nNTTjDxq71q4Ddqv+bgZV1y7OZY2YuvjEDPflUbwc3fjOxpj891uMDHodsGmEHBu8WsLpF2tAO0C/x63S0jXamkV+/4cAQqQAwWr0Lby9/BjCfUwyUMOEgZ0S+z9WoFpBpQTQEfkD2JH/UFrv4CMnLFqgDkVMcx0vc/rT4Od8eJ5wOSG5+VdniJNOLpodFOXuKc09eJMk2lE9vk9OBrcsZ09UOTPTUCMZSIP4cBDxaIkx+RHQEy63TQdJZcElRBEWGEgj2e9hbiktvIoOvbFGQDscpz7ShBDklXIpu9hnxcKHtNDEjyywTUJmx7lTMILL05DPUnpUmnMb1Gyx5lbHzhSExc9re0cxEA354UUQKBS5HwHQcEBw9stMfsaForiBAUOocUKdGqlGP9cOXFoxdC9M+ff5FNstgbjPYSowb/JbATMlmCWKgH/bXXcTGCO10sk="
|
||||
distributions: sdist
|
||||
skip_existing: true
|
||||
skip_cleanup: true
|
||||
on:
|
||||
repo: RRZE-HPC/OSACA
|
||||
tag: true
|
||||
@@ -1,7 +1,7 @@
|
||||
include README.rst
|
||||
include LICENSE
|
||||
include tox.ini
|
||||
recursive-include osaca/data/ *.csv
|
||||
recursive-include osaca/data/ *.yml
|
||||
include examples/*
|
||||
recursive-include tests *.py *.out
|
||||
recursive-include tests/testfiles/ *
|
||||
|
||||
263
README.rst
263
README.rst
@@ -16,9 +16,15 @@ analysis and throughput prediction for a innermost loop kernel.
|
||||
.. image:: https://travis-ci.com/RRZE-HPC/OSACA.svg?token=393L6z2HEXNiGLtZ43s6&branch=master
|
||||
:target: https://travis-ci.com/RRZE-HPC/OSACA
|
||||
|
||||
.. image:: https://landscape.io/github/RRZE-HPC/OSACA/master/landscape.svg?style=flat&badge_auth_token=c95f01b247f94bc79c09d21c5c827697
|
||||
:target: https://landscape.io/github/RRZE-HPC/OSACA/master
|
||||
:alt: Code Health
|
||||
.. ..image:: https://landscape.io/github/RRZE-HPC/OSACA/master/landscape.svg?style=flat&badge_auth_token=c95f01b247f94bc79c09d21c5c827697
|
||||
.. :target: https://landscape.io/github/RRZE-HPC/OSACA/master
|
||||
.. :alt: Code Health
|
||||
|
||||
.. image:: https://codecov.io/github/RRZE-HPC/OSACA/coverage.svg?branch=master
|
||||
:target: https://codecov.io/github/RRZE-HPC/OSACA?branch=master
|
||||
|
||||
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
|
||||
:target: https://github.com/ambv/black
|
||||
|
||||
Getting started
|
||||
===============
|
||||
@@ -46,8 +52,7 @@ Dependencies:
|
||||
Additional requirements are:
|
||||
|
||||
- `Python3 <https://www.python.org/>`_
|
||||
- `pandas <http://pandas.pydata.org/>`_
|
||||
- `NumPy <http://www.numpy.org/>`_
|
||||
- `Graphviz <https://www.graphviz.org/>`_ for dependency graph creation (minimal dependency is `libgraphviz-dev` on Ubuntu)
|
||||
- `Kerncraft <https://github.com/RRZE-HPC/kerncraft>`_ for marker insertion
|
||||
- `ibench <https://github.com/hofm/ibench>`_ for throughput/latency measurements
|
||||
|
||||
@@ -66,213 +71,169 @@ The usage of OSACA can be listed as:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
osaca [-h] [-V] [--arch ARCH] [--tp-list] [-i | --iaca | -m] FILEPATH
|
||||
osaca [-h] [-V] [--arch ARCH] [--export-graph GRAPHNAME] FILEPATH
|
||||
|
||||
- ``-h`` or ``--help`` prints out the help message.
|
||||
- ``-V`` or ``--version`` shows the program’s version number.
|
||||
- ``ARCH`` needs to be replaced with the wished architecture abbreviation. This flag is necessary for the throughput analysis (default function) and the inclusion of an ibench output (``-i``). Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW`` and ``SKL`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN`` for AMD Zen (17h family) architecture .
|
||||
- While in the throughput analysis mode, one can add ``--tp-list`` for printing the additional throughput list of the kernel or ``--iaca`` for letting OSACA to know it has to search for IACA binary markers.
|
||||
- ``-i`` or ``--include-ibench`` starts the integration of ibench output into the CSV data file determined by ``ARCH``.
|
||||
- With the flag ``-m`` or ``--insert-marker`` OSACA calls the Kerncraft module for the interactively insertion of `IACA <https://software.intel.com/en-us/articles/intel-architecture-code-analyzer>`_ marker in suggested assembly blocks.
|
||||
- ``FILEPATH`` describes the filepath to the file to work with and is always necessary
|
||||
-h, --help
|
||||
prints out the help message.
|
||||
-V, --version
|
||||
shows the program’s version number.
|
||||
--arch ARCH
|
||||
needs to be replaced with the wished architecture abbreviation.
|
||||
This flag is necessary for the throughput analysis (default function) and the inclusion of an ibench output (``-i``).
|
||||
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX`` and ``CSX`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1`` for AMD Zen (17h family) architecture.
|
||||
Furthermore, ``VULCAN`` for Marvell's ARM-based ThunderX2 architecture is available.
|
||||
--insert-marker
|
||||
OSACA calls the Kerncraft module for the interactive insertion of `IACA <https://software.intel.com/en-us/articles/intel-architecture-code-analyzer>`_ markers in suggested assembly blocks.
|
||||
--db-check
|
||||
Run a sanity check on the database specified by "--arch".
|
||||
The output depends on the verbosity level.
|
||||
Keep in mind that you have to provide a (dummy) filename anyway.
|
||||
--export-graph EXPORT_PATH
|
||||
Output path for .dot file export. If "." is given, the file will be stored as "./osaca_dg.dot".
|
||||
After the file was created, you can convert it to a PDF file using dot: `dot -Tpdf osaca_dg.dot -o osaca_dependency_graph.pdf`
|
||||
|
||||
The **FILEPATH** is the path to the file to work with and is always required.
|
||||
|
||||
______________________
|
||||
|
||||
Hereinafter OSACA's scope of function will be described.
|
||||
|
||||
Throughput analysis
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
As main functionality of OSACA this process starts by default. It is always necessary to specify the core architecture by the flag ``--arch ARCH``, where ``ARCH`` can stand for ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKL`` or ``ZEN``.
|
||||
Throughput & Latency analysis
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
As main functionality of OSACA this process starts by default. It is always necessary to specify the core architecture by the flag ``--arch ARCH``, where ``ARCH`` can stand for ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ZEN1`` or ``VULCAN``.
|
||||
|
||||
For extracting the right kernel, one has to mark it beforehand. For this there are two different approaches:
|
||||
For extracting the right kernel, one has to mark it beforehand.
|
||||
Currently, only the detection of markers in the assembly code and therefore the analysis of assembly files is supported by OSACA.
|
||||
|
||||
| **High level code**
|
||||
**Assembly code**
|
||||
|
||||
The OSACA marker is ``//STARTLOOP`` and must be put in one line in front of the loop head, and the loop code must be indented consistently. This means the marker and the head must have the same indentation level while the whole loop body needs to be more indented than the code before and after. For instance, this is a valid OSACA marker:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int i = 0;
|
||||
//STARTLOOP
|
||||
while(i < N){
|
||||
// do something...
|
||||
i++;
|
||||
}
|
||||
|
||||
| **Assembly code**
|
||||
|
||||
Another way for marking a kernel is to insert the IACA byte markers in the assembly file in before and after the loop.
|
||||
Marking a kernel means inserting the byte markers in the assembly file before and after the loop.
|
||||
For this, the start marker has to be inserted right in front of the loop label and the end marker directly after the jump instruction.
|
||||
Start and end marker can be seen in the example below:
|
||||
For the convenience of the user, in x86 assembly IACA byte markers are used.
|
||||
|
||||
**x86 Byte Markers**
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
movl $111,%ebx ;IACA START MARKER
|
||||
.byte 100,103,144 ;IACA START MARKER
|
||||
; LABEL
|
||||
; do something
|
||||
; ...
|
||||
; conditional jump to LABEL
|
||||
movl $222,%ebx ;IACA END MARKER
|
||||
.byte 100,103,144 ;IACA END MARKER
|
||||
movl $111,%ebx #IACA/OSACA START MARKER
|
||||
.byte 100,103,144 #IACA/OSACA START MARKER
|
||||
Loop:
|
||||
# ...
|
||||
movl $222,%ebx #IACA/OSACA END MARKER
|
||||
.byte 100,103,144 #IACA/OSACA END MARKER
|
||||
|
||||
The optional flag ``--iaca`` defines if OSACA needs to search for the IACA byte markers or the OSACA marker in the chosen file.
|
||||
**AArch64 Byte Markers**
|
||||
|
||||
With an additional, optional ``--tp-list``, OSACA adds a simple list of all kernel instruction forms together with their reciprocal throughput to the output. This is helpful in case of no further information about the port binding of the single instruction forms.
|
||||
.. code-block:: asm
|
||||
|
||||
Include new measurements into the data file
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Running OSACA with the flag ``-i`` or ``--include-ibench`` and a specified micro architecture ``ARCH``, it
|
||||
takes the values given in an ibench output file and checks them for reasonability. If a value is not in the data file already, it will be added, otherwise OSACA prints out a warning message and keeps the old value in the data file. If a value does not pass the validation, a warning message is shown, however, OSACA will keep working with the new value.
|
||||
The handling of ibench is shortly described in the example section below.
|
||||
mov x1, #111 // OSACA START
|
||||
.byte 213,3,32,31 // OSACA START
|
||||
// ...
|
||||
mov x1, #222 // OSACA END
|
||||
.byte 213,3,32,31 // OSACA END
|
||||
|
||||
.. Include new measurements into the data file
|
||||
.. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. Running OSACA with the flag ``-i`` or ``--include-ibench`` and a specified micro architecture ``ARCH``, it takes the values given in an ibench output file and checks them for reasonability. If a value is not in the data file already, it will be added, otherwise OSACA prints out a warning message and keeps the old value in the data file. If a value does not pass the validation, a warning message is shown, however, OSACA will keep working with the new value. The handling of ibench is shortly described in the example section below.
|
||||
|
||||
Insert IACA markers
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
Using the ``-m`` or ``--insert-marker`` flags for a given file, OSACA calls the implemented Kerncraft module for identifying and marking the inner-loop block in *manual mode*. More information about how this is done can be found in the `Kerncraft repository <https://github.com/RRZE-HPC/kerncraft>`_.
|
||||
Using the ``--insert-marker`` flags for a given file, OSACA calls the implemented Kerncraft module for identifying and marking the inner-loop block in *manual mode*. More information about how this is done can be found in the `Kerncraft repository <https://github.com/RRZE-HPC/kerncraft>`_.
|
||||
Note that this currently only works for x86 loop kernels.
|
||||
|
||||
Example
|
||||
=======
|
||||
For clarifying the functionality of OSACA a sample kernel is analyzed for an Intel IVB core hereafter:
|
||||
For clarifying the functionality of OSACA a sample kernel is analyzed for an Intel CSX core hereafter:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
double a[N], double b[N];
|
||||
double s;
|
||||
|
||||
//STARTLOOP
|
||||
// loop
|
||||
for(int i = 0; i < N; ++i)
|
||||
a[i] = s * b[i];
|
||||
|
||||
The code shows a simple scalar multiplication of a vector ``b`` and a floating-point number ``s``. The result is
|
||||
written in vector ``a``.
|
||||
After including the OSACA marker ``//STARTLOOP`` and compiling the source, one can
|
||||
start the analysis typing
|
||||
The code shows a simple scalar multiplication of a vector ``b`` and a floating-point number ``s``.
|
||||
The result is written in vector ``a``.
|
||||
After including the OSACA byte marker into the assembly, one can start the analysis by typing
|
||||
|
||||
.. code:: bash
|
||||
|
||||
osaca --arch IVB PATH/TO/FILE
|
||||
osaca --arch CSX PATH/TO/FILE
|
||||
|
||||
in the command line. Optionally, one can create the assembly code out of the file, identify and mark the kernel of interest and run OSACA with the additional ``--iaca`` flag.
|
||||
in the command line.
|
||||
|
||||
The output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
Open Source Architecture Code Analyzer (OSACA) - v0.3
|
||||
Analyzed file: scale.s.csx.O3.s
|
||||
Architecture: csx
|
||||
Timestamp: 2019-10-03 23:36:21
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
P - Load operation can be hidden behind a past or future store instruction
|
||||
X - No information for this instruction in database
|
||||
* - Instruction micro-ops not bound to a port
|
||||
|
||||
Port Binding in Cycles Per Iteration:
|
||||
-------------------------------------------------
|
||||
| Port | 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------------
|
||||
| Cycles | 2.33 | 1.33 | 5.0 | 5.0 | 2.0 | 1.33 |
|
||||
-------------------------------------------------
|
||||
|
||||
Ports Pressure in cycles
|
||||
| 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------
|
||||
| | | 0.50 | 0.50 | 1.00 | | movl $0x0,-0x24(%rbp)
|
||||
| | | | | | | jmp 10b <scale+0x10b>
|
||||
| | | 0.50 | 0.50 | | | mov -0x48(%rbp),%rax
|
||||
| | | 0.50 | 0.50 | | | mov -0x24(%rbp),%edx
|
||||
| 0.33 | 0.33 | | | | 0.33 | movslq %edx,%rdx
|
||||
| | | 0.50 | 0.50 | | | vmovsd (%rax,%rdx,8),%xmm0
|
||||
| 1.00 | | 0.50 | 0.50 | | | vmulsd -0x50(%rbp),%xmm0,%xmm0
|
||||
| | | 0.50 | 0.50 | | | mov -0x38(%rbp),%rax
|
||||
| | | 0.50 | 0.50 | | | mov -0x24(%rbp),%edx
|
||||
| 0.33 | 0.33 | | | | 0.33 | movslq %edx,%rdx
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovsd %xmm0,(%rax,%rdx,8)
|
||||
| | | | | | | X addl $0x1,-0x24(%rbp)
|
||||
| | | 0.50 | 0.50 | | | mov -0x24(%rbp),%eax
|
||||
| 0.33 | 0.33 | 0.50 | 0.50 | | 0.33 | cmp -0x54(%rbp),%eax
|
||||
| | | | | | | jl e4 <scale+0xe4>
|
||||
| 0.33 | 0.33 | | | | 0.33 | mov %rcx,%rsp
|
||||
Total number of estimated throughput: 5.0
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 |
|
||||
-----------------------------------------------------------------------------------
|
||||
170 | | | | | | | | | .L22:
|
||||
171 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | | vmulpd (%r12,%rax), %ymm1, %ymm0
|
||||
172 | | | 0.50 | 0.50 | 1.00 | | | | vmovapd %ymm0, 0(%r13,%rax)
|
||||
173 | 0.25 | 0.25 | | | | 0.25 | 0.25 | | addq $32, %rax
|
||||
174 | 0.25 | 0.25 | | | | 0.25 | 0.25 | | cmpq %rax, %r14
|
||||
175 | | | | | | | | | * jne .L22
|
||||
|
||||
1.00 1.00 1.00 0.50 1.00 0.50 1.00 0.50 0.50
|
||||
|
||||
|
||||
Latency Analysis Report
|
||||
-----------------------
|
||||
171 | 8.0 | | vmulpd (%r12,%rax), %ymm1, %ymm0
|
||||
172 | 5.0 | | vmovapd %ymm0, 0(%r13,%rax)
|
||||
|
||||
13.0
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
173 | 1.0 | addq $32, %rax | [173]
|
||||
|
||||
It shows the whole kernel together with the average port pressure of each instruction form and the overall port binding.
|
||||
In the fifth to last line containing ``addl $0x1, -0x24(%rbp)`` one can see an ``X`` in front of the instruction form and no port occupation.
|
||||
This means either there are no measured values for this instruction form or no port binding is provided in the
|
||||
data file.
|
||||
In the first case, OSACA automatically creates two benchmark assembly files (``add-mem_imd.S`` for latency and ``add-mem_imd-TP.S`` for throughput) in the benchmark folder, if it not already exists there.
|
||||
Furthermore, it shows the critical path of the loop kernel and all loop-carried dependencies, each with a list of the line numbers that are part of this dependency chain on the right.
|
||||
|
||||
One can now run ibench to get the throughput value for addl with the given file. Mind that the assembly
|
||||
file, which is used for ibench, is implemented in Intel syntax. So for a valid run instruction ``addl`` must be
|
||||
changed to ``add`` manually.
|
||||
.. For measuring the instruction forms with ibench we highly recommend to use an exclusively allocated node, so there is no other workload falsifying the results. For the correct function of ibench the benchmark files from OSACA need to be placed in a subdirectory of src in root so ibench can create the a folder with the subdirectory’s name and the shared objects. For running the tests the frequencies of all cores must set to a constant value and this has to be given as an argument together with the directory of the shared objects to ibench, e.g.:
|
||||
|
||||
For measuring the instruction forms with ibench we highly recommend to use an exclusively allocated node,
|
||||
so there is no other workload falsifying the results. For the correct function of ibench the benchmark files
|
||||
from OSACA need to be placed in a subdirectory of src in root so ibench can create the a folder with the
|
||||
subdirectory’s name and the shared objects. For running the tests the frequencies of all cores must set to a
|
||||
constant value and this has to be given as an argument together with the directory of the shared objects to
|
||||
ibench, e.g.:
|
||||
|
||||
.. code:: bash
|
||||
.. .. code:: bash
|
||||
|
||||
./ibench ./AVX 2.2
|
||||
|
||||
for running ibench in the directory ``AVX`` with a core frequency of 2.2 GHz.
|
||||
We get an output like:
|
||||
.. for running ibench in the directory ``AVX`` with a core frequency of 2.2 GHz. We get an output like:
|
||||
|
||||
.. code:: bash
|
||||
.. .. code:: bash
|
||||
|
||||
Using frequency 2.20GHz.
|
||||
add-mem_imd-TP: 1.023 (clock cycles) [DEBUG - result: 1.000000]
|
||||
add-mem_imd: 6.050 (clock cycles) [DEBUG - result: 1.000000]
|
||||
|
||||
The debug output as resulting value of register ``xmm0`` is additional validation information depending on
|
||||
the executed instruction form meant for the user and is not considered by OSACA.
|
||||
The ibench output information can be included by OSACA running the program with the flag ``--include-ibench`` or just
|
||||
``-i`` and the specify micro architecture:
|
||||
.. The debug output as resulting value of register ``xmm0`` is additional validation information depending on the executed instruction form meant for the user and is not considered by OSACA. The ibench output information can be included by OSACA running the program with the flag ``--include-ibench`` or just ``-i`` and the specify micro architecture:
|
||||
|
||||
.. code-block:: bash
|
||||
.. .. code-block:: bash
|
||||
|
||||
osaca --arch IVB -i PATH/TO/IBENCH-OUTPUTFILE
|
||||
|
||||
For now no automatic allocation of ports for a instruction form is implemented, so for getting an output in the Ports Pressure table, one must add the port occupation by hand.
|
||||
We know that the inserted instruction form must be assigned always to Port 2, 3 and 4 and additionally to either 0, 1 or 5, a valid data file therefore would look like this:
|
||||
.. For now no automatic allocation of ports for a instruction form is implemented, so for getting an output in the Ports Pressure table, one must add the port occupation by hand. We know that the inserted instruction form must be assigned always to Port 2, 3 and 4 and additionally to either 0, 1 or 5, a valid data file therefore would look like this:
|
||||
|
||||
.. code:: bash
|
||||
.. .. code:: bash
|
||||
|
||||
addl-mem_imd,1.0,6.0,"(0.33,0.33,1.00,1.00,1.00,0.33)"
|
||||
|
||||
Another throughput analysis with OSACA now returns all information for the kernel:
|
||||
|
||||
.. code-block::
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
P - Load operation can be hidden behind a past or future store instruction
|
||||
X - No information for this instruction in database
|
||||
* - Instruction micro-ops not bound to a port
|
||||
|
||||
Port Binding in Cycles Per Iteration:
|
||||
-------------------------------------------------
|
||||
| Port | 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------------
|
||||
| Cycles | 2.67 | 1.67 | 6.0 | 6.0 | 3.0 | 1.67 |
|
||||
-------------------------------------------------
|
||||
|
||||
Ports Pressure in cycles
|
||||
| 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------
|
||||
| | | 0.50 | 0.50 | 1.00 | | movl $0x0,-0x24(%rbp)
|
||||
| | | | | | | jmp 10b <scale+0x10b>
|
||||
| | | 0.50 | 0.50 | | | mov -0x48(%rbp),%rax
|
||||
| | | 0.50 | 0.50 | | | mov -0x24(%rbp),%edx
|
||||
| 0.33 | 0.33 | | | | 0.33 | movslq %edx,%rdx
|
||||
| | | 0.50 | 0.50 | | | vmovsd (%rax,%rdx,8),%xmm0
|
||||
| 1.00 | | 0.50 | 0.50 | | | vmulsd -0x50(%rbp),%xmm0,%xmm0
|
||||
| | | 0.50 | 0.50 | | | mov -0x38(%rbp),%rax
|
||||
| | | 0.50 | 0.50 | | | mov -0x24(%rbp),%edx
|
||||
| 0.33 | 0.33 | | | | 0.33 | movslq %edx,%rdx
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovsd %xmm0,(%rax,%rdx,8)
|
||||
| 0.33 | 0.33 | 1.00 | 1.00 | 1.00 | 0.33 | addl $0x1,-0x24(%rbp)
|
||||
| | | 0.50 | 0.50 | | | mov -0x24(%rbp),%eax
|
||||
| 0.33 | 0.33 | 0.50 | 0.50 | | 0.33 | cmp -0x54(%rbp),%eax
|
||||
| | | | | | | jl e4 <scale+0xe4>
|
||||
| 0.33 | 0.33 | | | | 0.33 | mov %rcx,%rsp
|
||||
Total number of estimated throughput: 6.0
|
||||
|
||||
Credits
|
||||
=======
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 231 KiB After Width: | Height: | Size: 206 KiB |
@@ -1,286 +0,0 @@
|
||||
# mark_description "Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 17.0.5.239 Build 20170817";
|
||||
# mark_description "-fno-alias -O3 -fopenmp -xCORE-AVX-I -S -o 2d.S";
|
||||
.file "2d-5pt.c"
|
||||
.text
|
||||
..TXTST0:
|
||||
# -- Begin jacobi2D5pt
|
||||
.text
|
||||
# mark_begin;
|
||||
.align 16,0x90
|
||||
.globl jacobi2D5pt
|
||||
# --- jacobi2D5pt(int, int)
|
||||
jacobi2D5pt:
|
||||
# parameter 1: %edi
|
||||
# parameter 2: %esi
|
||||
..B1.1: # Preds ..B1.0
|
||||
# Execution count [1.00e+00]
|
||||
.cfi_startproc
|
||||
..___tag_value_jacobi2D5pt.1:
|
||||
..L2:
|
||||
#2.31
|
||||
pushq %rbx #2.31
|
||||
.cfi_def_cfa_offset 16
|
||||
movq %rsp, %rbx #2.31
|
||||
.cfi_def_cfa 3, 16
|
||||
.cfi_offset 3, -16
|
||||
andq $-32, %rsp #2.31
|
||||
pushq %rbp #2.31
|
||||
pushq %rbp #2.31
|
||||
movq 8(%rbx), %rbp #2.31
|
||||
movq %rbp, 8(%rsp) #2.31
|
||||
movq %rsp, %rbp #2.31
|
||||
.cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00
|
||||
pushq %r13 #2.31
|
||||
pushq %r14 #2.31
|
||||
pushq %r15 #2.31
|
||||
subq $88, %rsp #2.31
|
||||
movslq %esi, %rsi #2.31
|
||||
movslq %edi, %rcx #2.31
|
||||
.cfi_escape 0x10, 0x0d, 0x02, 0x76, 0x78
|
||||
.cfi_escape 0x10, 0x0e, 0x02, 0x76, 0x70
|
||||
.cfi_escape 0x10, 0x0f, 0x02, 0x76, 0x68
|
||||
movq %rsi, %r13 #4.17
|
||||
imulq %rcx, %r13 #4.17
|
||||
shlq $3, %r13 #4.12
|
||||
movq %r13, %rax #4.12
|
||||
addq $31, %rax #4.12
|
||||
andq $-32, %rax #4.12
|
||||
subq %rax, %rsp #4.12
|
||||
movq %rsp, %rax #4.12
|
||||
# LOE rax rcx rsi r12 r13 edi
|
||||
..B1.29: # Preds ..B1.1
|
||||
# Execution count [1.00e+00]
|
||||
movq %rax, %r14 #4.12
|
||||
# LOE rcx rsi r12 r13 r14 edi
|
||||
..B1.2: # Preds ..B1.29
|
||||
# Execution count [1.00e+00]
|
||||
movq %r13, %rax #5.12
|
||||
addq $31, %rax #5.12
|
||||
andq $-32, %rax #5.12
|
||||
subq %rax, %rsp #5.12
|
||||
movq %rsp, %rax #5.12
|
||||
# LOE rax rcx rsi r12 r13 r14 edi
|
||||
..B1.30: # Preds ..B1.2
|
||||
# Execution count [1.00e+00]
|
||||
movq %rax, %r15 #5.12
|
||||
# LOE rcx rsi r12 r13 r14 r15 edi
|
||||
..B1.3: # Preds ..B1.30
|
||||
# Execution count [1.00e+00]
|
||||
xorl %r10d, %r10d #9.5
|
||||
lea (%r15,%rcx,8), %r11 #13.13
|
||||
vxorpd %xmm1, %xmm1, %xmm1 #6.5
|
||||
lea (%r14,%rcx,8), %rdx #13.37
|
||||
cmpq $2, %rsi #9.18
|
||||
jle ..B1.21 # Prob 9% #9.18
|
||||
# LOE rdx rcx rsi r10 r11 r12 r13 r14 r15 edi xmm1
|
||||
..B1.4: # Preds ..B1.3
|
||||
# Execution count [9.00e-01]
|
||||
addl $-2, %edi #12.9
|
||||
movq %rcx, %r9 #13.61
|
||||
movl %edi, %eax #12.9
|
||||
addq $-2, %rsi #9.18
|
||||
andl $-16, %eax #12.9
|
||||
xorl %r8d, %r8d #9.5
|
||||
shlq $4, %r9 #13.61
|
||||
movslq %eax, %rax #12.9
|
||||
addq %r14, %r9 #13.61
|
||||
movslq %edi, %rdi #12.9
|
||||
vxorps %ymm0, %ymm0, %ymm0 #6.5
|
||||
movq %rax, -80(%rbp) #12.9[spill]
|
||||
movq %rdi, -88(%rbp) #12.9[spill]
|
||||
movl %eax, -72(%rbp) #9.5[spill]
|
||||
movq %rsi, -48(%rbp) #9.5[spill]
|
||||
movq %rdx, -64(%rbp) #9.5[spill]
|
||||
movq %r15, -96(%rbp) #9.5[spill]
|
||||
movq %r14, -56(%rbp) #9.5[spill]
|
||||
movq %r13, -104(%rbp) #9.5[spill]
|
||||
movq %r12, -112(%rbp) #9.5[spill]
|
||||
.cfi_escape 0x10, 0x0c, 0x03, 0x76, 0x90, 0x7f
|
||||
# LOE rcx r8 r9 r10 r11 edi xmm1 ymm0
|
||||
..B1.5: # Preds ..B1.19 ..B1.4
|
||||
# Execution count [5.00e+00]
|
||||
cmpq $2, %rcx #12.22
|
||||
jle ..B1.19 # Prob 50% #12.22
|
||||
# LOE rcx r8 r9 r10 r11 edi xmm1 ymm0
|
||||
..B1.6: # Preds ..B1.5
|
||||
# Execution count [4.50e+00]
|
||||
cmpl $16, %edi #12.9
|
||||
jl ..B1.26 # Prob 10% #12.9
|
||||
# LOE rcx r8 r9 r10 r11 edi xmm1 ymm0
|
||||
..B1.7: # Preds ..B1.6
|
||||
# Execution count [4.50e+00]
|
||||
movl -72(%rbp), %r14d #12.9[spill]
|
||||
xorl %edx, %edx #12.9
|
||||
movq -80(%rbp), %r12 #13.13[spill]
|
||||
lea (%r11,%r8), %rax #13.13
|
||||
# LOE rax rdx rcx r8 r9 r10 r11 r12 edi r14d xmm1 ymm0
|
||||
..B1.8: # Preds ..B1.8 ..B1.7
|
||||
# Execution count [2.50e+01]
|
||||
vmovupd %ymm0, 8(%rax,%rdx,8) #13.13
|
||||
vmovupd %ymm0, 40(%rax,%rdx,8) #13.13
|
||||
vmovupd %ymm0, 72(%rax,%rdx,8) #13.13
|
||||
vmovupd %ymm0, 104(%rax,%rdx,8) #13.13
|
||||
addq $16, %rdx #12.9
|
||||
cmpq %r12, %rdx #12.9
|
||||
jb ..B1.8 # Prob 82% #12.9
|
||||
# LOE rax rdx rcx r8 r9 r10 r11 r12 edi r14d xmm1 ymm0
|
||||
..B1.10: # Preds ..B1.8 ..B1.26
|
||||
# Execution count [5.00e+00]
|
||||
lea 1(%r14), %eax #12.9
|
||||
cmpl %edi, %eax #12.9
|
||||
ja ..B1.19 # Prob 50% #12.9
|
||||
# LOE rcx r8 r9 r10 r11 edi r14d xmm1 ymm0
|
||||
..B1.11: # Preds ..B1.10
|
||||
# Execution count [4.50e+00]
|
||||
movslq %r14d, %r14 #12.9
|
||||
movq -88(%rbp), %r13 #12.9[spill]
|
||||
subq %r14, %r13 #12.9
|
||||
cmpq $4, %r13 #12.9
|
||||
jl ..B1.25 # Prob 10% #12.9
|
||||
# LOE rcx r8 r9 r10 r11 r13 r14 edi xmm1 ymm0
|
||||
..B1.12: # Preds ..B1.11
|
||||
# Execution count [4.50e+00]
|
||||
movl %r13d, %r15d #12.9
|
||||
lea (%r11,%r8), %rax #13.13
|
||||
andl $-4, %r15d #12.9
|
||||
xorl %edx, %edx #12.9
|
||||
movslq %r15d, %r15 #12.9
|
||||
lea (%rax,%r14,8), %rax #13.13
|
||||
# LOE rax rdx rcx r8 r9 r10 r11 r13 r14 r15 edi xmm1 ymm0
|
||||
..B1.13: # Preds ..B1.13 ..B1.12
|
||||
# Execution count [2.50e+01]
|
||||
vmovupd %ymm0, 8(%rax,%rdx,8) #13.13
|
||||
addq $4, %rdx #12.9
|
||||
cmpq %r15, %rdx #12.9
|
||||
jb ..B1.13 # Prob 82% #12.9
|
||||
# LOE rax rdx rcx r8 r9 r10 r11 r13 r14 r15 edi xmm1 ymm0
|
||||
..B1.15: # Preds ..B1.13 ..B1.25
|
||||
# Execution count [5.00e+00]
|
||||
cmpq %r13, %r15 #12.9
|
||||
jae ..B1.19 # Prob 10% #12.9
|
||||
# LOE rcx r8 r9 r10 r11 r13 r14 r15 edi xmm1 ymm0
|
||||
..B1.16: # Preds ..B1.15
|
||||
# Execution count [4.50e+00]
|
||||
movq -56(%rbp), %rax #13.49[spill]
|
||||
lea (%r11,%r8), %r12 #13.13
|
||||
movq -64(%rbp), %rsi #13.25[spill]
|
||||
lea (%r9,%r8), %rdx #13.61
|
||||
lea (%r12,%r14,8), %r12 #13.13
|
||||
addq %r8, %rax #13.49
|
||||
addq %r8, %rsi #13.25
|
||||
lea (%rdx,%r14,8), %rdx #13.61
|
||||
lea (%rax,%r14,8), %rax #13.49
|
||||
lea (%rsi,%r14,8), %r14 #13.25
|
||||
# LOE rax rdx rcx r8 r9 r10 r11 r12 r13 r14 r15 edi xmm1 ymm0
|
||||
movl $111, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
..B1.17: # Preds ..B1.17 ..B1.16
|
||||
# Execution count [2.50e+01]
|
||||
vmovsd (%r14,%r15,8), %xmm2 #13.25
|
||||
vaddsd 16(%r14,%r15,8), %xmm2, %xmm3 #13.37
|
||||
vaddsd 8(%rax,%r15,8), %xmm3, %xmm4 #13.49
|
||||
vaddsd 8(%rdx,%r15,8), %xmm4, %xmm5 #13.61
|
||||
vmulsd %xmm5, %xmm1, %xmm6 #13.74
|
||||
vmovsd %xmm6, 8(%r12,%r15,8) #13.13
|
||||
incq %r15 #12.9
|
||||
cmpq %r13, %r15 #12.9
|
||||
jb ..B1.17 # Prob 82% #12.9
|
||||
movl $222, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
# LOE rax rdx rcx r8 r9 r10 r11 r12 r13 r14 r15 edi xmm1 ymm0
|
||||
..B1.19: # Preds ..B1.17 ..B1.5 ..B1.10 ..B1.15
|
||||
# Execution count [5.00e+00]
|
||||
incq %r10 #9.5
|
||||
lea (%r8,%rcx,8), %r8 #9.5
|
||||
cmpq -48(%rbp), %r10 #9.5[spill]
|
||||
jb ..B1.5 # Prob 82% #9.5
|
||||
# LOE rcx r8 r9 r10 r11 edi xmm1 ymm0
|
||||
..B1.20: # Preds ..B1.19
|
||||
# Execution count [9.00e-01]
|
||||
movq -64(%rbp), %rdx #[spill]
|
||||
movq -96(%rbp), %r15 #[spill]
|
||||
movq -56(%rbp), %r14 #[spill]
|
||||
movq -104(%rbp), %r13 #[spill]
|
||||
movq -112(%rbp), %r12 #[spill]
|
||||
.cfi_restore 12
|
||||
# LOE rdx r11 r12 r13 r14 r15
|
||||
..B1.21: # Preds ..B1.3 ..B1.20
|
||||
# Execution count [1.00e+00]
|
||||
addq $8, %rdx #16.5
|
||||
addq $8, %r11 #16.5
|
||||
movq %rdx, %rdi #16.5
|
||||
movq %r11, %rsi #16.5
|
||||
vzeroupper #16.5
|
||||
..___tag_value_jacobi2D5pt.12:
|
||||
# dummy(double *, double *)
|
||||
call dummy #16.5
|
||||
..___tag_value_jacobi2D5pt.13:
|
||||
# LOE r12 r13 r14 r15
|
||||
..B1.22: # Preds ..B1.21
|
||||
# Execution count [1.00e+00]
|
||||
movq %r15, %rdx #16.5
|
||||
movq %r13, %rax #16.5
|
||||
addq $31, %rax #16.5
|
||||
andq $-32, %rax #16.5
|
||||
addq %rax, %rsp #16.5
|
||||
# LOE r12 r13 r14
|
||||
..B1.23: # Preds ..B1.22
|
||||
# Execution count [1.00e+00]
|
||||
movq %r14, %rdx #16.5
|
||||
movq %r13, %rax #16.5
|
||||
addq $31, %rax #16.5
|
||||
andq $-32, %rax #16.5
|
||||
addq %rax, %rsp #16.5
|
||||
# LOE r12
|
||||
..B1.24: # Preds ..B1.23
|
||||
# Execution count [1.00e+00]
|
||||
lea -24(%rbp), %rsp #17.1
|
||||
.cfi_restore 15
|
||||
popq %r15 #17.1
|
||||
.cfi_restore 14
|
||||
popq %r14 #17.1
|
||||
.cfi_restore 13
|
||||
popq %r13 #17.1
|
||||
popq %rbp #17.1
|
||||
.cfi_restore 6
|
||||
movq %rbx, %rsp #17.1
|
||||
popq %rbx #17.1
|
||||
.cfi_def_cfa 7, 8
|
||||
.cfi_restore 3
|
||||
ret #17.1
|
||||
.cfi_def_cfa 3, 16
|
||||
.cfi_offset 3, -16
|
||||
.cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00
|
||||
.cfi_escape 0x10, 0x0c, 0x03, 0x76, 0x90, 0x7f
|
||||
.cfi_escape 0x10, 0x0d, 0x02, 0x76, 0x78
|
||||
.cfi_escape 0x10, 0x0e, 0x02, 0x76, 0x70
|
||||
.cfi_escape 0x10, 0x0f, 0x02, 0x76, 0x68
|
||||
# LOE
|
||||
..B1.25: # Preds ..B1.11
|
||||
# Execution count [4.50e-01]: Infreq
|
||||
xorl %r15d, %r15d #12.9
|
||||
jmp ..B1.15 # Prob 100% #12.9
|
||||
# LOE rcx r8 r9 r10 r11 r13 r14 r15 edi xmm1 ymm0
|
||||
..B1.26: # Preds ..B1.6
|
||||
# Execution count [4.50e-01]: Infreq
|
||||
xorl %r14d, %r14d #12.9
|
||||
jmp ..B1.10 # Prob 100% #12.9
|
||||
.align 16,0x90
|
||||
# LOE rcx r8 r9 r10 r11 edi r14d xmm1 ymm0
|
||||
.cfi_endproc
|
||||
# mark_end;
|
||||
.type jacobi2D5pt,@function
|
||||
.size jacobi2D5pt,.-jacobi2D5pt
|
||||
.data
|
||||
# -- End jacobi2D5pt
|
||||
.data
|
||||
.section .note.GNU-stack, ""
|
||||
// -- Begin DWARF2 SEGMENT .eh_frame
|
||||
.section .eh_frame,"a",@progbits
|
||||
.eh_frame_seg:
|
||||
.align 8
|
||||
# End
|
||||
@@ -1,16 +0,0 @@
|
||||
|
||||
/*
 * jacobi2D5pt - 2D five-point stencil benchmark kernel.
 *
 * Declares two M x N variable-length arrays of doubles (a, b) and a scalar s.
 * For every interior row j (1 .. M-2) and interior column i (1 .. N-2) it
 * stores into b[j][i] the sum of the left, right, upper and lower neighbours
 * of a[j][i], multiplied by s.  Afterwards &a[1][1] and &b[1][1] are passed
 * to the externally defined dummy().
 *
 * Note: a and s are read without being initialised anywhere in this function.
 * NOTE(review): the STARTLOOP comment looks like a marker for external
 * tooling, and the call to dummy() presumably keeps the loop from being
 * optimised away -- confirm before relying on either.
 */
void jacobi2D5pt(int N, int M){
|
||||
void dummy(double*, double*);
|
||||
double a[M][N];
|
||||
double b[M][N];
|
||||
double s;
|
||||
|
||||
for(int j=1; j<M-1; ++j){
|
||||
#pragma vector aligned
|
||||
//STARTLOOP
|
||||
for(int i=1; i<N-1; ++i){
|
||||
b[j][i] = ( a[j][i-1] + a[j][i+1] + a[j-1][i] + a[j+1][i]) * s;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
dummy(&a[1][1], &b[1][1]);
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
|
||||
/*
 * daxpy - benchmark kernel computing a[i] = a[i] + s * b[i] for i in 0 .. N-1.
 *
 * a and b are variable-length arrays of N doubles and s is a scalar; after
 * the loop &a[1] and &b[1] are passed to the externally defined dummy().
 *
 * Note: a, b and s are read without being initialised in this function.
 * NOTE(review): the STARTLOOP comment looks like a marker for external
 * tooling -- confirm.
 */
void daxpy(int N){
|
||||
void dummy(double*, double*);
|
||||
double a[N], b[N];
|
||||
double s;
|
||||
|
||||
|
||||
//STARTLOOP
|
||||
for(int i=0; i<N; ++i)
|
||||
a[i] = a[i] + s * b[i];
|
||||
|
||||
|
||||
dummy(&a[1], &b[1]);
|
||||
}
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
|
||||
/*
 * scale - benchmark kernel computing a[i] = s * b[i] for i in 0 .. N-1.
 *
 * a and b are variable-length arrays of N doubles and s is a scalar; after
 * the loop &a[1] and &b[1] are passed to the externally defined dummy().
 *
 * Note: b and s are read without being initialised in this function.
 * NOTE(review): the STARTLOOP comment looks like a marker for external
 * tooling -- confirm.
 */
void scale(int N){
|
||||
void dummy(double*, double*);
|
||||
double a[N], b[N];
|
||||
double s;
|
||||
|
||||
|
||||
//STARTLOOP
|
||||
for(int i=0; i<N; ++i){
|
||||
a[i] = s * b[i];
|
||||
}
|
||||
|
||||
|
||||
dummy(&a[1],&b[1]);
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,199 +0,0 @@
|
||||
# mark_description "Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 16.0.3.210 Build 20160415";
|
||||
# mark_description "-I../../iaca-lin64/include -fno-alias -O3 -fopenmp -xCORE-AVX-I -S -o ivb-asm.S";
|
||||
.file "taxCalc.c"
|
||||
.text
|
||||
..TXTST0:
|
||||
# -- Begin main
|
||||
.text
|
||||
# mark_begin;
|
||||
.align 16,0x90
|
||||
.globl main
|
||||
# --- main(void)
|
||||
main:
|
||||
..B1.1: # Preds ..B1.0
|
||||
.cfi_startproc
|
||||
..___tag_value_main.1:
|
||||
..L2:
|
||||
#4.15
|
||||
pushq %rbp #4.15
|
||||
.cfi_def_cfa_offset 16
|
||||
movq %rsp, %rbp #4.15
|
||||
.cfi_def_cfa 6, 16
|
||||
.cfi_offset 6, -16
|
||||
andq $-128, %rsp #4.15
|
||||
subq $4096, %rsp #4.15
|
||||
movl $104446, %esi #4.15
|
||||
movl $3, %edi #4.15
|
||||
call __intel_new_feature_proc_init #4.15
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.10: # Preds ..B1.1
|
||||
vstmxcsr (%rsp) #4.15
|
||||
movl $.2.3_2_kmpc_loc_struct_pack.3, %edi #4.15
|
||||
xorl %esi, %esi #4.15
|
||||
orl $32832, (%rsp) #4.15
|
||||
xorl %eax, %eax #4.15
|
||||
vldmxcsr (%rsp) #4.15
|
||||
..___tag_value_main.6:
|
||||
call __kmpc_begin #4.15
|
||||
..___tag_value_main.7:
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.2: # Preds ..B1.10
|
||||
movl $il0_peep_printf_format_0, %edi #5.5
|
||||
call puts #5.5
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.3: # Preds ..B1.2
|
||||
vmovss .L_2il0floatpacket.0(%rip), %xmm0 #8.15
|
||||
xorl %eax, %eax #11.5
|
||||
vxorps %xmm1, %xmm1, %xmm1 #9.5
|
||||
vmovss %xmm1, (%rsp) #9.5
|
||||
|
||||
movl $111,%ebx #IACA START
|
||||
.byte 100,103,144 #IACA START
|
||||
# LOE rax rbx r12 r13 r14 r15 xmm0 xmm1
|
||||
|
||||
..B1.4: # Preds ..B1.4 ..B1.3
|
||||
lea 1(%rax,%rax), %edx #12.9
|
||||
vcvtsi2ss %edx, %xmm2, %xmm2 #12.27
|
||||
vmulss %xmm2, %xmm0, %xmm3 #12.29
|
||||
lea 2(%rax,%rax), %ecx #12.9
|
||||
vaddss %xmm3, %xmm1, %xmm4 #12.29
|
||||
vxorps %xmm1, %xmm1, %xmm1 #12.27
|
||||
vcvtsi2ss %ecx, %xmm1, %xmm1 #12.27
|
||||
vmulss %xmm1, %xmm0, %xmm5 #12.29
|
||||
vmovss %xmm4, 4(%rsp,%rax,8) #12.9
|
||||
vaddss %xmm5, %xmm4, %xmm1 #12.29
|
||||
vmovss %xmm1, 8(%rsp,%rax,8) #12.9
|
||||
incq %rax #11.5
|
||||
cmpq $499, %rax #11.5
|
||||
jb ..B1.4 # Prob 99% #11.5
|
||||
|
||||
movl $222,%ebx #IACA END
|
||||
.byte 100,103,144 #IACA END
|
||||
|
||||
# LOE rax rbx r12 r13 r14 r15 xmm0 xmm1
|
||||
..B1.5: # Preds ..B1.4
|
||||
vmovss 3992(%rsp), %xmm0 #12.18
|
||||
movl $il0_peep_printf_format_1, %edi #15.5
|
||||
vaddss .L_2il0floatpacket.1(%rip), %xmm0, %xmm1 #12.29
|
||||
vmovss %xmm1, 3996(%rsp) #12.9
|
||||
call puts #15.5
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.6: # Preds ..B1.5
|
||||
movl $.2.3_2_kmpc_loc_struct_pack.14, %edi #16.12
|
||||
xorl %eax, %eax #16.12
|
||||
..___tag_value_main.8:
|
||||
call __kmpc_end #16.12
|
||||
..___tag_value_main.9:
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.7: # Preds ..B1.6
|
||||
xorl %eax, %eax #16.12
|
||||
movq %rbp, %rsp #16.12
|
||||
popq %rbp #16.12
|
||||
.cfi_def_cfa 7, 8
|
||||
.cfi_restore 6
|
||||
ret #16.12
|
||||
.align 16,0x90
|
||||
.cfi_endproc
|
||||
# LOE
|
||||
# mark_end;
|
||||
.type main,@function
|
||||
.size main,.-main
|
||||
.data
|
||||
.align 4
|
||||
.align 4
|
||||
.2.3_2_kmpc_loc_struct_pack.3:
|
||||
.long 0
|
||||
.long 2
|
||||
.long 0
|
||||
.long 0
|
||||
.quad .2.3_2__kmpc_loc_pack.2
|
||||
.align 4
|
||||
.2.3_2__kmpc_loc_pack.2:
|
||||
.byte 59
|
||||
.byte 117
|
||||
.byte 110
|
||||
.byte 107
|
||||
.byte 110
|
||||
.byte 111
|
||||
.byte 119
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 109
|
||||
.byte 97
|
||||
.byte 105
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 52
|
||||
.byte 59
|
||||
.byte 52
|
||||
.byte 59
|
||||
.byte 59
|
||||
.space 1, 0x00 # pad
|
||||
.align 4
|
||||
.2.3_2_kmpc_loc_struct_pack.14:
|
||||
.long 0
|
||||
.long 2
|
||||
.long 0
|
||||
.long 0
|
||||
.quad .2.3_2__kmpc_loc_pack.13
|
||||
.align 4
|
||||
.2.3_2__kmpc_loc_pack.13:
|
||||
.byte 59
|
||||
.byte 117
|
||||
.byte 110
|
||||
.byte 107
|
||||
.byte 110
|
||||
.byte 111
|
||||
.byte 119
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 109
|
||||
.byte 97
|
||||
.byte 105
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 49
|
||||
.byte 54
|
||||
.byte 59
|
||||
.byte 49
|
||||
.byte 54
|
||||
.byte 59
|
||||
.byte 59
|
||||
.section .rodata.str1.4, "aMS",@progbits,1
|
||||
.align 4
|
||||
.align 4
|
||||
il0_peep_printf_format_0:
|
||||
.long 1128354639
|
||||
.long 1702109249
|
||||
.long 1931506803
|
||||
.long 1953653108
|
||||
.byte 0
|
||||
.space 3, 0x00 # pad
|
||||
.align 4
|
||||
il0_peep_printf_format_1:
|
||||
.long 1128354639
|
||||
.long 1702109249
|
||||
.long 1696625779
|
||||
.word 25710
|
||||
.byte 0
|
||||
.data
|
||||
# -- End main
|
||||
.section .rodata, "a"
|
||||
.align 4
|
||||
.align 4
|
||||
.L_2il0floatpacket.0:
|
||||
.long 0x3e428f5c
|
||||
.type .L_2il0floatpacket.0,@object
|
||||
.size .L_2il0floatpacket.0,4
|
||||
.align 4
|
||||
.L_2il0floatpacket.1:
|
||||
.long 0x433dcf5c
|
||||
.type .L_2il0floatpacket.1,@object
|
||||
.size .L_2il0floatpacket.1,4
|
||||
.data
|
||||
.section .note.GNU-stack, ""
|
||||
// -- Begin DWARF2 SEGMENT .eh_frame
|
||||
.section .eh_frame,"a",@progbits
|
||||
.eh_frame_seg:
|
||||
.align 8
|
||||
# End
|
||||
@@ -1,18 +0,0 @@
|
||||
#include <stdio.h>
|
||||
//#include "iacaMarks.h"
|
||||
|
||||
/*
 * main - running-sum benchmark ("taxCalc").
 *
 * Prints a start message, sets arr[0] = 0 and then, for i = 1 .. 999, stores
 * arr[i] = arr[i-1] + i * tax with tax = 0.19, so every element depends on
 * the one before it.  Prints an end message and returns 0.
 *
 * NOTE(review): the STARTLOOP comment looks like a marker for external
 * tooling -- confirm.
 */
int main(void){
|
||||
printf("OSACA test start\n");
|
||||
int i = 1;
|
||||
float arr[1000];
|
||||
float tax = 0.19;
|
||||
arr[0] = 0;
|
||||
//STARTLOOP
|
||||
while(i < 1000){
|
||||
arr[i] = arr[i-1]+i*tax;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
printf("OSACA test end\n");
|
||||
return 0;
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
|
||||
/*
 * triad - benchmark kernel computing a[i] = b[i] + c[i] * d[i] for i in 0 .. N-1.
 *
 * a, b, c and d are variable-length arrays of N doubles; after the loop
 * &a[1] is passed to the externally defined dummy().
 *
 * Note: b, c and d are read without being initialised in this function, and
 * the scalar s is declared but never used.
 * NOTE(review): the STARTLOOP comment looks like a marker for external
 * tooling -- confirm.
 */
void triad(int N){
|
||||
void dummy(double*);
|
||||
double a[N], b[N], c[N], d[N];
|
||||
double s;
|
||||
|
||||
|
||||
//STARTLOOP
|
||||
for(int i=0; i<N; ++i)
|
||||
a[i] = b[i] + c[i] * d[i];
|
||||
|
||||
|
||||
dummy(&a[1]);
|
||||
}
|
||||
@@ -1,2 +1,10 @@
|
||||
"""Open Source Architecture Code Analyzer"""
|
||||
name = 'osaca'
|
||||
__version__ = '0.2.2'
|
||||
__version__ = '0.3.1.dev1'
|
||||
|
||||
# To trigger travis deployment to pypi, do the following:
|
||||
# 1. Increment __version__
|
||||
# 2. commit to RRZE-HPC/osaca's master branch
|
||||
# 3. wait for travis to complete successfully (unless already tested)
|
||||
# 4. tag commit with 'v{}'.format(__version__) (`git tag vX.Y.Z`)
|
||||
# 5. push tag to github (`git push origin vX.Y.Z` or push all tags with `git push --tags`)
|
||||
|
||||
8
osaca/api/__init__.py
Normal file
8
osaca/api/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
APIs for handling interfaces to kerncraft, etc.
|
||||
|
||||
Only the classes below will be exported, so please add new API classes to __all__.
|
||||
"""
|
||||
from .kerncraft_interface import KerncraftAPI
|
||||
|
||||
__all__ = ['KerncraftAPI']
|
||||
80
osaca/api/kerncraft_interface.py
Executable file
80
osaca/api/kerncraft_interface.py
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import collections
|
||||
import sys
|
||||
from io import StringIO
|
||||
|
||||
from osaca.frontend import Frontend
|
||||
from osaca.parser import ParserAArch64v81, ParserX86ATT
|
||||
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel,
|
||||
SemanticsAppender, reduce_to_section)
|
||||
|
||||
|
||||
# Stolen from https://stackoverflow.com/a/16571630
|
||||
class Capturing(list):
    """Context manager that collects everything written to stdout as a list of lines.

    While the ``with`` block is active, ``sys.stdout`` is replaced by an
    in-memory ``StringIO`` buffer. On exit the buffered text is split into
    lines and appended to this list, and the previous ``sys.stdout`` is
    restored.

    Example::

        with Capturing() as lines:
            print('hello')
        # lines == ['hello']
    """

    def __enter__(self):
        """Redirect ``sys.stdout`` into a fresh buffer and return this list."""
        self._stdout = sys.stdout
        sys.stdout = self._stringio = StringIO()
        return self

    def __exit__(self, *args):
        """Store the captured lines and restore the original ``sys.stdout``.

        Exceptions raised inside the ``with`` block are not suppressed.
        """
        try:
            self.extend(self._stringio.getvalue().splitlines())
        finally:
            # Always hand stdout back, even if collecting the text failed, so
            # the process is never left printing into a discarded buffer.
            sys.stdout = self._stdout
            del self._stringio  # free up some memory
|
||||
|
||||
|
||||
class KerncraftAPI(object):
    """Programmatic interface that exposes an OSACA analysis of one kernel.

    On construction the machine model for ``arch`` is loaded, the matching
    assembly parser is selected from the model's ISA, ``code`` is parsed,
    reduced to the kernel section and annotated with semantics. The remaining
    methods query that annotated kernel.
    """

    def __init__(self, arch, code):
        """Parse ``code`` and annotate the contained kernel for ``arch``.

        :param arch: architecture identifier passed to ``MachineModel(arch=...)``
        :param code: assembly input handed to the parser's ``parse_file``
        :raises ValueError: if the machine model reports an ISA other than
            ``aarch64`` or ``x86`` (case-insensitive)
        """
        self.machine_model = MachineModel(arch=arch)
        self.semantics = SemanticsAppender(self.machine_model)
        isa = self.machine_model.get_ISA().lower()
        if isa == 'aarch64':
            self.parser = ParserAArch64v81()
        elif isa == 'x86':
            self.parser = ParserX86ATT()
        else:
            # Fail here with a clear message instead of hitting an
            # AttributeError on the missing parser a few lines further down.
            raise ValueError('Unsupported ISA: {!r}'.format(isa))

        parsed_code = self.parser.parse_file(code)
        self.kernel = reduce_to_section(parsed_code, self.machine_model.get_ISA())
        self.semantics.add_semantics(self.kernel)

    def create_output(self, verbose=False):
        """Return the text printed by the frontend's full analysis as one string.

        :param verbose: forwarded to ``Frontend.print_full_analysis``
        :returns: the captured stdout lines joined with newlines
        """
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        frontend = Frontend(arch=self.machine_model.get_arch())
        with Capturing() as output:
            frontend.print_full_analysis(self.kernel, kernel_graph, verbose=verbose)
        return '\n'.join(output)

    def get_unmatched_instruction_ratio(self):
        """Return the fraction of kernel instructions with unknown TP and LT.

        An instruction counts as unmatched when its ``flags`` contain both
        ``INSTR_FLAGS.TP_UNKWN`` and ``INSTR_FLAGS.LT_UNKWN``.

        :returns: ratio in ``[0, 1]``; ``0.0`` for an empty kernel
        """
        if not self.kernel:
            # Nothing to match, so nothing is unmatched (avoids dividing by zero).
            return 0.0
        unmatched_counter = sum(
            1
            for instruction in self.kernel
            if INSTR_FLAGS.TP_UNKWN in instruction['flags']
            and INSTR_FLAGS.LT_UNKWN in instruction['flags']
        )
        return unmatched_counter / len(self.kernel)

    def get_port_occupation_cycles(self):
        """Return an ordered mapping of port name to summed throughput value.

        Port names come from ``self.machine_model['ports']`` and are paired
        positionally with the values of ``get_throughput_sum``.
        """
        throughput_values = self.semantics.get_throughput_sum(self.kernel)
        port_names = self.machine_model['ports']
        return collections.OrderedDict(zip(port_names, throughput_values))

    def get_total_throughput(self):
        """Return the largest per-port throughput sum of the kernel."""
        return max(self.semantics.get_throughput_sum(self.kernel))

    def get_latency(self):
        """Return the tuple ``(loop-carried dependency latency, critical path latency)``."""
        return (self.get_lcd(), self.get_cp())

    def get_cp(self):
        """Return the sum of ``latency_cp`` over the kernel's critical path."""
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        kernel_cp = kernel_graph.get_critical_path()
        return sum(x['latency_cp'] for x in kernel_cp)

    def get_lcd(self):
        """Return the largest summed ``latency_lcd`` of any loop-carried dependency.

        :returns: ``0.0`` if no loop-carried dependency is reported
        """
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        lcd_dict = kernel_graph.get_loopcarried_dependencies()
        lcd = 0.0
        for dep in lcd_dict:
            lcd_tmp = sum(x['latency_lcd'] for x in lcd_dict[dep]['dependencies'])
            lcd = max(lcd, lcd_tmp)
        return lcd
|
||||
@@ -1,41 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from param import Register, MemAddr, Parameter
|
||||
from testcase import Testcase
|
||||
|
||||
# Choose out of various operands
|
||||
reg8 = Register('al')
|
||||
reg16 = Register('ax')
|
||||
reg32 = Register('eax')
|
||||
reg64 = Register('rax')
|
||||
xmm = Register('xmm0')
|
||||
ymm = Register('ymm0')
|
||||
zmm = Register('zmm0')
|
||||
mem0 = MemAddr('(%rax, %esi, 4)')
|
||||
imd1 = Parameter('IMD')
|
||||
|
||||
|
||||
# -----------------------------------------------
|
||||
# -USER INPUT------------------------------------
|
||||
# -----------------------------------------------
|
||||
# Enter your mnemonic
|
||||
mnemonic = 'add'
|
||||
|
||||
# Define your operands. If you don't need it, just type in None
|
||||
dst = mem0
|
||||
op1 = imd1
|
||||
op2 = None
|
||||
|
||||
# Define the number of instructions per loop (default: 12)
|
||||
per_loop = '32'
|
||||
|
||||
# -----------------------------------------------
|
||||
# -----------------------------------------------
|
||||
|
||||
# Start
|
||||
operands = [x for x in [dst, op1, op2] if x is not None]
|
||||
opListStr = ', '.join([str(x) for x in operands])
|
||||
print('Create Testcase for {} {}'.format(mnemonic, opListStr), end='')
|
||||
tc = Testcase(mnemonic, operands, per_loop)
|
||||
tc.write_testcase()
|
||||
print(' --------> SUCCEEDED')
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
540
osaca/data/csx.yml
Normal file
540
osaca/data/csx.yml
Normal file
@@ -0,0 +1,540 @@
|
||||
osaca_version: 0.3.2
|
||||
micro_architecture: Cascade Lake SP
|
||||
arch_code: CSX
|
||||
isa: x86
|
||||
ROB_size: 224
|
||||
retired_uOps_per_cycle: 4
|
||||
scheduler_size: 97
|
||||
hidden_loads: false
|
||||
load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0}
|
||||
load_throughput:
|
||||
- {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, index: ~, offset: imd, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, index: ~, offset: imd, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, index: gpr, offset: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, index: gpr, offset: ~, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
ports: ['0', 0DV, '1', '2', 2D, '3', 3D, '4', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
┌------------------------------------------------------------------------┐
|
||||
| 97 entry unified scheduler |
|
||||
└------------------------------------------------------------------------┘
|
||||
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
||||
▼ ▼ ▼ ▼ ▼ ▼ ▼ ▼
|
||||
┌-------┐ ┌-------┐ ┌-----┐ ┌-----┐ ┌-----┐ ┌-------┐ ┌--------┐ ┌-----┐
|
||||
| ALU | | ALU | | LD | | LD | | ST | | ALU | | ALU & | | AGU |
|
||||
└-------┘ └-------┘ └-----┘ └-----┘ └-----┘ └-------┘ | Shift | └-----┘
|
||||
┌-------┐ ┌-------┐ ┌-----┐ ┌-----┐ ┌-------┐ └--------┘
|
||||
| 2ND | | Fast | | AGU | | AGU | | Fast |
|
||||
| BRANCH| | LEA | └-----┘ └-----┘ | LEA |
|
||||
└-------┘ └-------┘ └-------┘
|
||||
┌-------┐ ┌-------┐ ┌-------┐
|
||||
|AVX DIV| |AVX FMA| | AVX |
|
||||
└-------┘ └-------┘ | SHUF |
|
||||
┌-------┐ ┌-------┐ └-------┘
|
||||
|AVX FMA| |AVX MUL| ┌-------┐
|
||||
└-------┘ └-------┘ |AVX-512|
|
||||
┌-------┐ ┌-------┐ | FMA |
|
||||
|AVX MUL| |AVX ADD| └-------┘
|
||||
└-------┘ └-------┘ ┌-------┐
|
||||
┌-------┐ ┌-------┐ |AVX-512|
|
||||
|AVX ADD| |AVX ALU| | ADD |
|
||||
└-------┘ └-------┘ └-------┘
|
||||
┌-------┐ ┌-------┐ ┌-------┐
|
||||
|AVX ALU| | AVX | |AVX-512|
|
||||
└-------┘ | Shift | | MUL |
|
||||
┌-------┐ └-------┘ └-------┘
|
||||
| AVX | ┌-------┐ ┌-------┐
|
||||
| Shift | | Slow | |AVX-512|
|
||||
└-------┘ | LEA | | ALU |
|
||||
┌-------┐ └-------┘ └-------┘
|
||||
| VNNI | ┌-------┐
|
||||
└-------┘ | VNNI |
|
||||
└-------┘
|
||||
instruction_forms:
|
||||
- name: addsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: addss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: addl
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: addq
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: cmpl
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: ~ # 1*p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: cmpq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: ~ # 1*p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: incq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: ~ # 1*p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: ja
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: jb
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: jne
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: mulsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: mulss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: movl
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: movq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: movq
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p23+1*p2D3D
|
||||
port_pressure: [[1, '23'], [1, [2D, 3D]]]
|
||||
- name: movq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p23+1*p4
|
||||
port_pressure: [[1, '23'], [1, '4']]
|
||||
- name: rcpss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 1.0
|
||||
latency: 4.0
|
||||
port_pressure: ~
|
||||
- name: sqrtsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 6.0
|
||||
latency: 22.0 # 1*p0+6*p0DV
|
||||
port_pressure: [[1, '0'], [6.0, [0DV]]]
|
||||
- name: sqrtss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 3.0
|
||||
latency: 16.0 # 1*p0+3*p0DV
|
||||
port_pressure: [[1, '0'], [3.0, [0DV]]]
|
||||
- name: subq
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p0156
|
||||
port_pressure: [[1, '0156']]
|
||||
- name: vaddpd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vaddpd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vaddss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vdivsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 4.0
|
||||
latency: 14.0 # 1*p0+4*p0DV
|
||||
port_pressure: [[1, '0'], [4.0, [0DV]]]
|
||||
- name: vdivss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 3.0
|
||||
latency: 11.0 # 1*p0+3*p0DV
|
||||
port_pressure: [[1, '0'], [3.0, [0DV]]]
|
||||
- name: vfmadd213pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vfmadd132pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vfmadd231pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vfmadd132pd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vfmadd213pd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vfmadd231pd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p23+1*p2D3D
|
||||
port_pressure: [[1, '23'], [1, [2D, 3D]]]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p23+1*p2D3D
|
||||
port_pressure: [[1, '23'], [1, [2D, 3D]]]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p23+1*p4
|
||||
port_pressure: [[1, '23'], [1, '4']]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 1*p23+1*p4
|
||||
port_pressure: [[1, '23'], [1, '4']]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 1*p23+1*p4
|
||||
port_pressure: [[1, '23'], [1, '4']]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 1*p23+1*p4
|
||||
port_pressure: [[1, '23'], [1, '4']]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 1*p23+1*p4
|
||||
port_pressure: [[1, '23'], [1, '4']]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p23+1*p2D3D
|
||||
port_pressure: [[1, '23'], [1, [2D, 3D]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p23+1*p4
|
||||
port_pressure: [[1, '23'], [1, '4']]
|
||||
File diff suppressed because it is too large
Load Diff
374
osaca/data/isa/aarch64.yml
Normal file
374
osaca/data/isa/aarch64.yml
Normal file
@@ -0,0 +1,374 @@
|
||||
osaca_version: 0.3.0
|
||||
isa: "AArch64"
|
||||
# Contains all operand-irregular instruction forms OSACA supports for AArch64.
|
||||
# Operand-regular for an AArch64 instruction form with N operands in the shape of
|
||||
# mnemonic op1 ... opN
|
||||
# means that op1 is the only destination operand and op2 to op(N) are source operands.
|
||||
instruction_forms:
|
||||
- name: "fmla"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "s"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "s"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "s"
|
||||
source: true
|
||||
destination: false
|
||||
- name: "fmla"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "d"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: true
|
||||
- name: "stp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "stp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "stp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "stp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: false
|
||||
destination: true
|
||||
- name: "stp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "x"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: "x"
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "x"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "x"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "x"
|
||||
offset: ~
|
||||
index: "x"
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
218
osaca/data/isa/x86.yml
Normal file
218
osaca/data/isa/x86.yml
Normal file
@@ -0,0 +1,218 @@
|
||||
osaca_version: 0.3.0
|
||||
isa: "x86"
|
||||
# Contains all operand-irregular instruction forms OSACA supports for x86.
|
||||
# Operand-regular for a x86 AT&T instruction form with N operands in the shape of
|
||||
# mnemonic op1 ... opN
|
||||
# means that opN is the only destination operand and op1 to op(N-1) are source operands.
|
||||
instruction_forms:
|
||||
- name: addl
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: addq
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: addsd
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- name: addss
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- name: cmpl
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- name: cmpq
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- name: cmpq
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
source: true
|
||||
destination: false
|
||||
- name: ja
|
||||
operands:
|
||||
- class: "identifier"
|
||||
source: true
|
||||
destination: false
|
||||
- name: mulsd
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- name: mulss
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- name: subq
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd132pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: ~
|
||||
index: "gpr"
|
||||
scale: 1
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd132pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: "imd"
|
||||
index: "gpr"
|
||||
scale: 1
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd213pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: ~
|
||||
index: "gpr"
|
||||
scale: 1
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd213pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: "imd"
|
||||
index: "gpr"
|
||||
scale: 1
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd231pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: "imd"
|
||||
index: "gpr"
|
||||
scale: 1
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd231pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: ~
|
||||
index: "gpr"
|
||||
scale: 1
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: true
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,222 +1,202 @@
|
||||
#!/usr/bin/env python3
|
||||
from collections import defaultdict, OrderedDict
|
||||
import xml.etree.ElementTree as ET
|
||||
import re
|
||||
import sys
|
||||
import argparse
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from distutils.version import StrictVersion
|
||||
|
||||
from osaca.param import Parameter, Register
|
||||
from osaca.eu_sched import Scheduler
|
||||
from osaca.parser import get_parser
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
def normalize_reg_name(reg_name):
|
||||
# strip spaces
|
||||
reg_name = reg_name.strip()
|
||||
# masks are denoted with curly brackets in uops.info
|
||||
reg_name = re.sub(r'{K([0-7])}', r'K\1', reg_name)
|
||||
reg_name = re.sub(r'ST\(([0-7])\)', r'ST\1', reg_name)
|
||||
return reg_name
|
||||
def port_pressure_from_tag_attributes(attrib):
|
||||
# '1*p015+1*p1+1*p23+1*p4+3*p5' ->
|
||||
# [[1, '015'], [1, '1'], [1, '23'], [1, '4'], [3, '5']]
|
||||
port_occupation = []
|
||||
for p in attrib['ports'].split('+'):
|
||||
cycles, ports = p.split('*p')
|
||||
port_occupation.append([int(cycles), ports])
|
||||
|
||||
|
||||
def port_occupancy_from_tag_attributes(attrib, arch):
|
||||
occupancy = defaultdict(int)
|
||||
for k, v in attrib.items():
|
||||
m = re.match('^port([0-9]+)', k)
|
||||
if not m:
|
||||
continue
|
||||
ports = m.group(1)
|
||||
# Ignore Port7 on HSW, BDW, SKL and SKX if present in combination with ports 2 and 3.
|
||||
# Port7 is only used for simple address generation, while 2 and 3 handle all addressing,
|
||||
# but uops.info does not differentiate.
|
||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX'] and ports == '237':
|
||||
ports = ports.replace('7', '')
|
||||
potential_ports = list(ports)
|
||||
per_port_occupancy = int(v) / len(potential_ports)
|
||||
for pp in potential_ports:
|
||||
occupancy[pp] += per_port_occupancy
|
||||
|
||||
# Also consider DIV pipeline
|
||||
# Also
|
||||
if 'div_cycles' in attrib:
|
||||
occupancy['0DV'] = int(attrib['div_cycles'])
|
||||
port_occupation.append([int(attrib['div_cycles']), ['DIV']])
|
||||
|
||||
return dict(occupancy)
|
||||
return port_occupation
|
||||
|
||||
|
||||
def extract_paramters(instruction_tag):
|
||||
def extract_paramters(instruction_tag, parser, isa):
|
||||
# Extract parameter components
|
||||
parameters = [] # used to store string representations
|
||||
parameter_tags = sorted(instruction_tag.findall("operand"),
|
||||
key=lambda p: int(p.attrib['idx']))
|
||||
parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib['idx']))
|
||||
for parameter_tag in parameter_tags:
|
||||
parameter = {}
|
||||
# Ignore parameters with suppressed=1
|
||||
if int(parameter_tag.attrib.get('suppressed', '0')):
|
||||
continue
|
||||
|
||||
p_type = parameter_tag.attrib['type']
|
||||
if p_type == 'imm':
|
||||
parameters.append('imd') # Parameter('IMD')
|
||||
parameter['class'] = 'immediate'
|
||||
parameter['imd'] = 'int'
|
||||
parameters.append(parameter)
|
||||
elif p_type == 'mem':
|
||||
parameters.append('mem') # Parameter('MEM')
|
||||
parameter['class'] = 'memory'
|
||||
parameter['base'] = 'gpr'
|
||||
parameter['offset'] = None
|
||||
parameter['index'] = None
|
||||
parameter['scale'] = 1
|
||||
parameters.append(parameter)
|
||||
elif p_type == 'reg':
|
||||
possible_regs = [normalize_reg_name(r)
|
||||
for r in parameter_tag.text.split(',')]
|
||||
reg_groups = [Register.sizes.get(r, None) for r in possible_regs]
|
||||
if reg_groups[1:] == reg_groups[:-1]:
|
||||
if reg_groups[0] is None:
|
||||
raise ValueError("Unknown register type for {} with {}.".format(
|
||||
parameter_tag.attrib, parameter_tag.text))
|
||||
elif reg_groups[0][1] == 'GPR':
|
||||
parameters.append('r{}'.format(reg_groups[0][0]))
|
||||
# Register(possible_regs[0]))
|
||||
elif '{' in parameter_tag.text:
|
||||
# We have a mask
|
||||
parameters[-1] += '{opmask}'
|
||||
parameter['class'] = 'register'
|
||||
possible_regs = [parser.parse_register('%' + r) for r in parameter_tag.text.split(',')]
|
||||
if possible_regs[0] is None:
|
||||
raise ValueError(
|
||||
'Unknown register type for {} with {}.'.format(
|
||||
parameter_tag.attrib, parameter_tag.text
|
||||
)
|
||||
)
|
||||
if isa == 'x86':
|
||||
if parser.is_vector_register(possible_regs[0]['register']):
|
||||
possible_regs[0]['register']['name'] = possible_regs[0]['register'][
|
||||
'name'
|
||||
].lower()[:3]
|
||||
if 'mask' in possible_regs[0]['register']:
|
||||
possible_regs[0]['register']['mask'] = True
|
||||
else:
|
||||
parameters.append(reg_groups[0][1].lower())
|
||||
possible_regs[0]['register']['name'] = 'gpr'
|
||||
elif isa == 'aarch64':
|
||||
del possible_regs['register']['name']
|
||||
for key in possible_regs[0]['register']:
|
||||
parameter[key] = possible_regs[0]['register'][key]
|
||||
parameters.append(parameter)
|
||||
elif p_type == 'relbr':
|
||||
parameters.append('LBL')
|
||||
parameter['class'] = 'identifier'
|
||||
parameters.append(parameter)
|
||||
elif p_type == 'agen':
|
||||
parameters.append('mem')
|
||||
# FIXME actually only address generation
|
||||
parameter['class'] = 'memory'
|
||||
parameter['base'] = 'gpr'
|
||||
parameter['offset'] = None
|
||||
parameter['index'] = None
|
||||
parameter['scale'] = 1
|
||||
parameters.append(parameter)
|
||||
parameters.append(parameter)
|
||||
else:
|
||||
raise ValueError("Unknown paramter type {}".format(parameter_tag.attrib))
|
||||
return parameters
|
||||
|
||||
|
||||
def extract_model(tree, arch):
|
||||
model_data = []
|
||||
for instruction_tag in tree.findall('//instruction'):
|
||||
isa = MachineModel.get_isa_for_arch(arch)
|
||||
mm = MachineModel(isa=isa)
|
||||
parser = get_parser(isa)
|
||||
|
||||
for instruction_tag in tree.findall('.//instruction'):
|
||||
ignore = False
|
||||
|
||||
mnemonic = instruction_tag.attrib['asm']
|
||||
|
||||
# Extract parameter components
|
||||
try:
|
||||
parameters = extract_paramters(instruction_tag)
|
||||
parameters = extract_paramters(instruction_tag, parser, isa)
|
||||
if isa == 'x86':
|
||||
parameters.reverse()
|
||||
except ValueError as e:
|
||||
print(e, file=sys.stderr)
|
||||
|
||||
# Extract port occupation, throughput and latency
|
||||
port_occupancy, throughput, latency = [], 0.0, None
|
||||
arch_tag = instruction_tag.find('architecture[@name="'+arch+'"]')
|
||||
port_pressure, throughput, latency, uops = [], None, None, None
|
||||
arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
|
||||
if arch_tag is None:
|
||||
continue
|
||||
# We collect all measurement and IACA information and compare them later
|
||||
for measurement_tag in arch_tag.iter('measurement'):
|
||||
port_occupancy.append(port_occupancy_from_tag_attributes(measurement_tag.attrib, arch))
|
||||
# FIXME handle min/max Latencies ('maxCycles' and 'minCycles')
|
||||
latencies = [int(l_tag.attrib['cycles'])
|
||||
for l_tag in measurement_tag.iter('latency') if 'latency' in l_tag.attrib]
|
||||
|
||||
if 'TP_ports' in measurement_tag.attrib:
|
||||
throughput = measurement_tag.attrib['TP_ports']
|
||||
else:
|
||||
throughput = (
|
||||
measurement_tag.attrib['TP'] if 'TP' in measurement_tag.attrib else None
|
||||
)
|
||||
uops = (
|
||||
int(measurement_tag.attrib['uops']) if 'uops' in measurement_tag.attrib else None
|
||||
)
|
||||
if 'ports' in measurement_tag.attrib:
|
||||
port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
|
||||
latencies = [
|
||||
int(l_tag.attrib['cycles'])
|
||||
for l_tag in measurement_tag.iter('latency')
|
||||
if 'cycles' in l_tag.attrib
|
||||
]
|
||||
if len(latencies) == 0:
|
||||
latencies = [
|
||||
int(l_tag.attrib['max_cycles'])
|
||||
for l_tag in measurement_tag.iter('latency')
|
||||
if 'max_cycles' in l_tag.attrib
|
||||
]
|
||||
if latencies[1:] != latencies[:-1]:
|
||||
print("Contradicting latencies found:", mnemonic, file=sys.stderr)
|
||||
ignore = True
|
||||
elif latencies:
|
||||
latency = latencies[0]
|
||||
|
||||
# Ordered by IACA version (newest last)
|
||||
for iaca_tag in sorted(arch_tag.iter('IACA'),
|
||||
key=lambda i: StrictVersion(i.attrib['version'])):
|
||||
port_occupancy.append(port_occupancy_from_tag_attributes(iaca_tag.attrib, arch))
|
||||
if ignore: continue
|
||||
for iaca_tag in sorted(
|
||||
arch_tag.iter('IACA'), key=lambda i: StrictVersion(i.attrib['version'])
|
||||
):
|
||||
if 'ports' in iaca_tag.attrib:
|
||||
port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))
|
||||
if ignore:
|
||||
continue
|
||||
|
||||
# Check if all are equal
|
||||
if port_occupancy:
|
||||
if port_occupancy[1:] != port_occupancy[:-1]:
|
||||
print("Contradicting port occupancies, using latest IACA:", mnemonic,
|
||||
file=sys.stderr)
|
||||
port_occupancy = port_occupancy[-1]
|
||||
throughput = max(list(port_occupancy.values())+[0.0])
|
||||
if port_pressure:
|
||||
if port_pressure[1:] != port_pressure[:-1]:
|
||||
print(
|
||||
"Contradicting port occupancies, using latest IACA:", mnemonic, file=sys.stderr
|
||||
)
|
||||
port_pressure = port_pressure[-1]
|
||||
|
||||
# Add missing ports:
|
||||
for ports in [pp[1] for pp in port_pressure]:
|
||||
for p in ports:
|
||||
mm.add_port(p)
|
||||
|
||||
throughput = max(mm.average_port_pressure(port_pressure))
|
||||
else:
|
||||
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
|
||||
continue
|
||||
# ---------------------------------------------
|
||||
mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
|
||||
|
||||
for m, p in build_variants(mnemonic, parameters):
|
||||
model_data.append((m.lower() + '-' + '_'.join(p),
|
||||
throughput, latency, port_occupancy))
|
||||
|
||||
return model_data
|
||||
|
||||
|
||||
def all_or_false(iterator):
|
||||
if not iterator:
|
||||
return False
|
||||
else:
|
||||
return all(iterator)
|
||||
|
||||
|
||||
def build_variants(mnemonic, parameters):
|
||||
"""Yield all resonable variants of this instruction form."""
|
||||
# The one that was given
|
||||
mnemonic = mnemonic.upper()
|
||||
yield mnemonic, parameters
|
||||
|
||||
# Without opmask
|
||||
if any(['{opmask}' in p for p in parameters]):
|
||||
yield mnemonic, list([p.replace('{opmask}', '') for p in parameters])
|
||||
|
||||
# With suffix (assuming suffix was not already present)
|
||||
suffixes = {'Q': 'r64',
|
||||
'L': 'r32',
|
||||
'W': 'r16',
|
||||
'B': 'r8'}
|
||||
for s, reg in suffixes.items():
|
||||
if not mnemonic.endswith(s) and all_or_false(
|
||||
[p == reg for p in parameters if p not in ['mem', 'imd']]):
|
||||
yield mnemonic+s, parameters
|
||||
return mm
|
||||
|
||||
|
||||
def architectures(tree):
|
||||
return set([a.attrib['name'] for a in tree.findall('.//architecture')])
|
||||
|
||||
|
||||
def int_or_zero(s):
|
||||
try:
|
||||
return int(s)
|
||||
except ValueError:
|
||||
return 0
|
||||
|
||||
|
||||
def dump_csv(model_data, arch):
|
||||
csv = 'instr,TP,LT,ports\n'
|
||||
ports = set()
|
||||
for mnemonic, throughput, latency, port_occupancy in model_data:
|
||||
for p in port_occupancy:
|
||||
ports.add(p)
|
||||
ports = sorted(ports)
|
||||
# If not all ports have been used (happens with port7 due to blacklist
|
||||
# port_occupancy_from_tag_attributes), extend list accordingly:
|
||||
while len(ports) < Scheduler.arch_dict[arch] + len(Scheduler.arch_pipeline_ports.get(arch, [])):
|
||||
max_index = ports.index(str(max(map(int_or_zero, ports))))
|
||||
ports.insert(max_index + 1, str(max(map(int_or_zero, ports)) + 1))
|
||||
|
||||
for mnemonic, throughput, latency, port_occupancy in model_data:
|
||||
for p in ports:
|
||||
if p not in port_occupancy:
|
||||
port_occupancy[p] = 0.0
|
||||
po_items = sorted(port_occupancy.items())
|
||||
csv_line = '{},{},{},"({})"\n'.format(mnemonic, throughput, latency,
|
||||
','.join([str(c) for p, c in po_items]))
|
||||
csv += csv_line
|
||||
return csv
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('xml', help='path of instructions.xml from http://uops.info')
|
||||
parser.add_argument('arch', nargs='?',
|
||||
help='architecture to extract, use IACA abbreviations (e.g., SNB). '
|
||||
'if not given, all will be extracted and saved to file in CWD.')
|
||||
parser.add_argument(
|
||||
'arch',
|
||||
nargs='?',
|
||||
help='architecture to extract, use IACA abbreviations (e.g., SNB). '
|
||||
'if not given, all will be extracted and saved to file in CWD.',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
tree = ET.parse(args.xml)
|
||||
print('Available architectures:', ', '.join(architectures(tree)))
|
||||
if args.arch:
|
||||
model_data = extract_model(tree, args.arch)
|
||||
print(dump_csv(model_data, args.arch))
|
||||
model = extract_model(tree, args.arch)
|
||||
print(model.dump())
|
||||
else:
|
||||
for arch in architectures(tree):
|
||||
model_data = extract_model(tree, arch)
|
||||
with open('{}_data.csv'.format(arch), 'w') as f:
|
||||
f.write(dump_csv(model_data, arch))
|
||||
print(arch, end='')
|
||||
model = extract_model(tree, arch.lower())
|
||||
with open('{}.yml'.format(arch.lower()), 'w') as f:
|
||||
model.dump(f)
|
||||
print('.')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
698
osaca/data/tx2.yml
Normal file
698
osaca/data/tx2.yml
Normal file
@@ -0,0 +1,698 @@
|
||||
osaca_version: 0.3.2
|
||||
micro_architecture: Thunder X2
|
||||
arch_code: tx2
|
||||
isa: AArch64
|
||||
ROB_size: 180
|
||||
retired_uOps_per_cycle: 4
|
||||
scheduler_size: 60
|
||||
hidden_loads: false
|
||||
load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 4.0, q: 4.0, v: 4.0}
|
||||
load_throughput:
|
||||
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [1, '34']}
|
||||
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [1, '34']}
|
||||
ports: ['0', 0DV, '1', 1DV, '2', '3', '4', '5']
|
||||
port_model_scheme: |
|
||||
┌-----------------------------------------------------------┐
|
||||
| 60 entry unified scheduler |
|
||||
└-----------------------------------------------------------┘
|
||||
0 | 1 | 2 | 3 | 4 | 5 |
|
||||
▼ ▼ ▼ ▼ ▼ ▼
|
||||
┌------┐ ┌------┐ ┌------┐ ┌------┐ ┌------┐ ┌------┐
|
||||
| ALU | | ALU | | ALU/ | | LD | | LD | | ST |
|
||||
└------┘ └------┘ | BR | └------┘ └------┘ └------┘
|
||||
┌------┐ ┌------┐ └------┘ ┌------┐ ┌------┐
|
||||
| FP/ | | FP/ | | AGU | | AGU |
|
||||
| NEON | | NEON | └------┘ └------┘
|
||||
└------┘ └------┘
|
||||
┌------┐
|
||||
| INT |
|
||||
| MUL/ |
|
||||
| DIV |
|
||||
└------┘
|
||||
┌------┐
|
||||
|CRYPTO|
|
||||
└------┘
|
||||
instruction_forms:
|
||||
- name: add
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p012
|
||||
port_pressure: [[1, '012']]
|
||||
- name: add
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p012
|
||||
port_pressure: [[1, '012']]
|
||||
- name: adds
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p012
|
||||
port_pressure: [[1, '012']]
|
||||
- name: b.ne
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: b.gt
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: bne
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: cmp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: w
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p012
|
||||
port_pressure: [[1, '012']]
|
||||
- name: cmp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p012
|
||||
port_pressure: [[1, '012']]
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fdiv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
throughput: 8.5
|
||||
latency: 16.0 # 1*p01+17*p0DV1DV
|
||||
port_pressure: [[1, '01'], [17.0, [0DV, 1DV]]]
|
||||
- name: fdiv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
throughput: 12.0
|
||||
latency: 23.0 # 1*p01+24*p0DV1DV
|
||||
port_pressure: [[1, '01'], [24.0, [0DV, 1DV]]]
|
||||
- name: fmla
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fmla
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fmov
|
||||
operands:
|
||||
- {class: register, prefix: s}
|
||||
- {class: immediate, imd: double}
|
||||
latency: ~ # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
throughput: 0.5
|
||||
- name: fmul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fmul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fmul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: ~ # 2*p34
|
||||
port_pressure: [[2.0, '34']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: ~ # 2*p34
|
||||
port_pressure: [[2.0, '34']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: ~ # 2*p34
|
||||
port_pressure: [[2.0, '34']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: ~ # 2*p34
|
||||
port_pressure: [[2.0, '34']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: ~ # 2*p34
|
||||
port_pressure: [[2.0, '34']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: ~ # 2*p34
|
||||
port_pressure: [[2.0, '34']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: ~ # 2*p34
|
||||
port_pressure: [[2.0, '34']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p34
|
||||
port_pressure: [[1.0, '34']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p34
|
||||
port_pressure: [[1.0, '34']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: x
|
||||
scale: 8
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p34
|
||||
port_pressure: [[1.0, '34']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: mov
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.5
|
||||
latency: 1.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: mov
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: b
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: b
|
||||
throughput: 0.5
|
||||
latency: 5.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: prfm
|
||||
operands:
|
||||
- class: prfop
|
||||
type: pld
|
||||
target: l1
|
||||
policy: keep
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: ~
|
||||
latency: ~
|
||||
port_pressure: []
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: ~ # 4*p34
|
||||
port_pressure: [[4.0, '34']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: ~ # 4*p34
|
||||
port_pressure: [[4.0, '34']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 2.0
|
||||
latency: ~ # 2*p34+2*p5
|
||||
port_pressure: [[2.0, '34'], [2.0, '5']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: ~ # 2*p34+2*p5
|
||||
port_pressure: [[2.0, '34'], [2.0, '5']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: ~ # 2*p34+2*p5
|
||||
port_pressure: [[2.0, '34'], [2.0, '5']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p34+1*p5
|
||||
port_pressure: [[1.0, '34'], [1.0, '5']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p34+1*p5
|
||||
port_pressure: [[1.0, '34'], [1.0, '5']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p34+1*p5
|
||||
port_pressure: [[1.0, '34'], [1.0, '5']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: x
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p34+1*p5
|
||||
port_pressure: [[1.0, '34'], [1.0, '5']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p34+1*p5
|
||||
port_pressure: [[1.0, '34'], [1.0, '5']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p34+1*p5
|
||||
port_pressure: [[1.0, '34'], [1.0, '5']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: x
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p34+1*p5
|
||||
port_pressure: [[1.0, '34'], [1.0, '5']]
|
||||
File diff suppressed because it is too large
Load Diff
539
osaca/data/zen1.yml
Normal file
539
osaca/data/zen1.yml
Normal file
@@ -0,0 +1,539 @@
|
||||
osaca_version: 0.3.2
|
||||
micro_architecture: AMD Zen (family 17h)
|
||||
arch_code: ZEN1
|
||||
isa: x86
|
||||
load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0}
|
||||
load_throughput_multiplier: {gpr: 1.0, xmm: 1.0, ymm: 2.0}
|
||||
load_throughput:
|
||||
- {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: ~, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: ~, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
hidden_loads: false
|
||||
ports: ['0', '1', '2', '3', 3DV, '4', '5', '6', '7', '8', '9', 8D, 9D, ST]
|
||||
port_model_scheme: |
|
||||
┌--------------------------------------┐ ┌-----------------------------------------------┐
|
||||
| 96 entries OoO scheduler | | 84 entries OoO scheduler |
|
||||
└--------------------------------------┘ └-----------------------------------------------┘
|
||||
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|
||||
▼ ▼ ▼ ▼ ▼ ▼ ▼ ▼ ▼ ▼
|
||||
┌-------┐ ┌-------┐ ┌-------┐ ┌-------┐ ┌------┐ ┌-----┐ ┌-----┐ ┌------┐ ┌-----┐ ┌-----┐
|
||||
|SSE ALU| |SSE ALU| |SSE ALU| |SSE ALU| | ALU | | ALU | | ALU | | ALU | | AGU | | AGU |
|
||||
└-------┘ └-------┘ └-------┘ └-------┘ └------┘ └-----┘ └-----┘ └------┘ └-----┘ └-----┘
|
||||
┌-------┐ ┌-------┐ ┌-------┐ ┌-------┐ ┌------┐ ┌-----┐ ┌-----┐ ┌------┐ | |
|
||||
|SSE MUL| |SSE MUL| |SSE ADD| |SSE ADD| |BRANCH| | MUL | | MUL | |BRANCH| ▼ ▼
|
||||
└-------┘ └-------┘ └-------┘ └-------┘ └------┘ └-----┘ └-----┘ └------┘ ┌-------------┐
|
||||
┌-------┐ ┌-------┐ ┌-------┐ ┌-------┐ | LOAD |
|
||||
|SSE FMA| |SSE FMA| | SSE | |SSE DIV| └-------------┘
|
||||
└-------┘ └-------┘ | SHUF | └-------┘ ┌-------------┐
|
||||
┌-------┐ └-------┘ | LOAD |
|
||||
| SSE | └-------------┘
|
||||
| SHUF | ┌-------------┐
|
||||
└-------┘ | STORE |
|
||||
└-------------┘
|
||||
instruction_forms:
|
||||
- name: add
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: add
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: addl
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: addq
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: cmpl
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: ~ # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: cmpq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: ~ # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: incq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: ja
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: ~
|
||||
port_pressure: []
|
||||
- name: jb
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: ~
|
||||
port_pressure: []
|
||||
- name: jne
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: ~
|
||||
port_pressure: []
|
||||
- name: leaq
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.5
|
||||
latency: ~ # 1*p89
|
||||
port_pressure: [[1, '89']]
|
||||
- name: movl
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: mulsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: mulss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: rcpss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: ~ #1.0
|
||||
latency: 5.0
|
||||
port_pressure: []
|
||||
- name: sqrtsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: ~ #8.0
|
||||
latency: 23.0
|
||||
port_pressure: []
|
||||
- name: sqrtss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: ~ #5.0
|
||||
latency: 17.0
|
||||
port_pressure: []
|
||||
- name: subq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: subq
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: vaddpd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 3.0 # 2*p23
|
||||
port_pressure: [[2, '23']]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p23
|
||||
port_pressure: [[1, '23']]
|
||||
- name: vaddss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p23
|
||||
port_pressure: [[1, '23']]
|
||||
- name: vdivsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 4.0
|
||||
latency: 13.0 # 1*p3+4*p3DV
|
||||
port_pressure: [[1, '3'], [4.0, [3DV]]]
|
||||
- name: vdivss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 3.0
|
||||
latency: 10.0
|
||||
port_pressure: [[1, '3'], [3.0, [3DV]]]
|
||||
- name: vfmadd213pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vfmadd231pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vfmadd132pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vmulsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01+1*p89+1*p8D9D
|
||||
port_pressure: [[1, '01'], [1, '89'], [1, [8D, 9D]]]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovaps
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovaps
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p89+1*p8D9D
|
||||
port_pressure: [[1, '89'], [1, [8D, 9D]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
@@ -1,138 +0,0 @@
|
||||
instr,TP,LT,ports
|
||||
jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jmpq-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
add-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
add-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
addl-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
addq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
addl-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
|
||||
addq-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
|
||||
add-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
|
||||
add-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
|
||||
addl-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
|
||||
addq-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)"
|
||||
cmp-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
|
||||
cmpl-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
|
||||
cmp-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
|
||||
cmpl-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
|
||||
cmp-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
cmpl-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
cmp-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
cmp-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
cmpq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
cmpq-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
inc-r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
incq-r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
incl-r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
mov-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
mov-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
mov-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
movq-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
movq-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
movl-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
movslq-r64_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
sub-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
vaddpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vaddsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vaddsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vaddss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vcvtsi2ss-xmm_xmm_r32,1.0,4.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vcvtss2si-r32_xmm,1.0,7.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
cvtsi2ss-xmm_r32,1.0,8.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd213pd-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd213pd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd213ps-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd213ps-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd213sd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd213ss-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd132sd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vfmadd132pd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vfmadd132pd-ymm_ymm_mem,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
vinsertf128-ymm_ymm_imd,0.6666666666666667,1.0,"(-1,)"
|
||||
vmovsd-mem_xmm,1.0,8.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
vmovsd-xmm_mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vmulpd-ymm_ymm_ymm,1.0,4.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vmulss-xmm_xmm_xmm,0.5,3.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vsubpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
vsubsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vsubsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vsubss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vmovaps-xmm_mem,0.5,3.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vmovaps-mem_xmm,1.0,5.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
vmovapd-ymm_mem,1.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
vmovapd-mem_ymm,2.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 2.0, 2.0)"
|
||||
movq-r64_xmm,1.0,-1.0,"(0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
#prefetcht0-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
#prefetchw-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
cmpl-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)"
|
||||
vaddpd-xmm_xmm_xmm,0.5,3,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vaddpd-ymm_ymm_ymm,1,3,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vcvtdq2pd-xmm_xmm,1,7,"(0.5, 0.5, 0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vcvtdq2pd-ymm_xmm,2,7,"(1.0, 1.0, 0, 2.0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vcvtsi2sd-xmm_xmm_r32,1,4,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vextracti128-xmm_ymm_imd,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd132pd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd132pd-ymm_ymm_ymm,1,5,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vfmadd132sd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vmulpd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vpaddd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vpaddd-ymm_ymm_ymm,0.6666666666666667,1,"(0.66, 0.66, 0, 0.66, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vpshufd-xmm_xmm_imd,0.5,1,"(0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vxorpd-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vxorps-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vdivpd-xmm_xmm_xmm,4,8,"(0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)"
|
||||
vdivsd-xmm_xmm_xmm,4,8,"(0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)"
|
||||
vmovups-mem_xmm,0.5,8,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vmovups-xmm_mem,1,8,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vaddpd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)"
|
||||
vmulpd-xmm_xmm_mem,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vmulpd-ymm_ymm_mem,1,4,"(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vinsertf128-ymm_ymm_mem_imd,1,5,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)"
|
||||
vmovupd-xmm_mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
vmovupd-mem_xmm,1,1,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 2.0, 2.0)"
|
||||
vmovupd-ymm_mem,3.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
vmovupd-mem_ymm,2,2,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 2.0, 2.0)"
|
||||
movupd-xmm_mem,0.5,-1,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)"
|
||||
pushq-r64,0.5,-1,"(-1,)"
|
||||
cmpq-r64_mem,0.5,-1,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
|
||||
movq-r64_r64,0.2,-1,"(-1,)"
|
||||
subq-r64_r64,0.25,1,"(-1,)"
|
||||
cmpq-mem_r64,0.5,-1,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)"
|
||||
|
406
osaca/db_interface.py
Executable file
406
osaca/db_interface.py
Executable file
@@ -0,0 +1,406 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import math
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import ruamel.yaml
|
||||
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
def sanity_check(arch: str, verbose=False):
    """
    Run the sanity checks for the database of ``arch`` and report the findings.

    The architecture machine model and the machine model of the ISA it names
    are loaded, both are handed to the arch-DB and ISA-DB check helpers, and
    all collected findings are forwarded to ``_print_sanity_report``.

    :param arch: architecture identifier used to load the machine model
    :param verbose: forwarded unchanged to ``_print_sanity_report``
    """
    # Architecture model first, then the model of the ISA it declares.
    arch_model = MachineModel(arch=arch)
    instruction_forms = arch_model['instruction_forms']
    isa_model = MachineModel(arch='isa/{}'.format(arch_model.get_ISA()))
    total_forms = len(instruction_forms)

    # Findings for the architecture DB; the helper returns exactly six items.
    arch_findings = _check_sanity_arch_db(arch_model, isa_model)
    (no_throughput, no_latency, no_port_pressure, bad_port, suspicious, dup_arch) = arch_findings
    # Findings for the ISA DB.
    dup_isa, isa_only = _check_sanity_isa_db(arch_model, isa_model)

    _print_sanity_report(
        total_forms,
        no_throughput,
        no_latency,
        no_port_pressure,
        bad_port,
        suspicious,
        dup_arch,
        dup_isa,
        isa_only,
        verbose=verbose,
    )
|
||||
|
||||
|
||||
def import_benchmark_output(arch, bench_type, filepath):
    """
    Import the measurements of a benchmark output file into the model of ``arch``.

    :param arch: architecture identifier handed to ``MachineModel``
    :param bench_type: tool that produced the file; ``'ibench'`` is implemented,
        ``'asmbench'`` is accepted but not implemented yet
    :param filepath: path of the benchmark output file
    :raises ValueError: if ``bench_type`` is not one of the supported types
    :raises NotImplementedError: if ``bench_type`` is ``'asmbench'``
    """
    supported_bench_outputs = ['ibench', 'asmbench']
    assert os.path.exists(filepath)
    if bench_type not in supported_bench_outputs:
        raise ValueError('Benchmark type is not supported.')
    with open(filepath, 'r') as f:
        input_data = f.readlines()
    if bench_type == 'asmbench':
        raise NotImplementedError
    # Only 'ibench' is left here. The model is created before parsing because
    # the operand codes of the benchmark are ISA specific.
    mm = MachineModel(arch)
    db_entries = _get_ibench_output(input_data, mm.get_ISA())
    # write entries to DB
    # (iterate the entry dicts; iterating the dict itself yields only its keys)
    for entry in db_entries.values():
        mm.set_instruction_entry(entry)
    # NOTE(review): the dump is written to `filepath`, i.e. it replaces the
    # benchmark output that was just read. This looks unintended, but the
    # desired destination is not visible here -- confirm before changing.
    with open(filepath, 'w') as f:
        mm.dump(f)


##################
# HELPERS IBENCH #
##################


def _get_ibench_output(input_data, isa):
    """
    Convert the lines of an ibench output file into DB entries.

    The text before the first ':' of a line is parsed as
    ``<mnemonic>-<operand codes joined by '_'>-<suffix>``; a suffix containing
    ``TP`` marks a throughput line, one containing ``LT`` a latency line. The
    measured value is the second whitespace separated token of the line.
    Throughput and latency lines of the same instruction form are merged into
    one entry.

    :param input_data: lines of the ibench output file
    :param isa: ISA name used to interpret the operand codes
    :returns: dict mapping ``'<mnemonic>-<operand codes>'`` to the entry dict
    """
    db_entries = {}
    for line in input_data:
        # Lines from readlines() keep their newline, so an "empty" line has to
        # be detected on the stripped text.
        if 'Using frequency' in line or not line.strip():
            continue
        instruction = line.split(':')[0]
        key = '-'.join(instruction.split('-')[:2])
        if key in db_entries:
            # add only TP/LT value
            entry = db_entries[key]
        else:
            mnemonic = instruction.split('-')[0]
            operands = instruction.split('-')[1].split('_')
            operands = [_create_db_operand(op, isa) for op in operands]
            entry = {
                'name': mnemonic,
                'operands': operands,
                'throughput': None,
                'latency': None,
                'port_pressure': None,
            }
        if 'TP' in instruction:
            entry['throughput'] = _validate_measurement(float(line.split()[1]), True)
            if entry['throughput'] is None:
                warnings.warn(
                    'Your THROUGHPUT measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        elif 'LT' in instruction:
            entry['latency'] = _validate_measurement(float(line.split()[1]), False)
            # `is None` on purpose: an accepted latency of 0.0 is falsy but valid
            if entry['latency'] is None:
                warnings.warn(
                    'Your LATENCY measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        db_entries[key] = entry
    return db_entries


def _validate_measurement(measurement, is_tp):
    """
    Snap a measured value to the value it most likely represents.

    Latencies (``is_tp`` false) are accepted when they lie within 5 % above
    their floor or within 5 % below their ceiling and are rounded to the
    nearest integer. Throughputs (``is_tp`` true) are accepted when they lie
    within 5 % of 1/1, 1/2, ..., 1/10 and are replaced by that reciprocal
    rounded to five digits.

    :param measurement: measured value as float
    :param is_tp: ``True`` for a throughput value, ``False`` for a latency
    :returns: the snapped value, or ``None`` if the measurement is rejected
    """
    if not is_tp:
        if (
            math.floor(measurement) * 1.05 >= measurement
            or math.ceil(measurement) * 0.95 <= measurement
        ):
            # Value is probably correct, so round it to the estimated value
            return float(round(measurement))
    # Check reciprocal only if it is a throughput value
    else:
        # NOTE(review): throughputs above 1.05 cy can never pass this check,
        # although the comment below mentions integers -- confirm intent.
        reciprocals = [1 / x for x in range(1, 11)]
        for reci in reciprocals:
            if reci * 0.95 <= measurement <= reci * 1.05:
                # Value is probably correct, so round it to the estimated value
                return round(reci, 5)
    # No value close to an integer or its reciprocal found, we assume the
    # measurement is incorrect
    return None


def _create_db_operand(operand, isa):
    """
    Build the DB operand dict for one benchmark operand code.

    :param operand: operand code taken from the benchmark instruction name
    :param isa: ``'aarch64'`` or ``'x86'``
    :raises ValueError: if ``isa`` is neither of the two supported values
    """
    # The ISA specific helpers are module-level functions that still declare a
    # leading `self` parameter; `None` is passed as placeholder for it.
    # NOTE(review): presumes `_create_db_operand_x86` does not read `self`
    # (its body is not fully visible here) -- confirm.
    if isa == 'aarch64':
        return _create_db_operand_aarch64(None, operand)
    elif isa == 'x86':
        return _create_db_operand_x86(None, operand)
    raise ValueError('ISA {} is not supported'.format(isa))
|
||||
|
||||
|
||||
def _create_db_operand_aarch64(self, operand):
|
||||
if operand == 'i':
|
||||
return {'class': 'immediate', 'imd': 'int'}
|
||||
elif operand in 'wxbhsdq':
|
||||
return {'class': 'register', 'prefix': operand}
|
||||
elif operand.startswith('v'):
|
||||
return {'class': 'register', 'prefix': 'v', 'shape': operand[1:2]}
|
||||
elif operand.startswith('m'):
|
||||
return {
|
||||
'class': 'memory',
|
||||
'base': 'gpr' if 'b' in operand else None,
|
||||
'offset': 'imd' if 'o' in operand else None,
|
||||
'index': 'gpr' if 'i' in operand else None,
|
||||
'scale': 8 if 's' in operand else 1,
|
||||
'pre-indexed': True if 'r' in operand else False,
|
||||
'post-indexed': True if 'p' in operand else False,
|
||||
}
|
||||
else:
|
||||
raise ValueError('Parameter {} is not a valid operand code'.format(operand))
|
||||
|
||||
|
||||
def _create_db_operand_x86(self, operand):
|
||||
if operand == 'r':
|
||||
return {'class': 'register', 'name': 'gpr'}
|
||||
elif operand in 'xyz':
|
||||
return {'class': 'register', 'name': operand + 'mm'}
|
||||
elif operand == 'i':
|
||||
return {'class': 'immediate', 'imd': 'int'}
|
||||
elif operand.startswith('m'):
|
||||
return {
|
||||
'class': 'memory',
|
||||
'base': 'gpr' if 'b' in operand else None,
|
||||
'offset': 'imd' if 'o' in operand else None,
|
||||
'index': 'gpr' if 'i' in operand else None,
|
||||
'scale': 8 if 's' in operand else 1,
|
||||
}
|
||||
else:
|
||||
raise ValueError('Parameter {} is not a valid operand code'.format(operand))
|
||||
|
||||
|
||||
########################
|
||||
# HELPERS SANITY CHECK #
|
||||
########################
|
||||
|
||||
|
||||
def _check_sanity_arch_db(arch_mm, isa_mm):
|
||||
suspicious_prefixes_x86 = ['vfm', 'fm']
|
||||
suspicious_prefixes_arm = ['fml', 'ldp', 'stp', 'str']
|
||||
if arch_mm.get_ISA().lower() == 'aarch64':
|
||||
suspicious_prefixes = suspicious_prefixes_arm
|
||||
if arch_mm.get_ISA().lower() == 'x86':
|
||||
suspicious_prefixes = suspicious_prefixes_x86
|
||||
|
||||
# returned lists
|
||||
missing_throughput = []
|
||||
missing_latency = []
|
||||
missing_port_pressure = []
|
||||
wrong_port = []
|
||||
suspicious_instructions = []
|
||||
duplicate_instr_arch = []
|
||||
|
||||
for instr_form in arch_mm['instruction_forms']:
|
||||
# check value in DB entry
|
||||
if instr_form['throughput'] is None:
|
||||
missing_throughput.append(instr_form)
|
||||
if instr_form['latency'] is None:
|
||||
missing_latency.append(instr_form)
|
||||
if instr_form['port_pressure'] is None:
|
||||
missing_port_pressure.append(instr_form)
|
||||
else:
|
||||
if _check_for_wrong_port(arch_mm['ports'], instr_form):
|
||||
wrong_port.append(instr_form)
|
||||
# check entry against ISA DB
|
||||
for prefix in suspicious_prefixes:
|
||||
if instr_form['name'].startswith(prefix):
|
||||
# check if instruction in ISA DB
|
||||
if isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None:
|
||||
# if not, mark them as suspicious and print it on the screen
|
||||
suspicious_instructions.append(instr_form)
|
||||
# check for duplicates in DB
|
||||
if arch_mm._check_for_duplicate(instr_form['name'], instr_form['operands']):
|
||||
duplicate_instr_arch.append(instr_form)
|
||||
# every entry exists twice --> uniquify
|
||||
tmp_list = []
|
||||
for i in range(0, len(duplicate_instr_arch)):
|
||||
tmp = duplicate_instr_arch.pop()
|
||||
if tmp not in duplicate_instr_arch:
|
||||
tmp_list.append(tmp)
|
||||
duplicate_instr_arch = tmp_list
|
||||
return (
|
||||
missing_throughput,
|
||||
missing_latency,
|
||||
missing_port_pressure,
|
||||
wrong_port,
|
||||
suspicious_instructions,
|
||||
duplicate_instr_arch,
|
||||
)
|
||||
|
||||
|
||||
def _check_for_wrong_port(port_list, instr_form):
|
||||
for cycles, ports in instr_form['port_pressure']:
|
||||
for p in ports:
|
||||
if p not in port_list:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _check_sanity_isa_db(arch_mm, isa_mm):
|
||||
# returned lists
|
||||
duplicate_instr_isa = []
|
||||
only_in_isa = []
|
||||
|
||||
for instr_form in isa_mm['instruction_forms']:
|
||||
# check if instr is missing in arch DB
|
||||
if arch_mm.get_instruction(instr_form['name'], instr_form['operands']) is None:
|
||||
only_in_isa.append(instr_form)
|
||||
# check for duplicates
|
||||
if isa_mm._check_for_duplicate(instr_form['name'], instr_form['operands']):
|
||||
duplicate_instr_isa.append(instr_form)
|
||||
# every entry exists twice --> uniquify
|
||||
tmp_list = []
|
||||
for i in range(0, len(duplicate_instr_isa)):
|
||||
tmp = duplicate_instr_isa.pop()
|
||||
if tmp not in duplicate_instr_isa:
|
||||
tmp_list.append(tmp)
|
||||
duplicate_instr_isa = tmp_list
|
||||
|
||||
return duplicate_instr_isa, only_in_isa
|
||||
|
||||
|
||||
def _print_sanity_report(
|
||||
total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False
|
||||
):
|
||||
# non-verbose summary
|
||||
print('SUMMARY\n----------------------')
|
||||
print(
|
||||
'{}% ({}/{}) of instruction forms have no throughput value.'.format(
|
||||
round(100 * len(m_tp) / total), len(m_tp), total
|
||||
)
|
||||
)
|
||||
print(
|
||||
'{}% ({}/{}) of instruction forms have no latency value.'.format(
|
||||
round(100 * len(m_l) / total), len(m_l), total
|
||||
)
|
||||
)
|
||||
print(
|
||||
'{}% ({}/{}) of instruction forms have no port pressure assignment.'.format(
|
||||
round(100 * len(m_pp) / total), len(m_pp), total
|
||||
)
|
||||
)
|
||||
print(
|
||||
'{}% ({}/{}) of instruction forms have an invalid port identifier.'.format(
|
||||
round(100 * len(wrong_pp) / total), len(wrong_pp), total
|
||||
)
|
||||
)
|
||||
print(
|
||||
'{}% ({}/{}) of instruction forms might miss an ISA DB entry.'.format(
|
||||
round(100 * len(suspic_instr) / total), len(suspic_instr), total
|
||||
)
|
||||
)
|
||||
print('{} duplicate instruction forms in uarch DB.'.format(len(dup_arch)))
|
||||
print('{} duplicate instruction forms in ISA DB.'.format(len(dup_isa)))
|
||||
print(
|
||||
'{} instruction forms in ISA DB are not referenced by instruction '.format(len(only_isa))
|
||||
+ 'forms in uarch DB.'
|
||||
)
|
||||
print('----------------------\n')
|
||||
# verbose version
|
||||
if verbose:
|
||||
_print_sanity_report_verbose(
|
||||
total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa
|
||||
)
|
||||
|
||||
|
||||
def _print_sanity_report_verbose(
|
||||
total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa
|
||||
):
|
||||
BRIGHT_CYAN = '\033[1;36;1m'
|
||||
BRIGHT_BLUE = '\033[1;34;1m'
|
||||
BRIGHT_RED = '\033[1;31;1m'
|
||||
BRIGHT_MAGENTA = '\033[1;35;1m'
|
||||
BRIGHT_YELLOW = '\033[1;33;1m'
|
||||
CYAN = '\033[36m'
|
||||
YELLOW = '\033[33m'
|
||||
WHITE = '\033[0m'
|
||||
|
||||
print('Instruction forms without throughput value:\n' if len(m_tp) != 0 else '', end='')
|
||||
for instr_form in m_tp:
|
||||
print('{}{}{}'.format(BRIGHT_BLUE, _get_full_instruction_name(instr_form), WHITE))
|
||||
print('Instruction forms without latency value:\n' if len(m_l) != 0 else '', end='')
|
||||
for instr_form in m_l:
|
||||
print('{}{}{}'.format(BRIGHT_RED, _get_full_instruction_name(instr_form), WHITE))
|
||||
print(
|
||||
'Instruction forms without port pressure assignment:\n' if len(m_pp) != 0 else '', end=''
|
||||
)
|
||||
for instr_form in m_pp:
|
||||
print('{}{}{}'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE))
|
||||
print(
|
||||
'Instruction forms with invalid port identifiers in port pressure:\n'
|
||||
if len(wrong_pp) != 0
|
||||
else '',
|
||||
end='',
|
||||
)
|
||||
for instr_form in wrong_pp:
|
||||
print('{}{}{}'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE))
|
||||
print(
|
||||
'Instruction forms which might miss an ISA DB entry:\n' if len(suspic_instr) != 0 else '',
|
||||
end='',
|
||||
)
|
||||
for instr_form in suspic_instr:
|
||||
print('{}{}{}'.format(BRIGHT_CYAN, _get_full_instruction_name(instr_form), WHITE))
|
||||
print('Duplicate instruction forms in uarch DB:\n' if len(dup_arch) != 0 else '', end='')
|
||||
for instr_form in dup_arch:
|
||||
print('{}{}{}'.format(YELLOW, _get_full_instruction_name(instr_form), WHITE))
|
||||
print('Duplicate instruction forms in ISA DB:\n' if len(dup_isa) != 0 else '', end='')
|
||||
for instr_form in dup_isa:
|
||||
print('{}{}{}'.format(BRIGHT_YELLOW, _get_full_instruction_name(instr_form), WHITE))
|
||||
print(
|
||||
'Instruction forms existing in ISA DB but not in uarch DB:\n'
|
||||
if len(only_isa) != 0
|
||||
else '',
|
||||
end='',
|
||||
)
|
||||
for instr_form in only_isa:
|
||||
print('{}{}{}'.format(CYAN, _get_full_instruction_name(instr_form), WHITE))
|
||||
|
||||
|
||||
###################
|
||||
# GENERIC HELPERS #
|
||||
###################
|
||||
|
||||
|
||||
def _get_full_instruction_name(instruction_form):
|
||||
operands = []
|
||||
for op in instruction_form['operands']:
|
||||
op_attrs = [
|
||||
y + ':' + str(op[y])
|
||||
for y in list(filter(lambda x: True if x != 'class' else False, op))
|
||||
]
|
||||
operands.append('{}({})'.format(op['class'], ','.join(op_attrs)))
|
||||
return '{} {}'.format(instruction_form['name'], ','.join(operands))
|
||||
|
||||
|
||||
def __represent_none(self, data):
|
||||
return self.represent_scalar(u'tag:yaml.org,2002:null', u'~')
|
||||
|
||||
|
||||
def _create_yaml_object():
    """Return a ``ruamel.yaml.YAML`` object that uses `__represent_none` for ``None`` values."""
    yaml = ruamel.yaml.YAML()
    none_type = type(None)
    yaml.representer.add_representer(none_type, __represent_none)
    return yaml
|
||||
|
||||
|
||||
def __dump_data_to_yaml(filepath, data):
    """
    Write a machine model dict to a YAML file in three consecutive dumps.

    The file is (re)created with all keys except ``'instruction_forms'`` and
    ``'port_model_scheme'``; the port model scheme (dumped with
    ``default_style='|'``) and the instruction forms are appended afterwards.

    :param filepath: path of the file to write
    :param data: dict containing at least ``'instruction_forms'`` and
        ``'port_model_scheme'``
    """
    # first add 'normal' meta data in the right order (no ordered dict yet)
    meta_data = dict(data)
    for separately_dumped in ('instruction_forms', 'port_model_scheme'):
        del meta_data[separately_dumped]
    port_model = {'port_model_scheme': data['port_model_scheme']}
    with open(filepath, 'w') as out:
        ruamel.yaml.dump(meta_data, out, allow_unicode=True)
    with open(filepath, 'a') as out:
        # now add port model scheme in |-scheme for better readability
        ruamel.yaml.dump(port_model, out, allow_unicode=True, default_style='|')
        # finally, add instruction forms
        ruamel.yaml.dump({'instruction_forms': data['instruction_forms']}, out, allow_unicode=True)
|
||||
@@ -1,447 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import os
|
||||
import math
|
||||
import ast
|
||||
from operator import add
|
||||
import pandas as pd
|
||||
|
||||
from osaca.param import Register, MemAddr
|
||||
#from param import Register, MemAddr
|
||||
|
||||
|
||||
class Scheduler(object):
    """
    Port scheduler working on a list of instruction forms.

    Port data is read from ``~/.osaca/data/<arch>_data.csv`` into a pandas
    DataFrame whose ``instr``, ``ports``, ``TP`` and ``LT`` columns are queried
    by the scheduling methods.
    """

    # number of ports per supported architecture
    arch_dict = {
        # Intel
        'NHM': 5, 'WSM': 5,  # Nehalem, Westmere
        'SNB': 6, 'IVB': 6,  # Sandy Bridge, Ivy Bridge
        'HSW': 8, 'BDW': 8,  # Haswell, Broadwell
        'SKL': 8, 'SKX': 8,  # Skylake(-X)
        'KBL': 8, 'CFL': 8,  # Kaby Lake, Coffee Lake
        # AMD
        'ZEN': 10,  # Zen/Ryzen/EPYC
    }
    # additional pipeline (DV) port names per architecture
    arch_pipeline_ports = {
        'NHM': ['0DV'], 'WSM': ['0DV'],
        'SNB': ['0DV'], 'IVB': ['0DV'],
        'HSW': ['0DV'], 'BDW': ['0DV'],
        'SKL': ['0DV'], 'SKX': ['0DV'],
        'KBL': ['0DV'], 'CFL': ['0DV'],
        'ZEN': ['3DV'],
    }
    # content of most inner list in instrList: instr, operand(s), instr form
    df = None  # type: DataFrame
    # for parallel ld/st in archs with 1 st/cy and >1 ld/cy, able to do 1 st and 1 ld in 1cy
    ld_ports = None  # type: list<int>
    # enable flag for parallel ld/st
    en_par_ldst = False  # type: boolean

    def __init__(self, arch, instruction_list):
        """
        Set up the scheduler for one architecture and load its CSV data.

        Prints an error to stderr and exits the process with status 1 if
        ``arch`` is not a key of ``arch_dict``.

        :param arch: architecture code (case-insensitive), e.g. ``'SKL'``
        :param instruction_list: list of instruction forms to schedule
        """
        arch = arch.upper()
        try:
            self.ports = self.arch_dict[arch]
        except KeyError:
            print('Architecture not supported for EU scheduling.', file=sys.stderr)
            sys.exit(1)
        # check for parallel ld/st in a cycle
        if arch == 'ZEN':
            self.en_par_ldst = True
            self.ld_ports = [9, 10]
        # check for DV port
        self.pipeline_ports = self.arch_pipeline_ports.get(arch, [])
        self.instrList = instruction_list
        # curr_dir = os.path.realpath(__file__)[:-11]
        osaca_dir = os.path.expanduser('~/.osaca/')
        self.df = pd.read_csv(osaca_dir + 'data/' + arch.lower() + '_data.csv', quotechar='"',
                              converters={'ports': ast.literal_eval})

    def new_schedule(self, machine_readable=False):
        """
        Schedule Instruction Form list and calculate port bindings.

        Parameters
        ----------
        machine_readable : bool
            Boolean for indicating if the return value should be human readable (if False) or
            machine readable (if True)

        Returns
        -------
        (str, [float, ...]) or ([[float, ...], ...], [float, ...])
            A tuple containing the output of the schedule as string (if machine_readable is not
            given or False) or as list of lists (if machine_readable is True) and the port bindings
            as list of float.
        """
        sched = self.get_head()
        # Initialize ports
        # Add DV port, if it is existing
        occ_ports = [[0] * (self.ports + len(self.pipeline_ports))
                     for x in range(len(self.instrList))]
        port_bndgs = [0] * (self.ports + len(self.pipeline_ports))
        # Store instruction counter for parallel ld/st
        par_ldst = 0
        # Count the number of store instr if we schedule for an architecture with par ld/st
        if self.en_par_ldst:
            for i, instrForm in enumerate(self.instrList):
                if (isinstance(instrForm[1], MemAddr) and len(instrForm) > 3
                        and not instrForm[0].startswith('cmp')):
                    # print('({}, {}) is st --> par_ldst = {}'.format(i, instrForm[0], par_ldst + 1))
                    par_ldst += 1
        # Check if there's a port occupation stored in the CSV, otherwise leave the
        # occ_port list item empty
        for i, instrForm in enumerate(self.instrList):
            search_string = instrForm[0] + self.get_operand_suffix(instrForm)
            try:
                entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr]
                tup = entry.ports.values[0]
                if len(tup) == 1 and tup[0] == -1:
                    raise IndexError()
            except IndexError:
                # Instruction form not in CSV
                if instrForm[0][:3] == 'nop':
                    sched += self.format_port_occupation_line(occ_ports[i], '* ' + instrForm[-1])
                elif instrForm[0] == 'DIRECTIVE':
                    sched += self.format_port_occupation_line(occ_ports[i], '* ' + instrForm[-1])
                else:
                    sched += self.format_port_occupation_line(occ_ports[i], 'X ' + instrForm[-1])
                continue
            occ_ports[i] = list(tup)
            # Check if it's a ld including instr
            p_flg = ''
            if self.en_par_ldst:
                # Check for ld
                # FIXME remove special load handling from here and place in machine model
                if (isinstance(instrForm[-2], MemAddr) or
                        (len(instrForm) > 4 and isinstance(instrForm[2], MemAddr))):
                    if par_ldst > 0:
                        par_ldst -= 1
                        p_flg = 'P '
                        for port in self.ld_ports:
                            occ_ports[i][port] = 0.0  # '(' + str(occ_ports[i][port]) + ')'
            # Write schedule line
            if len(p_flg) > 0:
                sched += self.format_port_occupation_line(occ_ports[i], p_flg + instrForm[-1])
                for port in self.ld_ports:
                    occ_ports[i][port] = 0
            else:
                sched += self.format_port_occupation_line(occ_ports[i], instrForm[-1])
            # Add throughput to total port binding
            port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
        if machine_readable:
            # append each instruction form to its port occupation row
            list(map(self.append, occ_ports, self.instrList))
            return occ_ports, port_bndgs
        return sched, port_bndgs

    def schedule(self):
        """
        Schedule Instruction Form list and calculate port bindings.

        Returns
        -------
        (str, [int, ...])
            A tuple containing the graphic output of the schedule as string and
            the port bindings as list of ints.
        """
        # NOTE(review): the search string below inserts an extra '-' in front of
        # get_operand_suffix(), which already starts with '-' when operands
        # exist; new_schedule() does not. Confirm which key format the CSV uses.
        wTP = False
        sched = self.get_head()
        # Initialize ports
        port_bndgs = [0] * self.ports
        # One occupation row per instruction form. This list was used below
        # without ever being created, which raised a NameError on first use.
        occ_ports = [[0] * self.ports for x in range(len(self.instrList))]
        # Check if there's a port occupation stored in the CSV, otherwise leave the
        # occ_port list item empty
        for i, instrForm in enumerate(self.instrList):
            try:
                search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm)
                entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr]
                tup = entry.ports.values[0]
                if len(tup) == 1 and tup[0][0] == -1:
                    raise IndexError()
            except IndexError:
                # Instruction form not in CSV
                if instrForm[0][:3] == 'nop':
                    sched += self.format_port_occupation_line(occ_ports[i], '* ' + instrForm[-1])
                else:
                    sched += self.format_port_occupation_line(occ_ports[i], 'X ' + instrForm[-1])
                continue
            if wTP:
                # Get the occurance of each port from the occupation list
                port_occurances = self.get_port_occurances(tup)
                # Get 'occurance groups'
                occurance_groups = self.get_occurance_groups(port_occurances)
                # Calculate port dependent throughput
                tp_ges = entry.TP.values[0] * len(occurance_groups[0])
                for occGroup in occurance_groups:
                    for port in occGroup:
                        occ_ports[i][port] = tp_ges / len(occGroup)
            else:
                variations = len(tup)
                t_all = self.flatten(tup)
                if entry.TP.values[0] == 0:
                    t_all = ()
                if variations == 1:
                    for j in tup[0]:
                        occ_ports[i][j] = entry.TP.values[0]
                else:
                    for j in range(0, self.ports):
                        occ_ports[i][j] = t_all.count(j) / variations
            # Write schedule line
            sched += self.format_port_occupation_line(occ_ports[i], instrForm[-1])
            # Add throughput to total port binding
            port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
        return sched, port_bndgs

    def flatten(self, l):
        """Recursively flatten a nested sequence ``l`` into a flat sequence of the same type."""
        if len(l) == 0:
            return l
        if isinstance(l[0], type(l)):
            return self.flatten(l[0]) + self.flatten(l[1:])
        return l[:1] + self.flatten(l[1:])

    def append(self, l, e):
        """Append ``e`` to ``l`` if ``l`` is a list; do nothing otherwise."""
        if(isinstance(l, list)):
            l.append(e)

    def schedule_fcfs(self):
        """
        Schedule Instruction Form list for a single run with latencies.

        Returns
        -------
        (str, int)
            A tuple containing the graphic output as string and the total throughput time as int.
        """
        sched = self.get_head()
        total = 0
        # Initialize ports
        occ_ports = [0] * self.ports
        for instrForm in self.instrList:
            try:
                search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm)
                entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr]
                tup = entry.ports.values[0]
                if len(tup) == 1 and tup[0][0] == -1:
                    raise IndexError()
            except IndexError:
                # Instruction form not in CSV
                sched += self.format_port_occupation_line([0] * self.ports, '* ' + instrForm[-1])
                continue
            found = False
            while not found:
                for portOcc in tup:
                    # Test if chosen instruction form port occupation suits the current CPU port
                    # occupation
                    if self.test_ports_fcfs(occ_ports, portOcc):
                        # Current port occupation fits for chosen port occupation of instruction!
                        found = True
                        good = [entry.LT.values[0] if (j in portOcc) else 0 for j in
                                range(0, self.ports)]
                        sched += self.format_port_occupation_line(good, instrForm[-1])
                        # Add new occupation
                        occ_ports = [occ_ports[j] + good[j] for j in range(0, self.ports)]
                        break
                # Step
                occ_ports = [j - 1 if (j > 0) else 0 for j in occ_ports]
                if entry.LT.values[0] != 0:
                    total += 1
        total += max(occ_ports)
        return sched, total

    def get_occurance_groups(self, port_occurances):
        """
        Group ports in groups by the number of their occurrence and sorts groups by cardinality.

        Parameters
        ----------
        port_occurances : [int, ...]
            List with the length of ports containing the number of occurances
            of each port

        Returns
        -------
        [[int, ...], ...]
            List of lists with all occurance groups sorted by cardinality
            (smallest group first)
        """
        # NOTE(review): the group count is "distinct values minus one", which
        # presumably assumes that 0 occurs in port_occurances and that the
        # positive counts have no gaps -- confirm against callers.
        groups = [[] for x in range(len(set(port_occurances))-1)]
        for i, groupInd in enumerate(range(min(list(filter(lambda x: x > 0, port_occurances))),
                                           max(port_occurances) + 1)):
            for p, occurs in enumerate(port_occurances):
                if groupInd == occurs:
                    groups[i].append(p)
        # Sort groups by cardinality
        groups.sort(key=len)
        return groups

    def get_port_occurances(self, tups):
        """
        Return the number of each port occurrence for the possible port occupations.

        Parameters
        ----------
        tups : ((int, ...), ...)
            Tuple of tuples of possible port occupations

        Returns
        -------
        [int, ...]
            List in the length of the number of ports for the current architecture,
            containing the amount of occurances for each port
        """
        ports = [0] * self.ports
        for tup in tups:
            for elem in tup:
                ports[elem] += 1
        return ports

    def test_ports_fcfs(self, occ_ports, needed_ports):
        """
        Test if current configuration of ports is possible and returns boolean.

        Parameters
        ----------
        occ_ports : [int]
            Tuple to inspect for current port occupation
        needed_ports : (int)
            Tuple with needed port(s) for particular instruction form

        Returns
        -------
        bool
            True if needed ports can get scheduled on current port occupation
            False if not
        """
        for port in needed_ports:
            if occ_ports[port] != 0:
                return False
        return True

    def get_report_info(self):
        """
        Create Report information including all needed annotations.

        Returns
        -------
        str
            String containing the report information
        """
        analysis = 'Throughput Analysis Report\n' + ('-' * 26) + '\n'
        annotations = (
            'P - Load operation can be hidden behind a past or future store instruction\n'
            'X - No information for this instruction in data file\n'
            '* - Not bound to a port, therefore ignored\n\n')
        return analysis + annotations

    def get_head(self):
        """
        Create right heading for CPU architecture.

        Returns
        -------
        str
            String containing the header
        """
        port_names = self.get_port_naming()

        port_line = ''.join('|{:^6}'.format(pn) for pn in port_names) + '|\n'
        horiz_line = '-' * (len(port_line) - 1) + '\n'
        port_anno = ' ' * ((len(port_line) - 25) // 2) + 'Ports Pressure in cycles\n'

        return port_anno + port_line + horiz_line

    def format_port_occupation_line(self, occ_ports, instr_name):
        """
        Create line with port occupation for output.

        Parameters
        ----------
        occ_ports : (int, ...)
            Integer tuple containing needed ports
        instr_name : str
            Name of instruction form for output

        Returns
        -------
        str
            String for output containing port scheduling for instr_name
        """
        line = ''
        for cycles in occ_ports:
            if cycles == 0:
                line += '|' + ' ' * 6
            elif cycles >= 10:
                line += '|{:^6.1f}'.format(cycles)
            else:
                line += '|{:^6.2f}'.format(cycles)
        line += '| ' + instr_name + '\n'
        return line

    def get_port_naming(self):
        """
        Return list of port names

        :return: list of strings
        """
        return sorted([str(i) for i in range(self.ports)] + self.pipeline_ports)

    def get_port_binding(self, port_bndg):
        """
        Create port binding out of scheduling result.

        Parameters
        ----------
        port_bndg : [int, ...]
            Integer list containing port bindings

        Returns
        -------
        str
            String containing the port binding graphical output
        """
        col_widths = self.get_column_widths(port_bndg)
        header = 'Port Binding in Cycles Per Iteration:\n'
        horiz_line = '-' * 10 + '-' * (sum(col_widths) + len(col_widths)) + '\n'
        port_line = '| Port |'
        for i, port_name in enumerate(self.get_port_naming()):
            port_line += port_name.center(col_widths[i]) + '|'
        port_line += '\n'
        cyc_line = '| Cycles |'
        for i in range(len(port_bndg)):
            cyc_line += '{}|'.format(str(round(port_bndg[i], 2)).center(col_widths[i]))
        cyc_line += '\n'
        binding = header + horiz_line + port_line + horiz_line + cyc_line + horiz_line
        return binding

    def get_column_widths(self, port_bndg):
        """Return one column width per port: the longer of value and port name plus 2."""
        return [max(len(str(round(x, 2))), len(name)) + 2
                for x, name in zip(port_bndg, self.get_port_naming())]

    def get_operand_suffix(self, instr_form):
        """
        Create operand suffix out of list of Parameters.

        Parameters
        ----------
        instr_form : [str, Parameter, ..., Parameter, str]
            Instruction Form data structure

        Returns
        -------
        str
            Operand suffix for searching in data file
        """
        op_ext = []
        operands = ''
        if len(instr_form) > 2:
            operands = '-'
        for i in range(1, len(instr_form) - 1):
            if isinstance(instr_form[i], Register) and instr_form[i].reg_type == 'GPR':
                optmp = 'r' + str(instr_form[i].size)
            elif isinstance(instr_form[i], MemAddr):
                optmp = 'mem'
            else:
                optmp = str(instr_form[i]).lower()
            op_ext.append(optmp)
        operands += '_'.join(op_ext)
        return operands
|
||||
|
||||
|
||||
# This module has no command line entry point; running it directly only
# prints a notice.
if __name__ == '__main__':
    print('Nothing to do.')
|
||||
193
osaca/frontend.py
Executable file
193
osaca/frontend.py
Executable file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime as dt
|
||||
|
||||
from ruamel import yaml
|
||||
|
||||
from osaca import utils
|
||||
from osaca.semantics import INSTR_FLAGS, KernelDG, SemanticsAppender
|
||||
|
||||
|
||||
class Frontend(object):
|
||||
def __init__(self, filename='', arch=None, path_to_yaml=None):
|
||||
self._filename = filename
|
||||
if not arch and not path_to_yaml:
|
||||
raise ValueError('Either arch or path_to_yaml required.')
|
||||
if arch and path_to_yaml:
|
||||
raise ValueError('Only one of arch and path_to_yaml is allowed.')
|
||||
self._arch = arch
|
||||
if arch:
|
||||
self._arch = arch.lower()
|
||||
with open(utils.find_file(self._arch + '.yml'), 'r') as f:
|
||||
self._data = yaml.load(f, Loader=yaml.Loader)
|
||||
elif path_to_yaml:
|
||||
with open(path_to_yaml, 'r') as f:
|
||||
self._data = yaml.load(f, Loader=yaml.Loader)
|
||||
|
||||
def _is_comment(self, instruction_form):
|
||||
return instruction_form['comment'] is not None and instruction_form['instruction'] is None
|
||||
|
||||
def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
    """
    Print the throughput analysis report of a kernel to stdout.

    :param kernel: list of instruction forms with the keys ``'line_number'``,
        ``'port_pressure'``, ``'flags'``, ``'instruction'``, ``'line'`` (and
        ``'comment'`` when comments are filtered)
    :param show_lineno: if True, keep the line number column in the output
    :param show_cmnts: if False, skip instruction forms that are pure comments
    """
    filler = ' ' if show_lineno else ''
    widths = self._get_max_port_len(kernel)
    rule = '-' * sum([width + 3 for width in widths]) + '-'
    if show_lineno:
        # extend the rule by the width of the largest line number
        rule += '--' + len(str(kernel[-1]['line_number'])) * '-'
    bar = '|'
    separators = self._get_separator_list(bar)
    title = 'Port pressure in cycles'
    centred = '{{:^{}}}'.format(len(rule))

    print('\n\nThroughput Analysis Report\n' + '--------------------------')
    print(centred.format(title))
    print(filler + self._get_port_number_line(widths))
    print(rule)
    for form in kernel:
        number = form['line_number']
        pressure = self._get_port_pressure(form['port_pressure'], widths, separators)
        if form['instruction'] is not None:
            symbols = self._get_flag_symbols(form['flags'])
        else:
            symbols = ' '
        row = '{:4d} {} {} {}'.format(number, pressure, symbols, form['line'].strip())
        if not show_lineno:
            # drop everything in front of the first column separator
            row = bar + bar.join(row.split(bar)[1:])
        if show_cmnts is False and self._is_comment(form):
            continue
        print(row)
    print()
    totals = SemanticsAppender.get_throughput_sum(kernel)
    print(filler + self._get_port_pressure(totals, widths, ' '))
|
||||
|
||||
def _get_separator_list(self, separator, separator_2=' '):
|
||||
separator_list = []
|
||||
for i in range(len(self._data['ports']) - 1):
|
||||
match_1 = re.search(r'\d+', self._data['ports'][i])
|
||||
match_2 = re.search(r'\d+', self._data['ports'][i + 1])
|
||||
if match_1 is not None and match_2 is not None and match_1.group() == match_2.group():
|
||||
separator_list.append(separator_2)
|
||||
else:
|
||||
separator_list.append(separator)
|
||||
separator_list.append(separator)
|
||||
return separator_list
|
||||
|
||||
def _get_flag_symbols(self, flag_obj):
    """
    Translate the flags of an instruction form into report symbols.

    :param flag_obj: container of ``INSTR_FLAGS`` members
    :returns: concatenation of ``'*'`` (not bound), ``'X'`` (throughput unknown)
        and ``'P'`` (hidden load) for the flags present, or a single space if
        none of them is set
    """
    symbol_table = (
        (INSTR_FLAGS.NOT_BOUND, '*'),
        (INSTR_FLAGS.TP_UNKWN, 'X'),
        (INSTR_FLAGS.HIDDEN_LD, 'P'),
        # TODO add other flags
    )
    symbols = ''
    for flag, symbol in symbol_table:
        if flag in flag_obj:
            symbols += symbol
    return symbols if symbols else ' '
|
||||
|
||||
def _get_port_pressure(self, ports, port_len, separator='|'):
|
||||
if not isinstance(separator, list):
|
||||
separator = [separator for x in ports]
|
||||
string_result = '{} '.format(separator[-1])
|
||||
for i in range(len(ports)):
|
||||
if float(ports[i]) == 0.0:
|
||||
string_result += port_len[i] * ' ' + ' {} '.format(separator[i])
|
||||
continue
|
||||
left_len = len(str(float(ports[i])).split('.')[0])
|
||||
substr = '{:' + str(left_len) + '.' + str(max(port_len[i] - left_len - 1, 0)) + 'f}'
|
||||
string_result += substr.format(ports[i]) + ' {} '.format(separator[i])
|
||||
return string_result[:-1]
|
||||
|
||||
def _get_max_port_len(self, kernel):
|
||||
port_len = [4 for x in self._data['ports']]
|
||||
for instruction_form in kernel:
|
||||
for i, port in enumerate(instruction_form['port_pressure']):
|
||||
if len('{:.2f}'.format(port)) > port_len[i]:
|
||||
port_len[i] = len('{:.2f}'.format(port))
|
||||
return port_len
|
||||
|
||||
def _get_port_number_line(self, port_len, separator='|'):
|
||||
string_result = separator
|
||||
separator_list = self._get_separator_list(separator, '-')
|
||||
for i, length in enumerate(port_len):
|
||||
substr = '{:^' + str(length + 2) + 's}'
|
||||
string_result += substr.format(self._data['ports'][i]) + separator_list[i]
|
||||
return string_result
|
||||
|
||||
def print_latency_analysis(self, cp_kernel, separator='|'):
    """
    Print the latency analysis report for the critical path.

    One row is printed per instruction form (line number, latency on the
    critical path, an ``X`` if the latency is flagged as unknown, and the
    source line), followed by a final row with the summed latency.

    :param cp_kernel: sequence of instruction forms on the critical path; it is
                      iterated more than once. May be empty.
    :param separator: column separator string
    """
    print('\n\nLatency Analysis Report\n' + '-----------------------')
    for instruction_form in cp_kernel:
        lt_unknown = 'X' if INSTR_FLAGS.LT_UNKWN in instruction_form['flags'] else ' '
        print(
            '{:4d} {} {:4.1f} {}{}{} {}'.format(
                instruction_form['line_number'],
                separator,
                instruction_form['latency_cp'],
                separator,
                lt_unknown,
                separator,
                instruction_form['line'],
            )
        )
    # default=0 keeps the summary row printable for an empty critical path;
    # a bare max() over an empty sequence would raise ValueError.
    lineno_width = max(
        (len(str(instr_form['line_number'])) for instr_form in cp_kernel), default=0
    )
    print(
        '\n{:4} {} {:4.1f}'.format(
            ' ' * lineno_width,
            ' ' * len(separator),
            sum([instr_form['latency_cp'] for instr_form in cp_kernel]),
        )
    )
|
||||
|
||||
def print_loopcarried_dependencies(self, dep_dict, separator='|'):
    """
    Print the loop-carried dependencies report.

    :param dep_dict: mapping from a line number to an entry holding the
                     ``'root'`` instruction form and the list of
                     ``'dependencies'`` (instruction forms) of that chain
    :param separator: column separator string
    """
    title = '\n\nLoop-Carried Dependencies Analysis Report\n'
    print(title + '-----------------------------------------')
    # TODO find a way to overcome padding for different tab-lengths
    row_format = '{:4d} {} {:4.1f} {} {:36}{} {}'
    for dep in dep_dict:
        # Total latency of the whole dependency chain.
        chain_latency = sum(
            [instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies']]
        )
        root_line = dep_dict[dep]['root']['line']
        chain_lines = [node['line_number'] for node in dep_dict[dep]['dependencies']]
        print(
            row_format.format(
                dep, separator, chain_latency, separator, root_line, separator, chain_lines
            )
        )
|
||||
|
||||
def _print_header_report(self):
|
||||
version = 'v0.3'
|
||||
adjust = 20
|
||||
header = ''
|
||||
header += 'Open Source Architecture Code Analyzer (OSACA) - {}\n'.format(version)
|
||||
header += 'Analyzed file:'.ljust(adjust) + '{}\n'.format(self._filename)
|
||||
header += 'Architecture:'.ljust(adjust) + '{}\n'.format(self._arch)
|
||||
header += 'Timestamp:'.ljust(adjust) + '{}\n'.format(
|
||||
dt.utcnow().strftime('%Y-%m-%d %H:%M:%S')
|
||||
)
|
||||
print(header)
|
||||
|
||||
def _print_symbol_map(self):
    """
    Print a legend that explains the flag symbols used in the reports,
    one line per flag in sorted flag order.
    """
    descriptions = {
        INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
        INSTR_FLAGS.TP_UNKWN: (
            'No throughput/latency information for this instruction in '
            'data file'
        ),
        INSTR_FLAGS.HIDDEN_LD: (
            'Throughput of LOAD operation can be hidden behind a past '
            'or future STORE instruction'
        ),
    }
    legend_lines = [
        ' {} - {}\n'.format(self._get_flag_symbols([flag]), descriptions[flag])
        for flag in sorted(descriptions.keys())
    ]
    # Lines already carry their newline, so suppress the one print() would add.
    print(''.join(legend_lines), end='')
|
||||
|
||||
def _print_port_binding_summary(self):
    """Placeholder for a port binding summary; always raises NotImplementedError."""
    raise NotImplementedError
|
||||
|
||||
def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False):
    """
    Print the complete report: header, symbol legend, throughput analysis,
    latency analysis of the critical path and loop-carried dependencies.

    :param kernel: kernel passed on to the throughput analysis
    :param kernel_dg: dependency graph providing the critical path and the
                      loop-carried dependencies
    :param verbose: currently not used by this method
    """
    self._print_header_report()
    self._print_symbol_map()
    self.print_throughput_analysis(kernel, show_lineno=True)
    critical_path = kernel_dg.get_critical_path()
    self.print_latency_analysis(critical_path)
    loopcarried_deps = kernel_dg.get_loopcarried_dependencies()
    self.print_loopcarried_dependencies(loopcarried_deps)
|
||||
@@ -1,240 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
|
||||
from osaca.testcase import Testcase
|
||||
from osaca.param import Register, MemAddr, Parameter
|
||||
#from testcase import Testcase
|
||||
#from param import Register, MemAddr, Parameter
|
||||
|
||||
|
||||
class InstrExtractor(object):
    """
    Count the instruction forms found in the marked code regions of the given
    files and create a benchmark test case for every newly seen form.

    The counters live in ``db`` (instruction form -> number of occurrences)
    and can be stored to / loaded from the file ``.cnt_asm_ops.db`` in the
    current working directory.
    """

    # Class-level defaults. The mutable containers (db, sorted_db) are
    # re-created per instance in __init__ so instances never share state.
    filepaths = []
    # Variables for checking lines
    numSeps = 0
    sem = 0
    db = {}
    sorted_db = []
    lncnt = 1
    cntChar = ''
    first = True
    # Constant variables
    MARKER = r'//STARTLOOP'
    ASM_LINE = re.compile(r'\s[0-9a-f]+[:]')

    def __init__(self, filepath):
        """
        :param filepath: list of paths of the files to analyse
        """
        self.filepaths = filepath
        # Per-instance containers; mutating the class-level dict/list would
        # leak counters from one extractor into every other one.
        self.db = {}
        self.sorted_db = []

    def check_all(self):
        """Run the extraction for every configured file path."""
        for path in self.filepaths:
            self.extract_instr(path)

    def is_elffile(self, filepath):
        """
        Check whether ``filepath`` is an existing file whose content contains
        the text ``format elf64``.

        :return: True if so, False otherwise
        """
        if os.path.isfile(filepath):
            with open(filepath) as f:
                src = f.read()
            if 'format elf64' in src:
                return True
        return False

    def extract_instr(self, asm_file):
        """
        Feed every line of ``asm_file`` to :meth:`check_line`.

        Prints ``Invalid argument`` and returns if the file is not accepted by
        :meth:`is_elffile`.
        """
        # Check if parameter is in the correct file format
        if not self.is_elffile(asm_file):
            print('Invalid argument')
            return
        # Analyse code line by line and check the instructions
        self.lncnt = 1
        # The context manager closes the file even if a line fails to parse.
        with open(asm_file, 'r') as f:
            for line in f:
                self.check_line(line)
                self.lncnt += 1

    def check_line(self, line):
        """
        Track whether ``line`` belongs to a marked code region and, if it is
        an assembly line inside such a region, pass its instruction to
        :meth:`check_instr`.
        """
        # Check if MARKER is in line and count the number of whitespaces if so
        if self.MARKER in line:
            # But first, check if high level code is indented with whitespaces or tabs
            if self.first:
                self.set_counter_char(line)
                self.first = False
            self.numSeps = (re.split(self.MARKER, line)[0]).count(self.cntChar)
            self.sem = 2
        elif self.sem > 0:
            # We're in the marked code snippet
            # Check if the line is ASM code and - if not - check if we're still in the loop
            match = re.search(self.ASM_LINE, line)
            if match:
                # Further analysis of instructions
                # Check if there are comments in line
                if r'//' in line:
                    return
                # The instruction is the last tab-separated field of the line.
                self.check_instr(''.join(re.split(r'\t', line)[-1:]))
            elif (re.split(r'\S', line)[0]).count(self.cntChar) <= self.numSeps:
                # Not in the loop anymore - or yet - so we decrement the semaphore
                self.sem = self.sem - 1

    # Check if separator is either tabulator or whitespace
    def set_counter_char(self, line):
        """
        Detect whether the text in front of the marker is indented with
        spaces or with tabs and remember that character in ``cntChar``.

        :raises NotImplementedError: if the indentation mixes both or uses neither
        """
        prefix = re.split(self.MARKER, line)[0]
        num_spaces = prefix.count(' ')
        num_tabs = prefix.count('\t')
        if num_spaces != 0 and num_tabs == 0:
            self.cntChar = ' '
        elif num_spaces == 0 and num_tabs != 0:
            self.cntChar = '\t'
        else:
            err_msg = 'Indentation of code is only supported for whitespaces and tabs.'
            raise NotImplementedError(err_msg)

    def check_instr(self, instr):
        """
        Classify the operands of ``instr``, increase the counter of the
        resulting instruction form and write a test case when the form is
        seen for the first time and has no label or empty operand.
        """
        # Check for strange clang padding bytes
        while instr.startswith('data32'):
            instr = instr[7:]
        fields = instr.split()
        # Nothing left to analyse (blank field, or only padding prefixes).
        if not fields:
            return
        # Separate mnemonic and operands
        mnemonic = fields[0]
        params = ''.join(fields[1:])
        # Check if line is not only a byte
        empty_byte = re.compile(r'[0-9a-f]{2}')
        if re.match(empty_byte, mnemonic) and len(mnemonic) == 2:
            return
        # Check if there's one or more operand and store all in a list
        param_list = self.flatten(self.separate_params(params))
        op_list = list(param_list)
        # Check operands and separate them by IMMEDIATE (IMD), REGISTER (REG), MEMORY (MEM) or
        # LABEL (LBL)
        for i in range(len(param_list)):
            op = param_list[i]
            if len(op) <= 0:
                op = Parameter('NONE')
            elif op[0] == '$':
                op = Parameter('IMD')
            elif op[0] == '%' and '(' not in op:
                j = len(op)
                opmask = False
                if '{' in op:
                    j = op.index('{')
                    opmask = True
                op = Register(op[1:j], opmask)
            elif '<' in op:
                op = Parameter('LBL')
            else:
                op = MemAddr(op)
            # Registers additionally carry their size in the textual form.
            param_list[i] = str(op) if (type(op) is not Register) else str(op) + str(op.size)
            op_list[i] = op
        # Join mnemonic and operand(s) to an instruction form
        if len(mnemonic) > 7:
            tabs = '\t'
        else:
            tabs = '\t\t'
        instr_form = mnemonic + tabs + (' '.join(param_list))
        # Check in data file for instruction form and increment the counter
        if instr_form in self.db:
            self.db[instr_form] = self.db[instr_form] + 1
        else:
            self.db[instr_form] = 1
            # Create testcase for instruction form, since it is the first appearance of it
            # Only create benchmark if no label (LBL) is part of the operands
            do_bench = True
            for par in op_list:
                if str(par) == 'LBL' or str(par) == '':
                    do_bench = False
            if do_bench:
                # Create testcase with reversed param list, due to the fact its intel syntax!
                tc = Testcase(mnemonic, list(reversed(op_list)), '64')
                tc.write_testcase()

    def separate_params(self, params):
        """
        Split an operand string at the commas that separate operands, keeping
        a parenthesised memory operand together.

        :return: nested list ``[first, [second, [...]]]``; use :meth:`flatten`
                 to obtain a flat list
        """
        param_list = [params]
        if ',' in params:
            if ')' in params:
                close = params.index(')')
                if close < len(params) - 1 and params[close + 1] == ',':
                    # Split right behind the closing parenthesis.
                    i = close + 1
                elif params.index('(') < params.index(','):
                    # The only commas are inside the parentheses: single operand.
                    return param_list
                else:
                    i = params.index(',')
            else:
                i = params.index(',')
            param_list = [params[:i], self.separate_params(params[i + 1:])]
        elif '#' in params:
            # Cut off everything from the '#' on.
            i = params.index('#')
            param_list = [params[:i]]
        return param_list

    def sort_db(self):
        """Store the counters sorted by descending occurrence in ``sorted_db``."""
        self.sorted_db = sorted(self.db.items(), key=lambda x: x[1], reverse=True)

    def print_sorted_db(self):
        """Print all instruction forms by descending occurrence plus the total count."""
        self.sort_db()
        total = 0
        print('Number of\tmnemonic')
        print('calls\n')
        for instr_form, count in self.sorted_db:
            print(str(count) + '\t\t' + instr_form)
            total += count
        print('\nCumulated number of instructions: ' + str(total))

    def save_db(self):
        """Write all counters to ``.cnt_asm_ops.db`` in the current directory."""
        with open('.cnt_asm_ops.db', 'w') as file:
            for instr_form, count in self.db.items():
                file.write(instr_form + '\t' + str(count) + '\n')

    def load_db(self):
        """
        Read counters from ``.cnt_asm_ops.db`` in the current directory into
        ``db``; prints a message and returns if the file does not exist.
        """
        try:
            file = open('.cnt_asm_ops.db', 'r')
        except FileNotFoundError:
            print('no data file found in current directory')
            return
        with file:
            for line in file:
                fields = line.split('\t')
                mnemonic = fields[0]
                # Join mnemonic and operand(s) to an instruction form
                # Short mnemonics are followed by two tabs, which shifts the
                # remaining fields by one position.
                if len(mnemonic) > 7:
                    tabs = '\t'
                    params = fields[1]
                    num_calls = fields[2][:-1]
                else:
                    tabs = '\t\t'
                    params = fields[2]
                    num_calls = fields[3][:-1]
                instr_form = mnemonic + tabs + params
                self.db[instr_form] = int(num_calls)

    def flatten(self, l):
        """Return a flat list with the elements of the arbitrarily nested list ``l``."""
        if not l:
            return l
        if isinstance(l[0], list):
            return self.flatten(l[0]) + self.flatten(l[1:])
        return l[:1] + self.flatten(l[1:])
|
||||
|
||||
|
||||
def main():
    """
    Command line entry point: optionally load the stored counters, analyse
    all given files, print the sorted counters and optionally store them.
    """
    # Parse args
    parser = argparse.ArgumentParser(description='Returns a list of all instruction forms in the '
                                                 'given files sorted by their number of '
                                                 'occurrences.')
    parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.2')
    parser.add_argument('filepath', nargs='+', help='path to objdump(s)')
    parser.add_argument('-l', '--load', dest='load', action='store_true',
                        help='load data file before checking new files')
    # The data file is written after the new files were checked (see below),
    # so the help text says "after" rather than "before".
    parser.add_argument('-s', '--store', dest='store', action='store_true',
                        help='store data file after checking new files')

    # Create object and store arguments as attribute
    inp = parser.parse_args()
    ie = InstrExtractor(inp.filepath)

    # Do work
    if inp.load:
        ie.load_db()
    ie.check_all()
    ie.print_sorted_db()
    if inp.store:
        ie.save_db()
|
||||
|
||||
|
||||
# ---------main method----------
|
||||
# Run the command line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
1018
osaca/osaca.py
1018
osaca/osaca.py
File diff suppressed because it is too large
Load Diff
142
osaca/param.py
142
osaca/param.py
@@ -1,142 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import re
|
||||
|
||||
|
||||
class Parameter(object):
    """
    Operand of an instruction, identified by one of the type names in
    ``type_list``.
    """

    type_list = ['REG', 'MEM', 'IMD', 'LBL', 'NONE']

    def __init__(self, ptype):
        """
        :param str ptype: operand type name, matched case-insensitively
        :raises NameError: if the type name is not in ``type_list``
        """
        self.ptype = ptype.upper()
        supported = self.ptype in self.type_list
        if not supported:
            raise NameError('Type not supported: '+ptype)

    def __str__(self):
        """Return the type name, or an empty string for the type ``NONE``."""
        return '' if self.ptype == 'NONE' else self.ptype
|
||||
|
||||
|
||||
class MemAddr(Parameter):
    """
    Memory operand of the form ``offset(base, index, scale)`` where offset,
    index and scale are optional.
    """

    segment_regs = ['CS', 'DS', 'SS', 'ES', 'FS', 'GS']
    scales = [1, 2, 4, 8]

    def __init__(self, name):
        """
        :param str name: textual memory operand; surrounding commas, blanks
                         and tabs are ignored
        :raises ValueError: if the text does not match the supported format
        """
        super().__init__('MEM')
        name = name.strip(', \t')
        self.offset = None
        self.base = None
        self.index = None
        self.scale = None

        pattern = (
            r'((?P<offset_hex>[x0-9a-fA-F]*)|(?P<offset_dec>\-?[0-9]*))'
            r'\((?P<base>[^,\)]+)(?:,\s*(?P<index>[^,\)]+)(?:,\s*'
            r'(?P<scale>[^,\)]+))?)?\)'
        )
        match = re.match(pattern, name)
        if match is None:
            raise ValueError('Type not supported: {!r}'.format(name))

        # Empty or missing groups are normalised to None.
        self.offset = match.group('offset_dec') or match.group('offset_hex') or None
        self.base = match.group('base') or None
        self.index = match.group('index') or None
        self.scale = match.group('scale') or None

    def __str__(self):
        """Return a generic description that names the address parts present."""
        parts = ['MEM(']
        if self.offset:
            parts.append('offset')
        if self.base:
            if not self.index:
                parts.append('(base)')
            elif self.scale:
                parts.append('(base, index, scale)')
        parts.append(')')
        return ''.join(parts)
|
||||
|
||||
|
||||
class Register(Parameter):
    """
    Register operand. ``sizes`` maps an upper-case register name to a tuple
    of its size in bits and its register type.
    """

    sizes = {
        # General Purpose Registers
        'AH': (8, 'GPR'), 'AL': (8, 'GPR'), 'BH': (8, 'GPR'), 'BL': (8, 'GPR'), 'CH': (8, 'GPR'),
        'CL': (8, 'GPR'), 'DH': (8, 'GPR'), 'DL': (8, 'GPR'), 'BPL': (8, 'GPR'), 'SIL': (8, 'GPR'),
        'DIL': (8, 'GPR'), 'SPL': (8, 'GPR'), 'R8L': (8, 'GPR'), 'R9L': (8, 'GPR'),
        'R10L': (8, 'GPR'), 'R11L': (8, 'GPR'), 'R12L': (8, 'GPR'), 'R13L': (8, 'GPR'),
        'R14L': (8, 'GPR'), 'R15L': (8, 'GPR'), 'R8B': (8, 'GPR'), 'R9B': (8, 'GPR'),
        'R10B': (8, 'GPR'), 'R11B': (8, 'GPR'), 'R12B': (8, 'GPR'), 'R13B': (8, 'GPR'),
        'R14B': (8, 'GPR'), 'R15B': (8, 'GPR'), 'AX': (16, 'GPR'), 'BX': (16, 'GPR'),
        # 'BC' was a misspelling of 'BX'; it is kept so that existing lookups keep working.
        'BC': (16, 'GPR'),
        'CX': (16, 'GPR'), 'DX': (16, 'GPR'), 'BP': (16, 'GPR'), 'SI': (16, 'GPR'),
        'DI': (16, 'GPR'), 'SP': (16, 'GPR'), 'R8W': (16, 'GPR'), 'R9W': (16, 'GPR'),
        'R10W': (16, 'GPR'), 'R11W': (16, 'GPR'), 'R12W': (16, 'GPR'), 'R13W': (16, 'GPR'),
        'R14W': (16, 'GPR'), 'R15W': (16, 'GPR'), 'EAX': (32, 'GPR'), 'EBX': (32, 'GPR'),
        'ECX': (32, 'GPR'), 'EDX': (32, 'GPR'), 'EBP': (32, 'GPR'), 'ESI': (32, 'GPR'),
        'EDI': (32, 'GPR'), 'ESP': (32, 'GPR'), 'R8D': (32, 'GPR'), 'R9D': (32, 'GPR'),
        'R10D': (32, 'GPR'), 'R11D': (32, 'GPR'), 'R12D': (32, 'GPR'), 'R13D': (32, 'GPR'),
        'R14D': (32, 'GPR'), 'R15D': (32, 'GPR'), 'RAX': (64, 'GPR'), 'RBX': (64, 'GPR'),
        'RCX': (64, 'GPR'), 'RDX': (64, 'GPR'), 'RBP': (64, 'GPR'), 'RSI': (64, 'GPR'),
        'RDI': (64, 'GPR'), 'RSP': (64, 'GPR'), 'R8': (64, 'GPR'), 'R9': (64, 'GPR'),
        'R10': (64, 'GPR'), 'R11': (64, 'GPR'), 'R12': (64, 'GPR'), 'R13': (64, 'GPR'),
        'R14': (64, 'GPR'), 'R15': (64, 'GPR'), 'CS': (16, 'GPR'), 'DS': (16, 'GPR'),
        'SS': (16, 'GPR'), 'ES': (16, 'GPR'), 'FS': (16, 'GPR'), 'GS': (16, 'GPR'),
        'EFLAGS': (32, 'GPR'), 'RFLAGS': (64, 'GPR'), 'EIP': (32, 'GPR'), 'RIP': (64, 'GPR'),
        # FPU Registers
        'ST0': (80, 'FPU'), 'ST1': (80, 'FPU'), 'ST2': (80, 'FPU'), 'ST3': (80, 'FPU'),
        'ST4': (80, 'FPU'), 'ST5': (80, 'FPU'), 'ST6': (80, 'FPU'), 'ST7': (80, 'FPU'),
        # MMX Registers
        'MM0': (64, 'MMX'), 'MM1': (64, 'MMX'), 'MM2': (64, 'MMX'), 'MM3': (64, 'MMX'),
        'MM4': (64, 'MMX'), 'MM5': (64, 'MMX'), 'MM6': (64, 'MMX'), 'MM7': (64, 'MMX'),
        # XMM Registers
        'XMM0': (128, 'XMM'), 'XMM1': (128, 'XMM'), 'XMM2': (128, 'XMM'), 'XMM3': (128, 'XMM'),
        'XMM4': (128, 'XMM'), 'XMM5': (128, 'XMM'), 'XMM6': (128, 'XMM'), 'XMM7': (128, 'XMM'),
        'XMM8': (128, 'XMM'), 'XMM9': (128, 'XMM'), 'XMM10': (128, 'XMM'), 'XMM11': (128, 'XMM'),
        'XMM12': (128, 'XMM'), 'XMM13': (128, 'XMM'), 'XMM14': (128, 'XMM'), 'XMM15': (128, 'XMM'),
        'XMM16': (128, 'XMM'), 'XMM17': (128, 'XMM'), 'XMM18': (128, 'XMM'), 'XMM19': (128, 'XMM'),
        'XMM20': (128, 'XMM'), 'XMM21': (128, 'XMM'), 'XMM22': (128, 'XMM'), 'XMM23': (128, 'XMM'),
        'XMM24': (128, 'XMM'), 'XMM25': (128, 'XMM'), 'XMM26': (128, 'XMM'), 'XMM27': (128, 'XMM'),
        'XMM28': (128, 'XMM'), 'XMM29': (128, 'XMM'), 'XMM30': (128, 'XMM'), 'XMM31': (128, 'XMM'),
        # YMM Registers
        'YMM0': (256, 'YMM'), 'YMM1': (256, 'YMM'), 'YMM2': (256, 'YMM'), 'YMM3': (256, 'YMM'),
        'YMM4': (256, 'YMM'), 'YMM5': (256, 'YMM'), 'YMM6': (256, 'YMM'), 'YMM7': (256, 'YMM'),
        'YMM8': (256, 'YMM'), 'YMM9': (256, 'YMM'), 'YMM10': (256, 'YMM'), 'YMM11': (256, 'YMM'),
        'YMM12': (256, 'YMM'), 'YMM13': (256, 'YMM'), 'YMM14': (256, 'YMM'), 'YMM15': (256, 'YMM'),
        'YMM16': (256, 'YMM'), 'YMM17': (256, 'YMM'), 'YMM18': (256, 'YMM'), 'YMM19': (256, 'YMM'),
        'YMM20': (256, 'YMM'), 'YMM21': (256, 'YMM'), 'YMM22': (256, 'YMM'), 'YMM23': (256, 'YMM'),
        'YMM24': (256, 'YMM'), 'YMM25': (256, 'YMM'), 'YMM26': (256, 'YMM'), 'YMM27': (256, 'YMM'),
        'YMM28': (256, 'YMM'), 'YMM29': (256, 'YMM'), 'YMM30': (256, 'YMM'), 'YMM31': (256, 'YMM'),
        # ZMM Registers
        'ZMM0': (512, 'ZMM'), 'ZMM1': (512, 'ZMM'), 'ZMM2': (512, 'ZMM'), 'ZMM3': (512, 'ZMM'),
        'ZMM4': (512, 'ZMM'), 'ZMM5': (512, 'ZMM'), 'ZMM6': (512, 'ZMM'), 'ZMM7': (512, 'ZMM'),
        'ZMM8': (512, 'ZMM'), 'ZMM9': (512, 'ZMM'), 'ZMM10': (512, 'ZMM'), 'ZMM11': (512, 'ZMM'),
        'ZMM12': (512, 'ZMM'), 'ZMM13': (512, 'ZMM'), 'ZMM14': (512, 'ZMM'), 'ZMM15': (512, 'ZMM'),
        'ZMM16': (512, 'ZMM'), 'ZMM17': (512, 'ZMM'), 'ZMM18': (512, 'ZMM'), 'ZMM19': (512, 'ZMM'),
        'ZMM20': (512, 'ZMM'), 'ZMM21': (512, 'ZMM'), 'ZMM22': (512, 'ZMM'), 'ZMM23': (512, 'ZMM'),
        'ZMM24': (512, 'ZMM'), 'ZMM25': (512, 'ZMM'), 'ZMM26': (512, 'ZMM'), 'ZMM27': (512, 'ZMM'),
        'ZMM28': (512, 'ZMM'), 'ZMM29': (512, 'ZMM'), 'ZMM30': (512, 'ZMM'), 'ZMM31': (512, 'ZMM'),
        # Opmask Register
        'K0': (64, 'K'), 'K1': (64, 'K'), 'K2': (64, 'K'), 'K3': (64, 'K'), 'K4': (64, 'K'),
        'K5': (64, 'K'), 'K6': (64, 'K'), 'K7': (64, 'K'),
        # Bounds Registers
        'BND0': (128, 'BND'), 'BND1': (128, 'BND'), 'BND2': (128, 'BND'), 'BND3': (128, 'BND'),
        # Registers in general
        'R16': (16, 'GPR'), 'R32': (32, 'GPR'), 'R64': (64, 'GPR'), 'FPU': (80, 'FPU'),
        'MMX': (64, 'MMX'), 'XMM': (128, 'XMM'), 'YMM': (256, 'YMM'), 'ZMM': (512, 'ZMM'),
        'K': (64, 'K'), 'BND': (128, 'BND')
    }

    def __init__(self, name, mask=False):
        """
        :param str name: register name, matched case-insensitively against ``sizes``
        :param bool mask: whether the operand carries an opmask
        :raises NameError: if the register name is not in ``sizes``
        """
        super().__init__("REG")
        self.name = name.upper()
        self.mask = mask
        if self.name in self.sizes:
            self.size = self.sizes[self.name][0]
            self.reg_type = self.sizes[self.name][1]
        else:
            raise NameError('Register name not in dictionary: {}'.format(self.name))

    def __str__(self):
        """Return the register type, followed by ``{opmask}`` if a mask is set."""
        opmask = ''
        if self.mask:
            opmask = '{opmask}'
        return self.reg_type + opmask
|
||||
19
osaca/parser/__init__.py
Normal file
19
osaca/parser/__init__.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Collection of parsers supported by OSACA.
|
||||
|
||||
Only the parsers listed below are exported, so please add new parsers to __all__.
|
||||
"""
|
||||
from .attr_dict import AttrDict
|
||||
from .base_parser import BaseParser
|
||||
from .parser_x86att import ParserX86ATT
|
||||
from .parser_AArch64v81 import ParserAArch64v81
|
||||
|
||||
__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64v81', 'get_parser']
|
||||
|
||||
def get_parser(isa):
    """
    Create the parser for the given instruction set architecture.

    :param str isa: ISA name, matched case-insensitively (``x86`` or ``aarch64``)
    :return: new parser instance for that ISA
    :raises ValueError: if the ISA is unknown
    """
    isa_name = isa.lower()
    if isa_name == 'x86':
        return ParserX86ATT()
    if isa_name == 'aarch64':
        return ParserAArch64v81()
    raise ValueError("Unknown ISA {!r}.".format(isa))
|
||||
23
osaca/parser/attr_dict.py
Executable file
23
osaca/parser/attr_dict.py
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
class AttrDict(dict):
    """Dictionary whose items can also be accessed as attributes."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Share the storage so attribute and item access see the same data.
        self.__dict__ = self

    @staticmethod
    def convert_dict(dictionary):
        """
        Convert a dictionary, including dictionaries nested in its values and
        lists, to AttrDict instances.

        The values of a passed dictionary are replaced in place. Lists are
        converted element-wise; any other object is returned unchanged.

        :param dictionary: dictionary, list or arbitrary object
        :return: the converted structure
        """
        if isinstance(dictionary, list):
            return [AttrDict.convert_dict(item) for item in dictionary]
        if not isinstance(dictionary, dict):
            return dictionary
        # Iterate over a snapshot of the keys because values are reassigned.
        for key in list(dictionary.keys()):
            value = dictionary[key]
            if isinstance(value, dict):
                dictionary[key] = AttrDict.convert_dict(value)
            elif isinstance(value, list):
                dictionary[key] = [AttrDict.convert_dict(item) for item in value]
        return AttrDict(dictionary)
|
||||
74
osaca/parser/base_parser.py
Executable file
74
osaca/parser/base_parser.py
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
class BaseParser(object):
    """
    Common interface of all assembly parsers. Derived classes implement the
    ISA-specific methods; only :meth:`parse_file` is provided here.
    """

    # Identifiers for operand types
    COMMENT_ID = 'comment'
    DIRECTIVE_ID = 'directive'
    IMMEDIATE_ID = 'immediate'
    LABEL_ID = 'label'
    MEMORY_ID = 'memory'
    REGISTER_ID = 'register'
    INSTRUCTION_ID = 'instruction'
    OPERANDS_ID = 'operands'

    def __init__(self):
        self.construct_parser()

    def parse_file(self, file_content, start_line=0):
        '''
        Parse an assembly file. This does *not* include extracting the marked
        kernel; it only parses the instruction forms.

        :param str file_content: assembly code
        :param int start_line: offset, if first line in file_content is meant to be not 1
        :return: list of instruction forms
        '''
        # Blank lines are skipped but still count for the line numbering.
        return [
            self.parse_line(text, position + 1 + start_line)
            for position, text in enumerate(file_content.split('\n'))
            if text.strip() != ''
        ]

    def parse_line(self, line, line_number=None):
        """Parse a single line; implemented in derived classes."""
        raise NotImplementedError

    def parse_instruction(self, instruction):
        """Parse a single instruction; implemented in derived classes."""
        raise NotImplementedError

    def parse_register(self, register_string):
        """Not implemented in the base class."""
        raise NotImplementedError

    def is_gpr(self, register):
        """Not implemented in the base class."""
        raise NotImplementedError

    def is_vector_register(self, register):
        """Not implemented in the base class."""
        raise NotImplementedError

    def get_reg_type(self, register):
        """Not implemented in the base class."""
        raise NotImplementedError

    def construct_parser(self):
        """Hook called by the constructor; does nothing in the base class."""
        return
        # raise NotImplementedError

    ##################
    # Helper functions
    ##################

    def process_operand(self, operand):
        """Not implemented in the base class."""
        raise NotImplementedError

    def get_full_reg_name(self, register):
        """Not implemented in the base class."""
        raise NotImplementedError

    def normalize_imd(self, imd):
        """Not implemented in the base class."""
        raise NotImplementedError

    def is_reg_dependend_of(self, reg_a, reg_b):
        """Not implemented in the base class."""
        raise NotImplementedError
|
||||
421
osaca/parser/parser_AArch64v81.py
Executable file
421
osaca/parser/parser_AArch64v81.py
Executable file
@@ -0,0 +1,421 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import pyparsing as pp
|
||||
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
|
||||
|
||||
class ParserAArch64v81(BaseParser):
|
||||
def __init__(self):
    """Initialize via the base class constructor, which calls construct_parser()."""
    super().__init__()
|
||||
|
||||
def construct_parser(self):
    """
    Build the pyparsing grammar and store its entry points on the instance:
    ``self.comment``, ``self.label``, ``self.directive``,
    ``self.list_element`` and ``self.instruction_parser``.
    """
    # Comment
    symbol_comment = '//'
    self.comment = pp.Literal(symbol_comment) + pp.Group(
        pp.ZeroOrMore(pp.Word(pp.printables))
    ).setResultsName(self.COMMENT_ID)
    # Define ARM assembly identifier
    relocation = pp.Combine(pp.Literal(':') + pp.Word(pp.alphanums + '_') + pp.Literal(':'))
    first = pp.Word(pp.alphas + '_.', exact=1)
    rest = pp.Word(pp.alphanums + '_.')
    identifier = pp.Group(
        pp.Optional(relocation).setResultsName('relocation')
        + pp.Combine(first + pp.Optional(rest)).setResultsName('name')
    ).setResultsName('identifier')
    # Label
    self.label = pp.Group(
        identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
    ).setResultsName(self.LABEL_ID)
    # Directive
    decimal_number = pp.Combine(
        pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
    ).setResultsName('value')
    hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
    directive_option = pp.Combine(
        pp.Word(pp.alphas + '#@.%', exact=1)
        + pp.Optional(pp.Word(pp.printables + ' ', excludeChars=','))
    )
    directive_parameter = (
        pp.quotedString | directive_option | identifier | hex_number | decimal_number
    )
    commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',')
    self.directive = pp.Group(
        pp.Literal('.')
        + pp.Word(pp.alphanums + '_').setResultsName('name')
        + commaSeparatedList.setResultsName('parameters')
        + pp.Optional(self.comment)
    ).setResultsName(self.DIRECTIVE_ID)

    ##############################
    # Instructions
    # Mnemonic
    # (?P<instr>[a-zA-Z][a-zA-Z0-9]*)(?P<setflg>S?)(P?<CC>.[a-zA-Z]{2})
    mnemonic = pp.Word(pp.alphanums + '.').setResultsName('mnemonic')
    # Immediate:
    # int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+ | fp: ^[0-9]{1}.[0-9]+[eE]{1}[\+-]{1}[0-9]+[fF]?
    symbol_immediate = '#'
    mantissa = pp.Combine(
        pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) + pp.Literal('.') + pp.Word(pp.nums)
    ).setResultsName('mantissa')
    exponent = (
        pp.CaselessLiteral('e')
        + pp.Word('+-').setResultsName('e_sign')
        + pp.Word(pp.nums).setResultsName('exponent')
    )
    float_ = pp.Group(
        mantissa + pp.Optional(exponent) + pp.CaselessLiteral('f')
    ).setResultsName('float')
    double_ = pp.Group(mantissa + pp.Optional(exponent)).setResultsName('double')
    # An immediate is either a number (optionally prefixed by '#') or an identifier.
    immediate = pp.Group(
        pp.Optional(pp.Literal(symbol_immediate))
        + (hex_number ^ decimal_number ^ float_ ^ double_)
        | (pp.Optional(pp.Literal(symbol_immediate)) + identifier)
    ).setResultsName(self.IMMEDIATE_ID)
    shift_op = (
        pp.CaselessLiteral('lsl')
        ^ pp.CaselessLiteral('lsr')
        ^ pp.CaselessLiteral('asr')
        ^ pp.CaselessLiteral('ror')
        ^ pp.CaselessLiteral('sxtw')
        ^ pp.CaselessLiteral('uxtw')
    )
    # Immediate followed by ", <shift_op> <immediate>"
    arith_immediate = pp.Group(
        immediate.setResultsName('base_immediate')
        + pp.Suppress(pp.Literal(','))
        + shift_op.setResultsName('shift_op')
        + immediate.setResultsName('shift')
    ).setResultsName(self.IMMEDIATE_ID)
    # Register:
    # scalar: [XWBHSDQ][0-9]{1,2} | vector: V[0-9]{1,2}\.[12468]{1,2}[BHSD]()?
    # define SP and ZR register aliases as regex, due to pyparsing does not support
    # proper lookahead
    alias_r31_sp = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(sp|SP))')
    alias_r31_zr = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(zr|ZR))')
    scalar = pp.Word(pp.alphas, exact=1).setResultsName('prefix') + pp.Word(
        pp.nums
    ).setResultsName('name')
    index = pp.Literal('[') + pp.Word(pp.nums).setResultsName('index') + pp.Literal(']')
    vector = (
        pp.CaselessLiteral('v').setResultsName('prefix')
        + pp.Word(pp.nums).setResultsName('name')
        + pp.Literal('.')
        + pp.Optional(pp.Word('12468')).setResultsName('lanes')
        + pp.Word(pp.alphas, exact=1).setResultsName('shape')
        + pp.Optional(index)
    )
    self.list_element = vector ^ scalar
    # Register list in braces: either comma-separated ('list') or dash-separated ('range')
    register_list = (
        pp.Literal('{')
        + (
            pp.delimitedList(pp.Combine(self.list_element), delim=',').setResultsName('list')
            ^ pp.delimitedList(pp.Combine(self.list_element), delim='-').setResultsName(
                'range'
            )
        )
        + pp.Literal('}')
        + pp.Optional(index)
    )
    register = pp.Group(
        (alias_r31_sp | alias_r31_zr | vector | scalar | register_list)
        + pp.Optional(
            pp.Suppress(pp.Literal(','))
            + shift_op.setResultsName('shift_op')
            + immediate.setResultsName('shift')
        )
    ).setResultsName(self.REGISTER_ID)
    # Memory
    register_index = register.setResultsName('index') + pp.Optional(
        pp.Literal(',') + pp.Word(pp.alphas) + immediate.setResultsName('scale')
    )
    memory = pp.Group(
        pp.Literal('[')
        + pp.Optional(register.setResultsName('base'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(register_index ^ immediate.setResultsName('offset'))
        + pp.Literal(']')
        + pp.Optional(
            pp.Literal('!').setResultsName('pre_indexed')
            | (pp.Suppress(pp.Literal(',')) + immediate.setResultsName('post_indexed'))
        )
    ).setResultsName(self.MEMORY_ID)
    # Prefetch operation: <type><target><policy>
    prefetch_op = pp.Group(
        pp.Group(pp.CaselessLiteral('PLD') ^ pp.CaselessLiteral('PST')).setResultsName('type')
        + pp.Group(
            pp.CaselessLiteral('L1') ^ pp.CaselessLiteral('L2') ^ pp.CaselessLiteral('L3')
        ).setResultsName('target')
        + pp.Group(pp.CaselessLiteral('KEEP') ^ pp.CaselessLiteral('STRM')).setResultsName(
            'policy'
        )
    ).setResultsName('prfop')
    # Combine to instruction form
    # Only the first operand may be a prefetch operation.
    operand_first = pp.Group(
        register ^ (prefetch_op | immediate) ^ memory ^ arith_immediate ^ identifier
    )
    operand_rest = pp.Group((register ^ immediate ^ memory ^ arith_immediate) | identifier)
    # Mnemonic followed by up to four operands and an optional trailing comment.
    self.instruction_parser = (
        mnemonic
        + pp.Optional(operand_first.setResultsName('operand1'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(operand_rest.setResultsName('operand2'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(operand_rest.setResultsName('operand3'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(operand_rest.setResultsName('operand4'))
        + pp.Optional(self.comment)
    )
|
||||
|
||||
def parse_line(self, line, line_number=None):
    """
    Parse line and return instruction form.

    The line is tried as comment, label, directive and finally as
    instruction; the first grammar that matches the whole line wins.

    :param str line: line of assembly code
    :param int line_number: default None, identifier of instruction form
    :return: parsed instruction form
    :raises ValueError: if the line matches none of the four grammars
    """
    instruction_form = AttrDict(
        {
            self.INSTRUCTION_ID: None,
            self.OPERANDS_ID: None,
            self.DIRECTIVE_ID: None,
            self.COMMENT_ID: None,
            self.LABEL_ID: None,
            'line': line.strip(),
            'line_number': line_number,
        }
    )
    result = None

    # 1. Parse comment
    try:
        result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
        result = AttrDict.convert_dict(result)
        instruction_form[self.COMMENT_ID] = ' '.join(result[self.COMMENT_ID])
    except pp.ParseException:
        # Not a pure comment line; try the next grammar.
        pass

    # 2. Parse label
    if result is None:
        try:
            result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
            result = AttrDict.convert_dict(result)
            instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
            if self.COMMENT_ID in result[self.LABEL_ID]:
                instruction_form[self.COMMENT_ID] = ' '.join(
                    result[self.LABEL_ID][self.COMMENT_ID]
                )
        except pp.ParseException:
            # Not a label; try the next grammar.
            pass

    # 3. Parse directive
    if result is None:
        try:
            result = self.process_operand(
                self.directive.parseString(line, parseAll=True).asDict()
            )
            result = AttrDict.convert_dict(result)
            instruction_form[self.DIRECTIVE_ID] = AttrDict(
                {
                    'name': result[self.DIRECTIVE_ID].name,
                    'parameters': result[self.DIRECTIVE_ID].parameters,
                }
            )
            if self.COMMENT_ID in result[self.DIRECTIVE_ID]:
                instruction_form[self.COMMENT_ID] = ' '.join(
                    result[self.DIRECTIVE_ID][self.COMMENT_ID]
                )
        except pp.ParseException:
            # Not a directive; the line has to be an instruction.
            pass

    # 4. Parse instruction
    if result is None:
        try:
            result = self.parse_instruction(line)
        except (pp.ParseException, KeyError) as err:
            print(
                '\n\n*-*-*-*-*-*-*-*-*-*-\n{}: {}\n*-*-*-*-*-*-*-*-*-*-\n\n'.format(
                    line_number, line
                )
            )
            # Without a result the assignments below would fail with an
            # unrelated TypeError, so report the real problem instead.
            raise ValueError(
                'Could not parse instruction on line {}: {!r}'.format(line_number, line)
            ) from err
        instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
        instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
        instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]

    return instruction_form
|
||||
|
||||
def parse_instruction(self, instruction):
    """
    Parse one instruction line into mnemonic, operands and comment.

    :param str instruction: assembly line holding an instruction
    :return: AttrDict keyed by INSTRUCTION_ID, OPERANDS_ID and COMMENT_ID
    """
    parsed = AttrDict.convert_dict(
        self.instruction_parser.parseString(instruction, parseAll=True).asDict()
    )
    # The grammar names its operand slots operand1..operand4; collect the
    # ones that are present, in order, after post-processing each of them.
    operands = [
        self.process_operand(parsed[slot])
        for slot in ('operand1', 'operand2', 'operand3', 'operand4')
        if slot in parsed
    ]
    mnemonic = parsed.mnemonic
    if self.COMMENT_ID in parsed:
        comment = ' '.join(parsed[self.COMMENT_ID])
    else:
        comment = None
    return AttrDict(
        {
            self.INSTRUCTION_ID: mnemonic,
            self.OPERANDS_ID: operands,
            self.COMMENT_ID: comment,
        }
    )
|
||||
|
||||
def process_operand(self, operand):
    """
    Dispatch a parsed operand to the matching ``substitute_*`` helper.

    Memory operands, register lists/ranges, the ``sp`` register, immediates
    and labels are rewritten; anything else is returned unchanged.

    :param dict operand: operand as produced by the grammar
    :return: restructured operand, or ``operand`` itself if nothing applies
    """
    # structure memory addresses
    if self.MEMORY_ID in operand:
        return self.substitute_memory_address(operand[self.MEMORY_ID])
    if self.REGISTER_ID in operand:
        register = operand[self.REGISTER_ID]
        # structure register lists
        if 'list' in register or 'range' in register:
            # TODO: discuss if ranges should be converted to lists
            return self.substitute_register_list(register)
        if register['name'] == 'sp':
            return self.substitute_sp_register(register)
    # add value attribute to floating point immediates without exponent
    if self.IMMEDIATE_ID in operand:
        return self.substitute_immediate(operand[self.IMMEDIATE_ID])
    if self.LABEL_ID in operand:
        return self.substitute_label(operand[self.LABEL_ID])
    return operand
|
||||
|
||||
def substitute_memory_address(self, memory_address):
    """
    Reduce a parsed memory address to offset, base, index and scale.

    A base or index register named ``sp`` gets the prefix ``'x'``. The scale
    is ``2 ** shift`` when the index carries a shift with one of the
    operations lsl/uxtw/sxtw, otherwise 1. ``pre_indexed`` / ``post_indexed``
    markers are carried over when present.

    :param dict memory_address: memory operand as produced by the grammar
    :return: AttrDict ``{MEMORY_ID: {...}}``
    """
    # Remove unnecessarily created dictionary entries during parsing
    offset = memory_address['offset'] if 'offset' in memory_address else None
    base = memory_address['base'] if 'base' in memory_address else None
    index = memory_address['index'] if 'index' in memory_address else None
    for reg in (base, index):
        if reg is not None and 'name' in reg and reg['name'] == 'sp':
            reg['prefix'] = 'x'
    scale = 1
    if 'index' in memory_address and 'shift' in memory_address['index']:
        shifted = memory_address['index']
        if shifted['shift_op'].lower() in ['lsl', 'uxtw', 'sxtw']:
            scale = 2 ** int(shifted['shift']['value'])
    new_dict = AttrDict({'offset': offset, 'base': base, 'index': index, 'scale': scale})
    if 'pre_indexed' in memory_address:
        new_dict['pre_indexed'] = True
    if 'post_indexed' in memory_address:
        new_dict['post_indexed'] = memory_address['post_indexed']
    return AttrDict({self.MEMORY_ID: new_dict})
|
||||
|
||||
def substitute_sp_register(self, register):
    """
    Give the register the prefix ``'x'`` and wrap it under REGISTER_ID.

    The passed dictionary is modified in place.
    """
    register['prefix'] = 'x'
    return AttrDict({self.REGISTER_ID: register})
|
||||
|
||||
def substitute_register_list(self, register_list):
    """
    Re-parse every element of a register list or range.

    If both ``'list'`` and ``'range'`` are present, ``'range'`` is used.

    :param dict register_list: register operand holding 'list' or 'range'
    :return: AttrDict ``{REGISTER_ID: {<list|range>: [...], 'index': ...}}``
    """
    # Remove unnecessarily created dictionary entries during parsing
    dict_name = ''
    for candidate in ('list', 'range'):
        if candidate in register_list:
            dict_name = candidate
    vlist = [
        AttrDict.convert_dict(self.list_element.parseString(element, parseAll=True).asDict())
        for element in register_list[dict_name]
    ]
    index = register_list['index'] if 'index' in register_list else None
    return AttrDict({self.REGISTER_ID: AttrDict({dict_name: vlist, 'index': index})})
|
||||
|
||||
def substitute_immediate(self, immediate):
    """
    Normalize a parsed immediate.

    Identifiers are returned bare; integer and arithmetic immediates and
    floating-point immediates with an exponent are wrapped unchanged under
    IMMEDIATE_ID; floating-point immediates without an exponent are reduced
    to ``{'value': <mantissa>}``.
    """
    if 'identifier' in immediate:
        # actually an identifier, change declaration
        return immediate
    if 'value' in immediate or 'base_immediate' in immediate:
        # normal integer value or arithmetic immediate, nothing to do
        return AttrDict({self.IMMEDIATE_ID: immediate})
    dict_name = ''
    for fp_kind in ('float', 'double'):
        if fp_kind in immediate:
            dict_name = fp_kind
    fp_immediate = immediate[dict_name]
    if 'exponent' in fp_immediate:
        # nothing to do
        return AttrDict({self.IMMEDIATE_ID: immediate})
    # change 'mantissa' key to 'value'
    return AttrDict({self.IMMEDIATE_ID: AttrDict({'value': fp_immediate['mantissa']})})
|
||||
|
||||
def substitute_label(self, label):
    """
    Flatten ``label['name']['name']`` into ``label['name']`` (in place) and
    wrap the label under LABEL_ID.
    """
    # remove duplicated 'name' level due to identifier
    flat_name = label['name']['name']
    label['name'] = flat_name
    return AttrDict({self.LABEL_ID: label})
|
||||
|
||||
def get_full_reg_name(self, register):
    """
    Build the textual register name: prefix + name, followed by
    ``.<lanes><shape>`` when the register has a 'lanes' entry.
    """
    full_name = register['prefix'] + str(register['name'])
    if 'lanes' in register:
        full_name = full_name + '.' + str(register['lanes']) + register['shape']
    return full_name
|
||||
|
||||
def normalize_imd(self, imd):
    """
    Convert an immediate to a number.

    'value' strings are parsed as hexadecimal when they start with ``0x``
    (any case) and as decimal otherwise; 'float'/'double' entries go through
    ``ieee_to_int``. Anything else (identifier) is returned unchanged.
    """
    if 'value' in imd:
        text = imd['value']
        radix = 16 if text.lower().startswith('0x') else 10
        return int(text, radix)
    for fp_kind in ('float', 'double'):
        if fp_kind in imd:
            return self.ieee_to_int(imd[fp_kind])
    # identifier
    return imd
|
||||
|
||||
def ieee_to_int(self, ieee_val):
    """
    Evaluate ``mantissa * 10 ** exponent`` for a parsed floating-point
    immediate; the exponent is negated when 'e_sign' is ``'-'``.

    Despite the name, the result is a float.
    """
    magnitude = int(ieee_val['exponent'], 10)
    exponent = -magnitude if ieee_val['e_sign'] == '-' else magnitude
    return float(ieee_val['mantissa']) * (10 ** exponent)
|
||||
|
||||
def parse_register(self, register_string):
    """Not implemented for this parser; always raises NotImplementedError."""
    raise NotImplementedError
|
||||
|
||||
def is_gpr(self, register):
    """
    Return True if the register's prefix marks a general purpose register.

    :param dict register: register with a 'prefix' entry
    :return: True for prefix ``w`` or ``x`` (any case), False otherwise
    """
    # Compare against a tuple, not the string 'wx': `in` on a string is a
    # substring test, which also accepted '' and 'wx'. Lower-casing matches
    # what is_reg_dependend_of does with prefixes.
    return register['prefix'].lower() in ('w', 'x')
|
||||
|
||||
def is_vector_register(self, register):
    """
    Return True if the register's prefix marks a vector/FP register.

    :param dict register: register with a 'prefix' entry
    :return: True for prefix b, h, s, d, q or v (any case), False otherwise
    """
    # Tuple membership instead of `in 'bhsdqv'`: the string form is a
    # substring test and also accepted '' or runs such as 'sd'. Lower-casing
    # matches what is_reg_dependend_of does with prefixes.
    return register['prefix'].lower() in ('b', 'h', 's', 'd', 'q', 'v')
|
||||
|
||||
def is_reg_dependend_of(self, reg_a, reg_b):
    """
    Return True if two registers alias each other.

    Registers alias when their names are equal and both prefixes belong to
    the same family: general purpose (w, x) or vector (b, h, s, d, q, v).
    Prefixes are compared case-insensitively.

    :param dict reg_a: register with 'name' and 'prefix'
    :param dict reg_b: register with 'name' and 'prefix'
    :return: True if the registers depend on each other, False otherwise
    """
    # Tuples, not strings: `in` on a string is a substring test and would
    # treat an empty or multi-character prefix as a member of the family.
    prefixes_gpr = ('w', 'x')
    prefixes_vec = ('b', 'h', 's', 'd', 'q', 'v')
    if reg_a['name'] != reg_b['name']:
        return False
    prefix_a = reg_a['prefix'].lower()
    prefix_b = reg_b['prefix'].lower()
    if prefix_a in prefixes_gpr and prefix_b in prefixes_gpr:
        return True
    if prefix_a in prefixes_vec and prefix_b in prefixes_vec:
        return True
    return False
|
||||
|
||||
def get_reg_type(self, register):
    """Return the register type, which for this parser is its 'prefix'."""
    reg_type = register['prefix']
    return reg_type
|
||||
328
osaca/parser/parser_x86att.py
Executable file
328
osaca/parser/parser_x86att.py
Executable file
@@ -0,0 +1,328 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import pyparsing as pp
|
||||
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
|
||||
|
||||
class ParserX86ATT(BaseParser):
|
||||
def __init__(self):
    """Initialize the parser by delegating to the base class initializer."""
    super().__init__()
|
||||
|
||||
def construct_parser(self):
    """
    Build the pyparsing grammars for x86 AT&T syntax.

    Sets ``self.comment``, ``self.label``, ``self.register``,
    ``self.directive`` and ``self.instruction_parser``; the remaining
    grammar pieces are local building blocks.
    """
    # Numbers: optional '-' plus digits, or '0x' plus hex digits; both are
    # stored under the results name 'value'.
    decimal_number = pp.Combine(
        pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
    ).setResultsName('value')
    hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
    # Comment
    # '#' followed by any number of printable words, grouped under COMMENT_ID
    symbol_comment = '#'
    self.comment = pp.Literal(symbol_comment) + pp.Group(
        pp.ZeroOrMore(pp.Word(pp.printables))
    ).setResultsName(self.COMMENT_ID)
    # Define x86 assembly identifier
    # optional "<digits>+" offset, then a name starting with a letter, '_' or '.'
    id_offset = pp.Word(pp.nums) + pp.Suppress(pp.Literal('+'))
    first = pp.Word(pp.alphas + '_.', exact=1)
    rest = pp.Word(pp.alphanums + '$_.')
    identifier = pp.Group(
        pp.Optional(id_offset).setResultsName('offset')
        + pp.Combine(first + pp.Optional(rest)).setResultsName('name')
    ).setResultsName('identifier')
    # Label
    # identifier followed by ':' and an optional trailing comment
    self.label = pp.Group(
        identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
    ).setResultsName(self.LABEL_ID)
    # Register: pp.Regex('^%[0-9a-zA-Z]+,?')
    # '%name' with an optional '{%mask}' suffix
    self.register = pp.Group(
        pp.Literal('%')
        + pp.Word(pp.alphanums).setResultsName('name')
        + pp.Optional(
            pp.Literal('{')
            + pp.Literal('%')
            + pp.Word(pp.alphanums).setResultsName('mask')
            + pp.Literal('}')
        )
    ).setResultsName(self.REGISTER_ID)
    # Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?')
    # '$' followed by a hex number, decimal number or identifier (tried in
    # that order)
    symbol_immediate = '$'
    immediate = pp.Group(
        pp.Literal(symbol_immediate) + (hex_number | decimal_number | identifier)
    ).setResultsName(self.IMMEDIATE_ID)
    # Memory: offset(base, index, scale)
    # every component inside and before the parentheses is optional
    offset = pp.Group(identifier | hex_number | decimal_number).setResultsName(
        self.IMMEDIATE_ID
    )
    scale = pp.Word('1248', exact=1)
    memory = pp.Group(
        pp.Optional(offset.setResultsName('offset'))
        + pp.Literal('(')
        + pp.Optional(self.register.setResultsName('base'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(self.register.setResultsName('index'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(scale.setResultsName('scale'))
        + pp.Literal(')')
    ).setResultsName(self.MEMORY_ID)

    # Directive
    # '.' + name + comma separated parameters + optional comment
    directive_option = pp.Combine(
        pp.Word('#@.', exact=1) + pp.Word(pp.printables, excludeChars=',')
    )
    directive_parameter = (pp.quotedString | directive_option | identifier | hex_number |
                           decimal_number | self.register
    )
    commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',')
    self.directive = pp.Group(
        pp.Literal('.')
        + pp.Word(pp.alphanums + '_').setResultsName('name')
        + commaSeparatedList.setResultsName('parameters')
        + pp.Optional(self.comment)
    ).setResultsName(self.DIRECTIVE_ID)

    # Instructions
    # Mnemonic
    # any number of 'data16'/'data32' prefixes, then the alphanumeric mnemonic
    mnemonic = pp.ZeroOrMore(pp.Literal('data16') | pp.Literal('data32')) + pp.Word(
        pp.alphanums
    ).setResultsName('mnemonic')
    # Combine to instruction form
    # only the first operand may be a bare identifier; up to four operands,
    # each separated by an optional comma, then an optional comment
    operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier)
    operand_rest = pp.Group(self.register ^ immediate ^ memory)
    self.instruction_parser = (
        mnemonic
        + pp.Optional(operand_first.setResultsName('operand1'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(operand_rest.setResultsName('operand2'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(operand_rest.setResultsName('operand3'))
        + pp.Optional(pp.Suppress(pp.Literal(',')))
        + pp.Optional(operand_rest.setResultsName('operand4'))
        + pp.Optional(self.comment)
    )
|
||||
|
||||
def parse_register(self, register_string):
    """
    Parse a single register string.

    :param str register_string: text that should be exactly one register
    :return: processed register operand, or None if it does not parse
    """
    try:
        parsed = self.register.parseString(register_string, parseAll=True).asDict()
        register = self.process_operand(parsed)
    except pp.ParseException:
        register = None
    return register
|
||||
|
||||
def parse_line(self, line, line_number=None):
    """
    Parse line and return instruction form.

    The line is tried, in order, as a comment, a label, a directive and
    finally an instruction; the first grammar that matches the whole line
    decides which fields of the result are filled.

    :param str line: line of assembly code
    :param int line_number: default None, identifier of instruction form
    :return: parsed instruction form
    :raises ValueError: if the line matches none of the grammars
    """
    instruction_form = AttrDict(
        {
            self.INSTRUCTION_ID: None,
            self.OPERANDS_ID: None,
            self.DIRECTIVE_ID: None,
            self.COMMENT_ID: None,
            self.LABEL_ID: None,
            'line': line.strip(),
            'line_number': line_number,
        }
    )
    result = None

    # 1. Parse comment
    try:
        result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
        result = AttrDict.convert_dict(result)
        instruction_form[self.COMMENT_ID] = ' '.join(result[self.COMMENT_ID])
    except pp.ParseException:
        # not a comment line, try the next grammar
        pass

    # 2. Parse label
    if result is None:
        try:
            result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
            result = AttrDict.convert_dict(result)
            instruction_form[self.LABEL_ID] = result[self.LABEL_ID]['name']
            if self.COMMENT_ID in result[self.LABEL_ID]:
                instruction_form[self.COMMENT_ID] = ' '.join(
                    result[self.LABEL_ID][self.COMMENT_ID]
                )
        except pp.ParseException:
            # not a label, try the next grammar
            pass

    # 3. Parse directive
    if result is None:
        try:
            result = self.process_operand(
                self.directive.parseString(line, parseAll=True).asDict()
            )
            result = AttrDict.convert_dict(result)
            instruction_form[self.DIRECTIVE_ID] = AttrDict(
                {
                    'name': result[self.DIRECTIVE_ID]['name'],
                    'parameters': result[self.DIRECTIVE_ID]['parameters'],
                }
            )
            if self.COMMENT_ID in result[self.DIRECTIVE_ID]:
                instruction_form[self.COMMENT_ID] = ' '.join(
                    result[self.DIRECTIVE_ID][self.COMMENT_ID]
                )
        except pp.ParseException:
            # not a directive, fall through to the instruction grammar
            pass

    # 4. Parse instruction
    if result is None:
        try:
            result = self.parse_instruction(line)
        except pp.ParseException as e:
            # Chain the parser error so the failing position stays visible
            # in the traceback.
            raise ValueError(
                'Could not parse instruction on line {}: {!r}'.format(line_number, line)
            ) from e
        instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
        instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
        instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]

    return instruction_form
|
||||
|
||||
def parse_instruction(self, instruction):
    """
    Parse one instruction line into mnemonic, operands and comment.

    :param str instruction: assembly line holding an instruction
    :return: AttrDict keyed by INSTRUCTION_ID, OPERANDS_ID and COMMENT_ID
    """
    parsed = AttrDict.convert_dict(
        self.instruction_parser.parseString(instruction, parseAll=True).asDict()
    )
    # Collect the operand slots that are present, in order
    operands = [
        self.process_operand(parsed[slot])
        for slot in ('operand1', 'operand2', 'operand3', 'operand4')
        if slot in parsed
    ]
    mnemonic = parsed['mnemonic']
    if self.COMMENT_ID in parsed:
        comment = ' '.join(parsed[self.COMMENT_ID])
    else:
        comment = None
    return AttrDict(
        {
            self.INSTRUCTION_ID: mnemonic,
            self.OPERANDS_ID: operands,
            self.COMMENT_ID: comment,
        }
    )
|
||||
|
||||
def process_operand(self, operand):
    """
    Restructure memory, immediate and label operands via the matching
    ``substitu*`` helper; any other operand is returned unchanged.
    """
    # Checked in this order: memory, immediate, label
    dispatch = (
        (self.MEMORY_ID, self.substitute_memory_address),
        (self.IMMEDIATE_ID, self.substitue_immediate),
        (self.LABEL_ID, self.substitute_label),
    )
    for key, handler in dispatch:
        if key in operand:
            return handler(operand[key])
    return operand
|
||||
|
||||
def substitute_memory_address(self, memory_address):
    """
    Reduce a parsed memory address to offset, base, index and scale.

    Missing offset/base/index become None; a missing scale becomes 1,
    otherwise the scale is converted to int.

    :return: AttrDict ``{MEMORY_ID: {...}}``
    """
    # Remove unecessarily created dictionary entries during memory address parsing
    parts = {}
    for key in ('offset', 'base', 'index'):
        parts[key] = memory_address[key] if key in memory_address else None
    if 'scale' in memory_address:
        parts['scale'] = int(memory_address['scale'])
    else:
        parts['scale'] = 1
    return AttrDict({self.MEMORY_ID: AttrDict(parts)})
|
||||
|
||||
def substitute_label(self, label):
    """
    Flatten ``label['name']['name']`` into ``label['name']`` (in place) and
    wrap the label under LABEL_ID.
    """
    # remove duplicated 'name' level due to identifier
    flat_name = label['name']['name']
    label['name'] = flat_name
    return AttrDict({self.LABEL_ID: label})
|
||||
|
||||
def substitue_immediate(self, immediate):
    """
    Return identifiers bare; wrap every other immediate under IMMEDIATE_ID.
    """
    if 'identifier' not in immediate:
        # plain immediate, nothing to change
        return AttrDict({self.IMMEDIATE_ID: immediate})
    # actually an identifier, change declaration
    return immediate
|
||||
|
||||
def get_full_reg_name(self, register):
    """Return the register's 'name' entry unchanged."""
    full_name = register['name']
    return full_name
|
||||
|
||||
def normalize_imd(self, imd):
    """
    Convert an immediate's 'value' string to int (hexadecimal when it starts
    with ``0x`` in any case, decimal otherwise). Immediates without a
    'value' entry (identifiers) are returned unchanged.
    """
    if 'value' not in imd:
        # identifier
        return imd
    text = imd['value']
    radix = 16 if text.lower().startswith('0x') else 10
    return int(text, radix)
|
||||
|
||||
def is_reg_dependend_of(self, reg_a, reg_b):
    """
    Return True if two x86 registers alias each other.

    Checked in order: identical names; vector registers whose names match
    after the first character (e.g. xmm1/ymm1); legacy GPRs from the same
    alias family (e.g. rax/eax/al); numbered GPRs with the same number
    (e.g. r8/r8d).

    :param dict reg_a: register with a 'name' entry
    :param dict reg_b: register with a 'name' entry
    :return: True if the registers depend on each other, False otherwise
    """
    # Check if they are the same registers
    if reg_a['name'] == reg_b['name']:
        return True
    # Check vector registers first
    if self.is_vector_register(reg_a):
        if self.is_vector_register(reg_b):
            if reg_a['name'][1:] == reg_b['name'][1:]:
                # Registers in the same vector space
                return True
        return False
    # Check basic GPRs
    a_dep = ['RAX', 'EAX', 'AX', 'AH', 'AL']
    b_dep = ['RBX', 'EBX', 'BX', 'BH', 'BL']
    c_dep = ['RCX', 'ECX', 'CX', 'CH', 'CL']
    d_dep = ['RDX', 'EDX', 'DX', 'DH', 'DL']
    sp_dep = ['RSP', 'ESP', 'SP', 'SPL']
    # The base pointer family was missing, so rbp/ebp/bp/bpl were never
    # reported as dependent on each other.
    bp_dep = ['RBP', 'EBP', 'BP', 'BPL']
    src_dep = ['RSI', 'ESI', 'SI', 'SIL']
    dst_dep = ['RDI', 'EDI', 'DI', 'DIL']
    basic_gprs = [a_dep, b_dep, c_dep, d_dep, sp_dep, bp_dep, src_dep, dst_dep]
    if self.is_basic_gpr(reg_a):
        if self.is_basic_gpr(reg_b):
            for dep_group in basic_gprs:
                if reg_a['name'].upper() in dep_group:
                    if reg_b['name'].upper() in dep_group:
                        return True
        return False
    # Check other GPRs
    gpr_parser = (
        pp.CaselessLiteral('R')
        + pp.Word(pp.nums).setResultsName('id')
        + pp.Optional(pp.Word('dwbDWB', exact=1))
    )
    try:
        id_a = gpr_parser.parseString(reg_a['name'], parseAll=True).asDict()['id']
        id_b = gpr_parser.parseString(reg_b['name'], parseAll=True).asDict()['id']
        if id_a == id_b:
            return True
    except pp.ParseException:
        return False
    # No dependencies
    return False
|
||||
|
||||
def is_basic_gpr(self, register):
    """Return True if the register name contains no digit."""
    return not any(ch.isdigit() for ch in register['name'])
|
||||
|
||||
def is_gpr(self, register):
    """
    Return True if the register is a general purpose register.

    A register qualifies when ``is_basic_gpr`` accepts it, or when its name
    is 'R' (any case) + digits + an optional d/w/b suffix.
    """
    numbered_gpr = (
        pp.CaselessLiteral('R')
        + pp.Word(pp.nums).setResultsName('id')
        + pp.Optional(pp.Word('dwbDWB', exact=1))
    )
    if self.is_basic_gpr(register):
        return True
    try:
        numbered_gpr.parseString(register['name'], parseAll=True)
    except pp.ParseException:
        return False
    else:
        return True
|
||||
|
||||
def is_vector_register(self, register):
    """
    Return True if the name is longer than two characters and its second
    and third characters are 'mm' (any case), e.g. xmm0, ymm1, zmm2.
    """
    name = register['name']
    return len(name) > 2 and name[1:3].lower() == 'mm'
|
||||
|
||||
def get_reg_type(self, register):
    """
    Return 'gpr' for general purpose registers, or the lower-cased first
    three characters of the name for vector registers.

    :raises ValueError: if the register is neither
    """
    if self.is_gpr(register):
        return 'gpr'
    if not self.is_vector_register(register):
        raise ValueError
    return register['name'][:3].lower()
|
||||
11
osaca/semantics/__init__.py
Normal file
11
osaca/semantics/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Tools for semantic analysis of parser result.
|
||||
|
||||
Only the classes below will be exported, so please add new semantic tools to __all__.
|
||||
"""
|
||||
from .hw_model import MachineModel
|
||||
from .kernel_dg import KernelDG
|
||||
from .marker_utils import reduce_to_section
|
||||
from .semantics_appender import SemanticsAppender, INSTR_FLAGS
|
||||
|
||||
__all__ = ['MachineModel', 'KernelDG', 'reduce_to_section', 'SemanticsAppender', 'INSTR_FLAGS']
|
||||
404
osaca/semantics/hw_model.py
Executable file
404
osaca/semantics/hw_model.py
Executable file
@@ -0,0 +1,404 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import re
|
||||
from copy import deepcopy
|
||||
from itertools import product
|
||||
|
||||
import ruamel.yaml
|
||||
from ruamel.yaml.compat import StringIO
|
||||
|
||||
from osaca import __version__, utils
|
||||
from osaca.parser import ParserX86ATT
|
||||
|
||||
|
||||
class MachineModel(object):
|
||||
def __init__(self, arch=None, path_to_yaml=None, isa=None):
    """
    Create a machine model.

    With ``arch`` the model is loaded from ``<arch>.yml`` located through
    ``utils.find_file``; with ``path_to_yaml`` it is loaded from that file;
    with only ``isa`` an empty model skeleton for that ISA is created.

    :param str arch: architecture code, mutually exclusive with path_to_yaml
    :param str path_to_yaml: path to a model file
    :param str isa: ISA name, used only when neither of the above is given
    :raises ValueError: if nothing is specified, or both arch and
        path_to_yaml are given
    """
    # Define both attributes on every path. Previously they were only set
    # when a model file was loaded, so ISA-only instances lacked them.
    self._path = path_to_yaml
    self._arch = arch
    if not arch and not path_to_yaml:
        if not isa:
            raise ValueError('One of arch, path_to_yaml and isa must be specified')
        self._data = {
            'osaca_version': str(__version__),
            'micro_architecture': None,
            'arch_code': None,
            'isa': isa,
            'ROB_size': None,
            'retired_uOps_per_cycle': None,
            'scheduler_size': None,
            'hidden_loads': None,
            'load_latency': {},
            # one empty load-throughput entry per addressing-mode combination
            'load_throughput': [
                {'base': b, 'index': i, 'offset': o, 'scale': s, 'port_pressure': []}
                for b, i, o, s in product(['gpr'], ['gpr', None], ['imd', None], [1, 8])
            ],
            'ports': [],
            'port_model_scheme': None,
            'instruction_forms': [],
        }
    else:
        if arch and path_to_yaml:
            raise ValueError('Only one of arch and path_to_yaml is allowed.')
        yaml = self._create_yaml_object()
        if arch:
            self._arch = arch.lower()
            with open(utils.find_file(self._arch + '.yml'), 'r') as f:
                self._data = yaml.load(f)
        elif path_to_yaml:
            with open(self._path, 'r') as f:
                self._data = yaml.load(f)
|
||||
|
||||
def __getitem__(self, key):
|
||||
"""Return configuration entry."""
|
||||
return self._data[key]
|
||||
|
||||
def __contains__(self, key):
|
||||
"""Return true if configuration key is present."""
|
||||
return key in self._data
|
||||
|
||||
######################################################
|
||||
|
||||
def get_instruction(self, name, operands):
|
||||
"""Find and return instruction data from name and operands."""
|
||||
if name is None:
|
||||
return None
|
||||
try:
|
||||
return next(
|
||||
instruction_form
|
||||
for instruction_form in self._data['instruction_forms']
|
||||
if instruction_form['name'].upper() == name.upper()
|
||||
and self._match_operands(instruction_form['operands'], operands)
|
||||
)
|
||||
except StopIteration:
|
||||
return None
|
||||
except TypeError as e:
|
||||
print('\nname: {}\noperands: {}'.format(name, operands))
|
||||
raise TypeError from e
|
||||
|
||||
def average_port_pressure(self, port_pressure):
|
||||
"""Construct average port pressure list from instruction data."""
|
||||
port_list = self._data['ports']
|
||||
average_pressure = [0.0] * len(port_list)
|
||||
for cycles, ports in port_pressure:
|
||||
for p in ports:
|
||||
average_pressure[port_list.index(p)] += cycles / len(ports)
|
||||
return average_pressure
|
||||
|
||||
def set_instruction(
|
||||
self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None
|
||||
):
|
||||
"""Import instruction form information."""
|
||||
# If it already exists. Overwrite information.
|
||||
instr_data = self.get_instruction(name, operands)
|
||||
if instr_data is None:
|
||||
instr_data = {}
|
||||
self._data['instruction_forms'].append(instr_data)
|
||||
|
||||
instr_data['name'] = name
|
||||
instr_data['operands'] = operands
|
||||
instr_data['latency'] = latency
|
||||
instr_data['port_pressure'] = port_pressure
|
||||
instr_data['throughput'] = throughput
|
||||
instr_data['uops'] = uops
|
||||
|
||||
def set_instruction_entry(self, entry):
|
||||
self.set_instruction(
|
||||
entry['name'],
|
||||
entry['operands'] if 'operands' in entry else None,
|
||||
entry['latency'] if 'latency' in entry else None,
|
||||
entry['port_pressure'] if 'port_pressure' in entry else None,
|
||||
entry['throughput'] if 'throughput' in entry else None,
|
||||
entry['uops'] if 'uops' in entry else None,
|
||||
)
|
||||
|
||||
def add_port(self, port):
|
||||
if port not in self._data['ports']:
|
||||
self._data['ports'].append(port)
|
||||
|
||||
def get_ISA(self):
|
||||
return self._data['isa'].lower()
|
||||
|
||||
def get_arch(self):
|
||||
return self._data['arch_code'].lower()
|
||||
|
||||
def get_ports(self):
|
||||
return self._data['ports']
|
||||
|
||||
def has_hidden_loads(self):
|
||||
if 'hidden_loads' in self._data:
|
||||
return self._data['hidden_loads']
|
||||
return False
|
||||
|
||||
def get_load_latency(self, reg_type):
|
||||
return self._data['load_latency'][reg_type]
|
||||
|
||||
def get_load_throughput(self, memory):
|
||||
ld_tp = [m for m in self._data['load_throughput'] if self._match_mem_entries(memory, m)]
|
||||
if len(ld_tp) > 0:
|
||||
return ld_tp[0]['port_pressure']
|
||||
return None
|
||||
|
||||
def _match_mem_entries(self, mem, i_mem):
|
||||
if self._data['isa'].lower() == 'aarch64':
|
||||
return self._is_AArch64_mem_type(i_mem, mem)
|
||||
if self._data['isa'].lower() == 'x86':
|
||||
return self._is_x86_mem_type(i_mem, mem)
|
||||
|
||||
def get_data_ports(self):
|
||||
data_port = re.compile(r'^[0-9]+D$')
|
||||
data_ports = [x for x in filter(data_port.match, self._data['ports'])]
|
||||
return data_ports
|
||||
|
||||
@staticmethod
|
||||
def get_full_instruction_name(instruction_form):
|
||||
operands = []
|
||||
for op in instruction_form['operands']:
|
||||
op_attrs = [
|
||||
y + ':' + str(op[y])
|
||||
for y in list(filter(lambda x: True if x != 'class' else False, op))
|
||||
]
|
||||
operands.append('{}({})'.format(op['class'], ','.join(op_attrs)))
|
||||
return '{} {}'.format(instruction_form['name'], ','.join(operands))
|
||||
|
||||
@staticmethod
|
||||
def get_isa_for_arch(arch):
|
||||
arch_dict = {
|
||||
'tx2': 'aarch64',
|
||||
'zen1': 'x86',
|
||||
'snb': 'x86',
|
||||
'ivb': 'x86',
|
||||
'hsw': 'x86',
|
||||
'bdw': 'x86',
|
||||
'skl': 'x86',
|
||||
'skx': 'x86',
|
||||
'csx': 'x86',
|
||||
'wsm': 'x86',
|
||||
'nhm': 'x86',
|
||||
'kbl': 'x86',
|
||||
'cnl': 'x86',
|
||||
'cfl': 'x86',
|
||||
'zen+': 'x86',
|
||||
}
|
||||
arch = arch.lower()
|
||||
if arch in arch_dict:
|
||||
return arch_dict[arch].lower()
|
||||
else:
|
||||
raise ValueError("Unknown architecture {!r}.".format(arch))
|
||||
|
||||
def dump(self, stream=None):
    """
    Serialize the model as YAML.

    Everything except 'instruction_forms' is dumped first, then the
    instruction forms with each port_pressure list in flow style.

    :param stream: object to write to; if not given, the YAML text is
        returned as a string
    :return: YAML string when no stream was given, otherwise None
    """
    # Replace instruction form's port_pressure with styled version for RoundtripDumper
    formatted_instruction_forms = deepcopy(self._data['instruction_forms'])
    for instruction_form in formatted_instruction_forms:
        port_pressure = instruction_form.get('port_pressure')
        # set_instruction() stores None when no port pressure was supplied;
        # only an actual sequence can be wrapped in a CommentedSeq.
        if port_pressure is not None:
            cs = ruamel.yaml.comments.CommentedSeq(port_pressure)
            cs.fa.set_flow_style()
            instruction_form['port_pressure'] = cs

    # Create YAML object
    yaml = self._create_yaml_object()
    return_string = not stream
    if return_string:
        # Create stream object to output string
        stream = StringIO()
    yaml.dump({k: v for k, v in self._data.items() if k != 'instruction_forms'}, stream)
    yaml.dump({'instruction_forms': formatted_instruction_forms}, stream)
    if return_string:
        return stream.getvalue()
|
||||
|
||||
######################################################
|
||||
|
||||
def _check_for_duplicate(self, name, operands):
|
||||
matches = [
|
||||
instruction_form
|
||||
for instruction_form in self._data['instruction_forms']
|
||||
if instruction_form['name'].lower() == name.lower()
|
||||
and self._match_operands(instruction_form['operands'], operands)
|
||||
]
|
||||
if len(matches) > 1:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _match_operands(self, i_operands, operands):
|
||||
if isinstance(operands, dict):
|
||||
operands = operands['operand_list']
|
||||
operands_ok = True
|
||||
if len(operands) != len(i_operands):
|
||||
return False
|
||||
for idx, operand in enumerate(operands):
|
||||
i_operand = i_operands[idx]
|
||||
operands_ok = operands_ok and self._check_operands(i_operand, operand)
|
||||
if operands_ok:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def _check_operands(self, i_operands, operands):
|
||||
if self._data['isa'].lower() == 'aarch64':
|
||||
return self._check_AArch64_operands(i_operands, operands)
|
||||
if self._data['isa'].lower() == 'x86':
|
||||
return self._check_x86_operands(i_operands, operands)
|
||||
|
||||
def _check_AArch64_operands(self, i_operand, operand):
|
||||
if 'class' in operand:
|
||||
# compare two DB entries
|
||||
return self._compare_db_entries(i_operand, operand)
|
||||
# register
|
||||
if 'register' in operand:
|
||||
if i_operand['class'] != 'register':
|
||||
return False
|
||||
return self._is_AArch64_reg_type(i_operand, operand['register'])
|
||||
# memory
|
||||
if 'memory' in operand:
|
||||
if i_operand['class'] != 'memory':
|
||||
return False
|
||||
return self._is_AArch64_mem_type(i_operand, operand['memory'])
|
||||
# immediate
|
||||
if 'value' in operand or ('immediate' in operand and 'value' in operand['immediate']):
|
||||
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'int'
|
||||
if 'float' in operand or ('immediate' in operand and 'float' in operand['immediate']):
|
||||
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'float'
|
||||
if 'double' in operand or ('immediate' in operand and 'double' in operand['immediate']):
|
||||
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'double'
|
||||
if 'identifier' in operand or (
|
||||
'immediate' in operand and 'identifier' in operand['immediate']
|
||||
):
|
||||
return i_operand['class'] == 'identifier'
|
||||
# prefetch option
|
||||
if 'prfop' in operand:
|
||||
return i_operand['class'] == 'prfop'
|
||||
# no match
|
||||
return False
|
||||
|
||||
def _check_x86_operands(self, i_operand, operand):
|
||||
if 'class' in operand:
|
||||
# compare two DB entries
|
||||
return self._compare_db_entries(i_operand, operand)
|
||||
# register
|
||||
if 'register' in operand:
|
||||
if i_operand['class'] != 'register':
|
||||
return False
|
||||
return self._is_x86_reg_type(i_operand['name'], operand['register'])
|
||||
# memory
|
||||
if 'memory' in operand:
|
||||
if i_operand['class'] != 'memory':
|
||||
return False
|
||||
return self._is_x86_mem_type(i_operand, operand['memory'])
|
||||
# immediate
|
||||
if 'immediate' in operand or 'value' in operand:
|
||||
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'int'
|
||||
# identifier (e.g., labels)
|
||||
if 'identifier' in operand:
|
||||
return i_operand['class'] == 'identifier'
|
||||
|
||||
def _compare_db_entries(self, operand_1, operand_2):
|
||||
operand_attributes = list(
|
||||
filter(lambda x: True if x != 'source' and x != 'destination' else False, operand_1)
|
||||
)
|
||||
for key in operand_attributes:
|
||||
try:
|
||||
if operand_1[key] != operand_2[key]:
|
||||
return False
|
||||
except KeyError:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _is_AArch64_reg_type(self, i_reg, reg):
|
||||
if reg['prefix'] != i_reg['prefix']:
|
||||
return False
|
||||
if 'shape' in reg:
|
||||
if 'shape' in i_reg and reg['shape'] == i_reg['shape']:
|
||||
return True
|
||||
return False
|
||||
return True
|
||||
|
||||
def _is_x86_reg_type(self, i_reg_name, reg):
    """
    Return True if register ``reg`` is of the model register type
    ``i_reg_name``.

    Vector registers match when the first three characters of their name
    equal ``i_reg_name``; every other register matches only 'gpr'.
    """
    # differentiate between vector registers (xmm, ymm, zmm) and others (gpr)
    is_vector = ParserX86ATT().is_vector_register(reg)
    if is_vector:
        return reg['name'][0:3] == i_reg_name
    return i_reg_name == 'gpr'
|
||||
|
||||
def _is_AArch64_mem_type(self, i_mem, mem):
|
||||
if (
|
||||
# check base
|
||||
mem['base']['prefix'] == i_mem['base']
|
||||
# check offset
|
||||
and (
|
||||
mem['offset'] == i_mem['offset']
|
||||
or (
|
||||
mem['offset'] is not None
|
||||
and 'identifier' in mem['offset']
|
||||
and i_mem['offset'] == 'identifier'
|
||||
)
|
||||
or (
|
||||
mem['offset'] is not None
|
||||
and 'value' in mem['offset']
|
||||
and i_mem['offset'] == 'imd'
|
||||
)
|
||||
)
|
||||
# check index
|
||||
and (
|
||||
mem['index'] == i_mem['index']
|
||||
or (
|
||||
mem['index'] is not None
|
||||
and 'prefix' in mem['index']
|
||||
and mem['index']['prefix'] == i_mem['index']
|
||||
)
|
||||
)
|
||||
and (mem['scale'] == i_mem['scale'] or (mem['scale'] != 1 and i_mem['scale'] != 1))
|
||||
and (('pre_indexed' in mem) == (i_mem['pre-indexed']))
|
||||
and (('post_indexed' in mem) == (i_mem['post-indexed']))
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _is_x86_mem_type(self, i_mem, mem):
|
||||
if (
|
||||
# check base
|
||||
self._is_x86_reg_type(i_mem['base'], mem['base'])
|
||||
# check offset
|
||||
and (
|
||||
mem['offset'] == i_mem['offset']
|
||||
or (
|
||||
mem['offset'] is not None
|
||||
and 'identifier' in mem['offset']
|
||||
and i_mem['offset'] == 'identifier'
|
||||
)
|
||||
or (
|
||||
mem['offset'] is not None
|
||||
and 'value' in mem['offset']
|
||||
and (
|
||||
i_mem['offset'] == 'imd'
|
||||
or (i_mem['offset'] is None and mem['offset']['value'] == '0')
|
||||
)
|
||||
)
|
||||
)
|
||||
# check index
|
||||
and (
|
||||
mem['index'] == i_mem['index']
|
||||
or (
|
||||
mem['index'] is not None
|
||||
and 'name' in mem['index']
|
||||
and self._is_x86_reg_type(i_mem['index'], mem['index'])
|
||||
)
|
||||
)
|
||||
and (mem['scale'] == i_mem['scale'] or (mem['scale'] != 1 and i_mem['scale'] != 1))
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _create_yaml_object(self):
    """
    Create and configure the ruamel YAML object.

    :returns: the configured `ruamel.yaml.YAML` instance
    """
    # NOTE(review): the statement order is kept exactly as found; accessing
    # `representer` before setting `default_flow_style` may matter for ruamel -- confirm
    # before reordering.
    yaml_obj = ruamel.yaml.YAML()
    # use the custom representation for `None` values
    yaml_obj.representer.add_representer(type(None), self.__represent_none)
    yaml_obj.default_flow_style = None
    yaml_obj.width = 120
    # always report "ignore aliases" to the representer
    yaml_obj.representer.ignore_aliases = lambda *args: True
    return yaml_obj
|
||||
|
||||
def __represent_none(self, yaml_obj, data):
|
||||
return yaml_obj.represent_scalar(u'tag:yaml.org,2002:null', u'~')
|
||||
335
osaca/semantics/kernel_dg.py
Executable file
335
osaca/semantics/kernel_dg.py
Executable file
@@ -0,0 +1,335 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import copy
|
||||
from itertools import chain, product
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from osaca.parser import AttrDict
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
class KernelDG(nx.DiGraph):
|
||||
def __init__(self, parsed_kernel, parser, hw_model: MachineModel):
    """
    Build the dependency graph and the loop-carried dependencies of a kernel.

    :param parsed_kernel: list of instruction forms of the kernel
    :param parser: parser used for the register dependency checks
    :param hw_model: machine model of the target architecture
    """
    # initialize the nx.DiGraph base class, otherwise the instance itself is not a usable
    # graph object
    super().__init__()
    self.kernel = parsed_kernel
    self.parser = parser
    self.model = hw_model
    self.dg = self.create_DG(self.kernel)
    self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
|
||||
|
||||
def create_DG(self, kernel):
    """
    Create the dependency graph of a kernel.

    :param kernel: list of instruction forms
    :returns: `nx.DiGraph` with one node per line number and latencies as edge attribute
    """
    # 1. go through kernel instruction forms and add them as node attribute
    # 2. find edges (to dependent further instructions)
    # 3. get LT value and set as edge weight
    dg = nx.DiGraph()
    for position, instr in enumerate(kernel):
        line_no = instr['line_number']
        dg.add_node(line_no)
        dg.nodes[line_no]['instruction_form'] = instr
        # add load as separate node if existent
        # TODO use INSTR_FLAGS here
        flags = instr['flags']
        if 'performs_load' in flags and 'is_load_instruction' not in flags:
            # add new node
            load_node = line_no + 0.1
            dg.add_node(load_node)
            dg.nodes[load_node]['instruction_form'] = instr
            # and set LD latency as edge weight
            dg.add_edge(
                load_node, line_no, latency=instr['latency'] - instr['latency_wo_load']
            )
        for dependent in self.find_depending(instr, kernel[position + 1:]):
            if 'latency_wo_load' in instr:
                weight = instr['latency_wo_load']
            else:
                weight = instr['latency']
            dg.add_edge(line_no, dependent['line_number'], latency=weight)
            dg.nodes[dependent['line_number']]['instruction_form'] = dependent
    return dg
|
||||
|
||||
def check_for_loopcarried_dep(self, kernel):
    """
    Find loop-carried dependencies by analyzing two consecutive copies of the kernel.

    NOTE(review): the indentation of this method was reconstructed, confirm the nesting
    against the original file.

    :param kernel: list of instruction forms
    :returns: dict mapping a root line number to a dict with the keys 'root' and
              'dependencies'
    """
    multiplier = len(kernel) + 1
    # increase line number for second kernel loop
    kernel_length = len(kernel)
    first_line_no = kernel[0].line_number
    kernel_copy = [AttrDict.convert_dict(d) for d in copy.deepcopy(kernel)]
    tmp_kernel = kernel + kernel_copy
    # line numbers of the copy are the original ones times `multiplier`
    for i, instruction_form in enumerate(tmp_kernel[kernel_length:]):
        tmp_kernel[i + kernel_length].line_number = instruction_form.line_number * multiplier
    # get dependency graph
    dg = self.create_DG(tmp_kernel)

    # build cyclic loop-carried dependencies
    # i.e., all simple paths from an integer node below `first_line_no * multiplier` to the
    # node with the multiplied line number
    loopcarried_deps = [
        (node, list(nx.algorithms.simple_paths.all_simple_paths(dg, node, node * multiplier)))
        for node in dg.nodes
        if node < first_line_no * multiplier and node == int(node)
    ]
    # filter others and create graph
    # (flattens to one (root, path) tuple per found path)
    loopcarried_deps = list(
        chain.from_iterable(
            [list(product([dep_chain[0]], dep_chain[1])) for dep_chain in loopcarried_deps]
        )
    )
    # adjust line numbers, filter duplicates
    # and add reference to kernel again
    loopcarried_deps_dict = {}
    tmp_list = []
    for i, dep in enumerate(loopcarried_deps):
        # keep only path nodes at or above `first_line_no * multiplier` and divide their
        # line numbers by `multiplier` again
        nodes = [int(n / multiplier) for n in dep[1] if n >= first_line_no * multiplier]
        loopcarried_deps[i] = (dep[0], nodes)
    for dep in loopcarried_deps:
        # drop a dependency if another root's chain contains all its nodes and its root
        is_subset = False
        for other_dep in [x for x in loopcarried_deps if x[0] != dep[0]]:
            if set(dep[1]).issubset(set(other_dep[1])) and dep[0] in other_dep[1]:
                is_subset = True
        if not is_subset:
            tmp_list.append(dep)
    loopcarried_deps = tmp_list
    for dep in loopcarried_deps:
        nodes = []
        # reset the LCD latency of all involved instruction forms first
        for n in dep[1]:
            self._get_node_by_lineno(int(n))['latency_lcd'] = 0
        for n in dep[1]:
            node = self._get_node_by_lineno(int(n))
            if int(n) != n and int(n) in dep[1]:
                node['latency_lcd'] += node['latency'] - node['latency_wo_load']
            else:
                node['latency_lcd'] += node['latency_wo_load']
            nodes.append(node)
        # NOTE(review): a later chain with the same root replaces an earlier entry here
        loopcarried_deps_dict[dep[0]] = {
            'root': self._get_node_by_lineno(dep[0]),
            'dependencies': nodes,
        }

    return loopcarried_deps_dict
|
||||
|
||||
def _get_node_by_lineno(self, lineno):
|
||||
return [instr for instr in self.kernel if instr.line_number == lineno][0]
|
||||
|
||||
def get_critical_path(self):
    """
    Determine the critical path of the kernel and set 'latency_cp' on its instruction forms.

    :returns: list of the kernel's instruction forms whose line number is on the longest path
    :raises NotImplementedError: if the dependency graph is not a directed acyclic graph
    """
    if not nx.algorithms.dag.is_directed_acyclic_graph(self.dg):
        # split to DAG
        raise NotImplementedError('Kernel is cyclic.')
    longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight='latency')
    for line_number in longest_path:
        self._get_node_by_lineno(int(line_number))['latency_cp'] = 0
    # add LD latency to instruction
    for line_number in longest_path:
        instr_line = int(line_number)
        node = self._get_node_by_lineno(instr_line)
        if line_number != instr_line and instr_line in longest_path:
            # separate load node: account for the latency of the load edge
            node['latency_cp'] += self.dg.edges[(line_number, instr_line)]['latency']
        elif (
            line_number == instr_line
            and 'mem_dep' in node
            and self.dg.has_edge(node['mem_dep']['line_number'], line_number)
        ):
            node['latency_cp'] += node['latency']
        elif 'latency_wo_load' in node:
            node['latency_cp'] += node['latency_wo_load']
        else:
            node['latency_cp'] += node['latency']
    return [form for form in self.kernel if form['line_number'] in longest_path]
|
||||
|
||||
def get_loopcarried_dependencies(self):
    """
    Return the loop-carried dependencies computed during initialization.

    :returns: the stored loop-carried dependencies
    :raises NotImplementedError: if the dependency graph is not a directed acyclic graph
    """
    if not nx.algorithms.dag.is_directed_acyclic_graph(self.dg):
        # split to DAG
        raise NotImplementedError('Kernel is cyclic.')
    return self.loopcarried_deps
|
||||
|
||||
def find_depending(self, instruction_form, kernel, include_write=False):
    """
    Generate the instruction forms of `kernel` that depend on `instruction_form`.

    For each destination register the kernel is scanned for reads until the register is
    overwritten. The same is done for the base register of a pre- or post-indexed
    destination memory address; these dependents get the key 'mem_dep' set.

    :param instruction_form: instruction form whose destinations are tracked
    :param kernel: instruction forms to search in
    :param include_write: if `True`, the overwriting instruction form is yielded as well
    :returns: generator of dependent instruction forms
    """
    operands = instruction_form.operands
    if operands is None:
        return
    for dst in operands.destination + operands.src_dst:
        if 'register' in dst:
            # Check for read of register until overwrite
            for candidate in kernel:
                if self.is_read(dst.register, candidate):
                    yield candidate
                if self.is_written(dst.register, candidate):
                    # register is overwritten (possibly by a src_dst operand), stop here
                    if include_write:
                        yield candidate
                    break
        elif 'memory' in dst:
            # Check if base register is altered during memory access
            if 'pre_indexed' in dst.memory or 'post_indexed' in dst.memory:
                # Check for read of base register until overwrite
                for candidate in kernel:
                    if self.is_read(dst.memory.base, candidate):
                        candidate['mem_dep'] = instruction_form
                        yield candidate
                    if self.is_written(dst.memory.base, candidate):
                        # base register is overwritten, stop here
                        if include_write:
                            candidate['mem_dep'] = instruction_form
                            yield candidate
                        break
|
||||
|
||||
def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
    """
    Return an iterator over the successors of an instruction form in the dependency graph.

    :param instr_form: instruction form to look up, used if no `line_number` is given
    :param line_number: line number of the instruction form
    :returns: iterator over the successor nodes, empty if the node is not in the graph
    :raises ValueError: if neither `instr_form` nor `line_number` is given
    """
    if not (instr_form or line_number):
        raise ValueError('Either instruction form or line_number required.')
    if not line_number:
        line_number = instr_form['line_number']
    if not self.dg.has_node(line_number):
        return iter([])
    return self.dg.successors(line_number)
|
||||
|
||||
def is_read(self, register, instruction_form):
    """
    Check whether `register` is read by an instruction form.

    Source and source/destination register operands are checked, plus the base and index
    registers of all memory operands, including destination memory addresses.

    :param register: register to look for
    :param instruction_form: instruction form to check
    :returns: result of the dependency checks, `False` if there are no operands
    """
    operands = instruction_form.operands
    if operands is None:
        return False
    depends_on = self.parser.is_reg_dependend_of
    read = False
    for src in operands.source + operands.src_dst:
        if 'register' in src:
            read = depends_on(register, src.register) or read
        if 'memory' in src:
            address = src.memory
            if address.base is not None:
                read = depends_on(register, address.base) or read
            if address.index is not None:
                read = depends_on(register, address.index) or read
    # Check also if read in destination memory address
    for dst in operands.destination + operands.src_dst:
        if 'memory' in dst:
            address = dst.memory
            if address.base is not None:
                read = depends_on(register, address.base) or read
            if address.index is not None:
                read = depends_on(register, address.index) or read
    return read
|
||||
|
||||
def is_written(self, register, instruction_form):
    """
    Check whether `register` is written by an instruction form.

    Destination and source/destination register operands are checked, plus the base
    register of every pre- or post-indexed memory operand.

    :param register: register to look for
    :param instruction_form: instruction form to check
    :returns: result of the dependency checks, `False` if there are no operands
    """
    operands = instruction_form.operands
    if operands is None:
        return False
    depends_on = self.parser.is_reg_dependend_of
    written = False
    for dst in operands.destination + operands.src_dst:
        if 'register' in dst:
            written = depends_on(register, dst.register) or written
        if 'memory' in dst:
            address = dst.memory
            if 'pre_indexed' in address or 'post_indexed' in address:
                written = depends_on(register, address.base) or written
    # Check also for possible pre- or post-indexing in memory addresses
    for src in operands.source + operands.src_dst:
        if 'memory' in src:
            address = src.memory
            if 'pre_indexed' in address or 'post_indexed' in address:
                written = depends_on(register, address.base) or written
    return written
|
||||
|
||||
def export_graph(self, filepath=None):
    """
    Write the dependency graph, annotated with critical path and loop-carried
    dependencies, to a dot file.

    NOTE(review): the indentation of this method was reconstructed, confirm the nesting
    (especially of the LCD edge coloring and the 'shape' assignment) against the original
    file.

    :param filepath: path of the dot file, 'osaca_dg.dot' is used if not given
    """
    # work on a copy so that the stored graph stays unchanged
    graph = copy.deepcopy(self.dg)
    cp = self.get_critical_path()
    cp_line_numbers = [x['line_number'] for x in cp]
    lcd = self.get_loopcarried_dependencies()
    lcd_line_numbers = {}
    for dep in lcd:
        lcd_line_numbers[dep] = [x['line_number'] for x in lcd[dep]['dependencies']]
    # add color scheme
    graph.graph['node'] = {'colorscheme': 'accent8'}
    graph.graph['edge'] = {'colorscheme': 'accent8'}

    # create LCD edges
    # (one edge from the highest to the lowest line number of each dependency chain)
    for dep in lcd_line_numbers:
        min_line_number = min(lcd_line_numbers[dep])
        max_line_number = max(lcd_line_numbers[dep])
        graph.add_edge(max_line_number, min_line_number)
        graph.edges[max_line_number, min_line_number]['latency'] = [
            x for x in lcd[dep]['dependencies'] if x['line_number'] == max_line_number
        ][0]['latency_lcd']

    # add label to edges
    for e in graph.edges:
        graph.edges[e]['label'] = graph.edges[e]['latency']

    # add CP values to graph
    for n in cp:
        graph.nodes[n['line_number']]['instruction_form']['latency_cp'] = n['latency_cp']

    # color CP and LCD
    for n in graph.nodes:
        if n in cp_line_numbers:
            # graph.nodes[n]['color'] = 1
            graph.nodes[n]['style'] = 'bold'
            graph.nodes[n]['penwidth'] = 4
        for col, dep in enumerate(lcd):
            if n in lcd_line_numbers[dep]:
                if 'style' not in graph.nodes[n]:
                    graph.nodes[n]['style'] = 'filled'
                else:
                    graph.nodes[n]['style'] += ',filled'
                graph.nodes[n]['fillcolor'] = 2 + col

    # color edges
    for e in graph.edges:
        if (
            graph.nodes[e[0]]['instruction_form']['line_number'] in cp_line_numbers
            and graph.nodes[e[1]]['instruction_form']['line_number'] in cp_line_numbers
            and e[0] < e[1]
        ):
            # only mark the edge if no other CP line number lies between its two ends
            bold_edge = True
            for i in range(e[0] + 1, e[1]):
                if i in cp_line_numbers:
                    bold_edge = False
            if bold_edge:
                graph.edges[e]['style'] = 'bold'
                graph.edges[e]['penwidth'] = 3
        for dep in lcd_line_numbers:
            if (
                graph.nodes[e[0]]['instruction_form']['line_number'] in lcd_line_numbers[dep]
                and graph.nodes[e[1]]['instruction_form']['line_number']
                in lcd_line_numbers[dep]
            ):
                graph.edges[e]['color'] = graph.nodes[e[1]]['fillcolor']

    # rename node from [idx] to [idx mnemonic] and add shape
    mapping = {}
    for n in graph.nodes:
        if int(n) != n:
            # non-integer node IDs are the separate load nodes
            mapping[n] = '{}: LOAD'.format(int(n))
            graph.nodes[n]['fontname'] = 'italic'
            graph.nodes[n]['fontsize'] = 11.0
        else:
            node = graph.nodes[n]['instruction_form']
            if node['instruction'] is not None:
                mapping[n] = '{}: {}'.format(n, node['instruction'])
            else:
                # no instruction: name the node after the kind of line
                label = 'label' if node['label'] else None
                label = 'directive' if node['directive'] else label
                label = 'comment' if node['comment'] and label is None else label
                mapping[n] = '{}: {}'.format(n, label)
                graph.nodes[n]['fontname'] = 'italic'
                graph.nodes[n]['fontsize'] = 11.0
            graph.nodes[n]['shape'] = 'rectangle'

    nx.relabel.relabel_nodes(graph, mapping, copy=False)
    if filepath:
        nx.drawing.nx_agraph.write_dot(graph, filepath)
    else:
        nx.drawing.nx_agraph.write_dot(graph, 'osaca_dg.dot')
|
||||
85
osaca/semantics/marker_utils.py
Executable file
85
osaca/semantics/marker_utils.py
Executable file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from osaca.parser import ParserAArch64v81, ParserX86ATT
|
||||
|
||||
|
||||
def reduce_to_section(kernel, isa):
    """
    Reduce a kernel to the section between the start and the end marker.

    :param kernel: list of parsed lines
    :param isa: name of the ISA, 'x86' or 'aarch64' (case-insensitive)
    :returns: the slice of `kernel` between the markers
    :raises ValueError: if the ISA is not supported
    :raises LookupError: if the start or the end marker could not be found
    """
    isa = isa.lower()
    if isa not in ('x86', 'aarch64'):
        raise ValueError('ISA not supported.')
    find_markers = find_marked_kernel_x86ATT if isa == 'x86' else find_marked_kernel_AArch64
    start, end = find_markers(kernel)
    if start == -1:
        raise LookupError('Could not find START MARKER. Make sure it is inserted!')
    if end == -1:
        raise LookupError('Could not find END MARKER. Make sure it is inserted!')
    return kernel[start:end]
|
||||
|
||||
|
||||
def find_marked_kernel_AArch64(lines):
    """
    Find the marked kernel section in AArch64 assembly.

    :param lines: list of parsed lines
    :returns: tuple of start and end index as returned by `find_marked_kernel`
    """
    marker_values = [111, 222]
    nop_bytes = ['213', '3', '32', '31']
    parser = ParserAArch64v81()
    return find_marked_kernel(lines, parser, ['mov'], 'x1', marker_values, nop_bytes, reverse=True)
|
||||
|
||||
|
||||
def find_marked_kernel_x86ATT(lines):
    """
    Find the marked kernel section in x86 AT&T assembly.

    :param lines: list of parsed lines
    :returns: tuple of start and end index as returned by `find_marked_kernel`
    """
    marker_values = [111, 222]
    nop_bytes = ['100', '103', '144']
    parser = ParserX86ATT()
    return find_marked_kernel(lines, parser, ['mov', 'movl'], 'ebx', marker_values, nop_bytes)
|
||||
|
||||
|
||||
def find_marked_kernel(lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes, reverse=False):
    """
    Find the start and end marker of a kernel.

    A marker consists of a mov instruction writing a marker value to `mov_reg`, directly
    followed by byte directives containing `nop_bytes`.

    :param lines: list of parsed lines
    :param parser: parser providing `normalize_imd` and `get_full_reg_name`
    :param mov_instr: list of mnemonics accepted as marker mov instruction
    :param mov_reg: full name of the register the marker value is written to
    :param mov_vals: marker values, first entry for the start and second for the end marker
    :param nop_bytes: byte sequence that has to follow the mov instruction
    :param reverse: if `True`, the destination is the first and the source the second operand
    :returns: tuple of the index of the first line after the start marker and the index of
              the end marker line; `-1` for a marker that was not found
    """
    index_start = -1
    index_end = -1
    for i, line in enumerate(lines):
        try:
            if (
                line.instruction in mov_instr
                # a mov in the very last line has no following directive and cannot be a
                # marker; checking this avoids an IndexError on `lines[i + 1]`
                and i + 1 < len(lines)
                and lines[i + 1].directive is not None
            ):
                source = line.operands[0 if not reverse else 1]
                destination = line.operands[1 if not reverse else 0]
                # instruction pair matches, check for operands
                if (
                    'immediate' in source
                    and parser.normalize_imd(source.immediate) == mov_vals[0]
                    and 'register' in destination
                    and parser.get_full_reg_name(destination.register) == mov_reg
                ):
                    # operands of first instruction match start, check for second one
                    match, line_count = match_bytes(lines, i + 1, nop_bytes)
                    if match:
                        # return first line after the marker
                        index_start = i + 1 + line_count
                elif (
                    'immediate' in source
                    and parser.normalize_imd(source.immediate) == mov_vals[1]
                    and 'register' in destination
                    and parser.get_full_reg_name(destination.register) == mov_reg
                ):
                    # operand of first instruction match end, check for second one
                    match, line_count = match_bytes(lines, i + 1, nop_bytes)
                    if match:
                        # return line of the marker
                        index_end = i
        except TypeError:
            # best effort: report the line that could not be checked and continue
            print(i, line)
        if index_start != -1 and index_end != -1:
            break
    return index_start, index_end
|
||||
|
||||
|
||||
def match_bytes(lines, index, byte_list):
    """
    Check whether the byte directives starting at `index` begin with `byte_list`.

    :param lines: list of parsed lines
    :param index: index of the first line to check
    :param byte_list: expected byte values
    :returns: tuple `(True, number of consecutive byte directive lines)` on a match,
              `(False, -1)` otherwise
    """
    # either all bytes are in one line or in separate ones
    collected = []
    cursor = index
    while cursor < len(lines):
        directive = lines[cursor].directive
        if directive is None or directive.name != 'byte':
            break
        collected += directive.parameters
        cursor += 1
    if collected[0:len(byte_list)] == byte_list:
        return True, cursor - index
    return False, -1
|
||||
348
osaca/semantics/semantics_appender.py
Executable file
348
osaca/semantics/semantics_appender.py
Executable file
@@ -0,0 +1,348 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import warnings
|
||||
from functools import reduce
|
||||
|
||||
from osaca import utils
|
||||
from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
class INSTR_FLAGS:
    """
    Flag strings attached to instruction forms for unknown or special instructions
    """

    # flags regarding memory accesses
    LD = 'is_load_instruction'
    HAS_LD = 'performs_load'
    HAS_ST = 'performs_store'
    HIDDEN_LD = 'hidden_load'
    # flags regarding unknown throughput, latency or port binding
    TP_UNKWN = 'tp_unknown'
    LT_UNKWN = 'lt_unknown'
    NOT_BOUND = 'not_bound'
|
||||
|
||||
|
||||
class SemanticsAppender(object):
|
||||
def __init__(self, machine_model: MachineModel, path_to_yaml=None):
    """
    Set up the machine model, the ISA model and the parser matching the ISA.

    :param machine_model: machine model of the target architecture
    :param path_to_yaml: not used by this method
    """
    self._machine_model = machine_model
    isa = machine_model.get_ISA().lower()
    self._isa = isa
    # the ISA model is loaded from the file 'isa/<isa>.yml'
    self._isa_model = MachineModel(path_to_yaml=utils.find_file('isa/' + isa + '.yml'))
    if isa == 'x86':
        self._parser = ParserX86ATT()
    elif isa == 'aarch64':
        self._parser = ParserAArch64v81()
|
||||
|
||||
# SUMMARY FUNCTION
|
||||
def add_semantics(self, kernel):
    """
    Add all semantic information to the instruction forms of a kernel.

    :param kernel: list of instruction forms
    """
    for form in kernel:
        self.assign_src_dst(form)
        self.assign_tp_lt(form)
    if not self._machine_model.has_hidden_loads():
        return
    self.set_hidden_loads(kernel)
|
||||
|
||||
def set_hidden_loads(self, kernel):
    """
    Mark loads as hidden and set their port pressure on the data ports to zero.

    Instruction forms performing both a load and a store are not considered. If there
    are at most as many loads as stores, all loads are hidden; otherwise the closest not
    yet hidden load of every store is hidden.

    :param kernel: list of instruction forms
    """
    loads = [form for form in kernel if INSTR_FLAGS.HAS_LD in form['flags']]
    stores = [form for form in kernel if INSTR_FLAGS.HAS_ST in form['flags']]
    # Filter instructions including load and store
    shared_ldst = set(form['line_number'] for form in loads).intersection(
        set(form['line_number'] for form in stores)
    )
    loads = [form for form in loads if form['line_number'] not in shared_ldst]
    stores = [form for form in stores if form['line_number'] not in shared_ldst]

    if len(stores) == 0 or len(loads) == 0:
        # nothing to do
        return
    if len(loads) <= len(stores):
        # Hide all loads
        for load in loads:
            load['flags'] += [INSTR_FLAGS.HIDDEN_LD]
            load['port_pressure'] = self._nullify_data_ports(load['port_pressure'])
        return
    for store in stores:
        # Get 'closest' load instruction
        candidates = [
            (abs(load['line_number'] - store['line_number']), load['line_number'])
            for load in loads
            if INSTR_FLAGS.HIDDEN_LD not in load['flags']
        ]
        closest_line_number = min(candidates)[1]
        load = [form for form in kernel if form['line_number'] == closest_line_number][0]
        # Hide load
        load['flags'] += [INSTR_FLAGS.HIDDEN_LD]
        load['port_pressure'] = self._nullify_data_ports(load['port_pressure'])
|
||||
|
||||
# get parser result and assign throughput and latency value to instruction form
|
||||
# mark instruction form with semantic flags
|
||||
def assign_tp_lt(self, instruction_form):
    """
    Assign throughput, latency and port pressure to an instruction form and mark it with
    semantic flags.

    NOTE(review): the indentation of this method was reconstructed, confirm the nesting
    against the original file.

    :param instruction_form: instruction form to update in place
    """
    flags = []
    port_number = len(self._machine_model['ports'])
    if instruction_form['instruction'] is None:
        # No instruction (label, comment, ...) --> ignore
        throughput = 0.0
        latency = 0.0
        latency_wo_load = latency
        instruction_form['port_pressure'] = [0.0 for i in range(port_number)]
    else:
        instruction_data = self._machine_model.get_instruction(
            instruction_form['instruction'], instruction_form['operands']
        )
        if instruction_data:
            # instruction form in DB
            throughput = instruction_data['throughput']
            port_pressure = self._machine_model.average_port_pressure(
                instruction_data['port_pressure']
            )
            try:
                assert isinstance(port_pressure, list)
                assert len(port_pressure) == port_number
                instruction_form['port_pressure'] = port_pressure
                if sum(port_pressure) == 0 and throughput is not None:
                    # port pressure on all ports 0 --> not bound to a port
                    flags.append(INSTR_FLAGS.NOT_BOUND)
            except AssertionError:
                warnings.warn(
                    'Port pressure could not be imported correctly from database. '
                    + 'Please check entry for:\n {}'.format(instruction_form)
                )
                instruction_form['port_pressure'] = [0.0 for i in range(port_number)]
                flags.append(INSTR_FLAGS.TP_UNKWN)
            if throughput is None:
                # assume 0 cy and mark as unknown
                throughput = 0.0
                flags.append(INSTR_FLAGS.TP_UNKWN)
            latency = instruction_data['latency']
            latency_wo_load = latency
            if latency is None:
                # assume 0 cy and mark as unknown
                latency = 0.0
                latency_wo_load = latency
                flags.append(INSTR_FLAGS.LT_UNKWN)
            # NOTE(review): 'flags' is read unconditionally here although its absence is
            # handled further below -- confirm it is always set before this call.
            if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
                flags.append(INSTR_FLAGS.LD)
        else:
            # instruction could not be found in DB
            assign_unknown = True
            # check for equivalent register-operands DB entry if LD
            if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
                # --> combine LD and reg form of instruction form
                operands = self.substitute_mem_address(instruction_form['operands'])
                instruction_data_reg = self._machine_model.get_instruction(
                    instruction_form['instruction'], operands
                )
                if instruction_data_reg:
                    assign_unknown = False
                    reg_types = [
                        self._parser.get_reg_type(op['register'])
                        for op in operands['operand_list']
                        if 'register' in op
                    ]
                    # port pressure of the load, based on the first memory source operand
                    load_port_pressure = self._machine_model.average_port_pressure(
                        self._machine_model.get_load_throughput(
                            [
                                x['memory']
                                for x in instruction_form['operands']['source']
                                if 'memory' in x
                            ][0]
                        )
                    )
                    if 'load_throughput_multiplier' in self._machine_model:
                        multiplier = self._machine_model['load_throughput_multiplier'][
                            reg_types[0]
                        ]
                        load_port_pressure = [pp * multiplier for pp in load_port_pressure]
                    throughput = max(
                        max(load_port_pressure), instruction_data_reg['throughput']
                    )
                    latency = (
                        self._machine_model.get_load_latency(reg_types[0])
                        + instruction_data_reg['latency']
                    )
                    latency_wo_load = instruction_data_reg['latency']
                    # port-wise sum of the load and the register form port pressure
                    instruction_form['port_pressure'] = [
                        sum(x)
                        for x in zip(
                            load_port_pressure,
                            self._machine_model.average_port_pressure(
                                instruction_data_reg['port_pressure']
                            ),
                        )
                    ]
            if assign_unknown:
                # --> mark as unknown and assume 0 cy for latency/throughput
                throughput = 0.0
                latency = 0.0
                latency_wo_load = latency
                instruction_form['port_pressure'] = [0.0 for i in range(port_number)]
                flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
    # flatten flag list
    # (removes duplicate flags)
    flags = list(set(flags))
    if 'flags' not in instruction_form:
        instruction_form['flags'] = flags
    else:
        instruction_form['flags'] += flags
    instruction_form['throughput'] = throughput
    instruction_form['latency'] = latency
    instruction_form['latency_wo_load'] = latency_wo_load
    # for later CP and loop-carried dependency analysis
    instruction_form['latency_cp'] = 0
    instruction_form['latency_lcd'] = 0
|
||||
|
||||
def substitute_mem_address(self, operands):
    """
    Create a copy of the operand structure with all memory operands replaced by registers.

    The register type is taken from the first register operand of the operand list.

    :param operands: dict with the keys 'source', 'destination', 'src_dst' and
                     'operand_list'
    :returns: dict with the same keys in which every memory operand is replaced by the
              result of `convert_mem_to_reg`
    """
    regs = [op for op in operands['operand_list'] if 'register' in op]
    if (
        len(regs) > 1
        and len(set([self._parser.get_reg_type(x['register']) for x in regs])) != 1
    ):
        warnings.warn('Load type could not be identified clearly.')
    reg_type = self._parser.get_reg_type(regs[0]['register'])

    def substitute(operand_group):
        # replace the memory operands of one operand group, keep all others
        return [
            operand if 'memory' not in operand else self.convert_mem_to_reg(operand, reg_type)
            for operand in operand_group
        ]

    return {
        'source': substitute(operands['source']),
        'destination': substitute(operands['destination']),
        # built from the 'src_dst' operands (not from 'destination')
        'src_dst': substitute(operands['src_dst']),
        'operand_list': substitute(operands['operand_list']),
    }
|
||||
|
||||
def convert_mem_to_reg(self, memory, reg_type, reg_id='0'):
    """
    Create a register operand standing in for a memory operand.

    :param memory: memory operand to replace (not used)
    :param reg_type: type of the register to create
    :param reg_id: ID of the register to create
    :returns: dict with the key 'register' describing the register in the format of the ISA
    """
    if self._isa == 'x86':
        reg_fields = {'name': reg_type + reg_id}
    elif self._isa == 'aarch64':
        reg_fields = {'prefix': reg_type, 'name': reg_id}
    return {'register': reg_fields}
|
||||
|
||||
# get parser result and assign operands to
|
||||
# - source
|
||||
# - destination
|
||||
# - source/destination
|
||||
def assign_src_dst(self, instruction_form):
    """
    Sort the operands of an instruction form into source, destination and
    source/destination operands and set the load/store flags.

    :param instruction_form: instruction form to update in place
    """
    # if the instruction form doesn't have operands, there's nothing to do
    if instruction_form['operands'] is None:
        return
    # check if instruction form is in ISA yaml, otherwise apply standard operand assignment
    # (one dest, others source)
    isa_data = self._isa_model.get_instruction(
        instruction_form['instruction'], instruction_form['operands']
    )
    operands = instruction_form['operands']
    op_dict = {}
    if isa_data is None:
        # no irregular operand structure, apply default
        op_dict['source'] = self._get_regular_source_operands(instruction_form)
        op_dict['destination'] = self._get_regular_destination_operands(instruction_form)
        op_dict['src_dst'] = []
    else:
        # load src/dst structure from isa_data
        op_dict['source'] = []
        op_dict['destination'] = []
        op_dict['src_dst'] = []
        for position, description in enumerate(isa_data['operands']):
            if description['source'] and description['destination']:
                op_dict['src_dst'].append(operands[position])
            elif description['source']:
                op_dict['source'].append(operands[position])
            elif description['destination']:
                op_dict['destination'].append(operands[position])
    # store operand list in dict and reassign operand key/value pair
    op_dict['operand_list'] = operands
    instruction_form['operands'] = AttrDict.convert_dict(op_dict)
    # assign LD/ST flags
    if 'flags' not in instruction_form:
        instruction_form['flags'] = []
    if self._has_load(instruction_form):
        instruction_form['flags'] += [INSTR_FLAGS.HAS_LD]
    if self._has_store(instruction_form):
        instruction_form['flags'] += [INSTR_FLAGS.HAS_ST]
|
||||
|
||||
def _nullify_data_ports(self, port_pressure):
|
||||
data_ports = self._machine_model.get_data_ports()
|
||||
for port in data_ports:
|
||||
index = self._machine_model.get_ports().index(port)
|
||||
port_pressure[index] = 0.0
|
||||
return port_pressure
|
||||
|
||||
def _has_load(self, instruction_form):
|
||||
for operand in (
|
||||
instruction_form['operands']['source'] + instruction_form['operands']['src_dst']
|
||||
):
|
||||
if 'memory' in operand:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _has_store(self, instruction_form):
|
||||
for operand in (
|
||||
instruction_form['operands']['destination'] + instruction_form['operands']['src_dst']
|
||||
):
|
||||
if 'memory' in operand:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _get_regular_source_operands(self, instruction_form):
|
||||
if self._isa == 'x86':
|
||||
return self._get_regular_source_x86ATT(instruction_form)
|
||||
if self._isa == 'aarch64':
|
||||
return self._get_regular_source_AArch64(instruction_form)
|
||||
|
||||
def _get_regular_destination_operands(self, instruction_form):
|
||||
if self._isa == 'x86':
|
||||
return self._get_regular_destination_x86ATT(instruction_form)
|
||||
if self._isa == 'aarch64':
|
||||
return self._get_regular_destination_AArch64(instruction_form)
|
||||
|
||||
def _get_regular_source_x86ATT(self, instruction_form):
|
||||
# return all but last operand
|
||||
sources = [
|
||||
op for op in instruction_form['operands'][0 : len(instruction_form['operands']) - 1]
|
||||
]
|
||||
return sources
|
||||
|
||||
def _get_regular_source_AArch64(self, instruction_form):
|
||||
# return all but first operand
|
||||
sources = [
|
||||
op for op in instruction_form['operands'][1 : len(instruction_form['operands'])]
|
||||
]
|
||||
return sources
|
||||
|
||||
def _get_regular_destination_x86ATT(self, instruction_form):
|
||||
# return last operand
|
||||
return instruction_form['operands'][-1:]
|
||||
|
||||
def _get_regular_destination_AArch64(self, instruction_form):
|
||||
# return first operand
|
||||
return instruction_form['operands'][:1]
|
||||
|
||||
@staticmethod
def get_throughput_sum(kernel):
    """
    Add up the port pressure of all instruction forms in a kernel, port by port.

    :param kernel: non-empty iterable of mappings with a ``'port_pressure'`` list
    :returns: list with the per-port sums, each rounded to two decimals
    """

    def _add_rows(accumulated, row):
        # element-wise sum of two pressure rows
        return [sum(pair) for pair in zip(accumulated, row)]

    pressure_rows = [instr['port_pressure'] for instr in kernel]
    totals = reduce(_add_rows, pressure_rows)
    return [round(value, 2) for value in totals]
|
||||
@@ -1,410 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
from subprocess import call
|
||||
from math import ceil
|
||||
|
||||
from osaca.param import Register, MemAddr, Parameter
|
||||
#from param import Register, MemAddr, Parameter
|
||||
|
||||
|
||||
class Testcase(object):
    """
    Generator for latency and throughput micro-benchmark assembly files.

    An instance is built from an instruction mnemonic and its operand list and
    pre-renders all parts of the benchmark (header, register initialisation,
    latency loop, throughput loop, epilogue) as strings; `write_testcase` then
    concatenates them into ``.S`` files.
    """
    # ------------------Constant variables--------------------------
    # Lookup tables for regs
    gprs64 = ['rax', 'rbx', 'rcx', 'rdx', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15']
    gprs32 = ['eax', 'ebx', 'ecx', 'edx', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d']
    gprs16 = ['ax', 'bx', 'cx', 'dx', 'r9w', 'r10w', 'r11w', 'r12w', 'r13w', 'r14w', 'r15w']
    gprs8 = ['al', 'bl', 'cl', 'dl', 'r9l', 'r10l', 'r11l', 'r12l', 'r13l', 'r14l', 'r15l']
    fpus = ['st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7']
    mmxs = ['mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7']
    ks = ['k0', 'k1', 'k2', 'k3', 'k4', 'k5', 'k6', 'k7']
    bnds = ['bnd0', 'bnd1', 'bnd2', 'bnd3', 'bnd4', 'bnd5', 'bnd6', 'bnd7']
    xmms = ['xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'xmm8', 'xmm9',
            'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15']
    ymms = ['ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', 'ymm6', 'ymm7', 'ymm8', 'ymm9',
            'ymm10', 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15']
    zmms = ['zmm0', 'zmm1', 'zmm2', 'zmm3', 'zmm4', 'zmm5', 'zmm6', 'zmm7', 'zmm8', 'zmm9',
            'zmm10', 'zmm11', 'zmm12', 'zmm13', 'zmm14', 'zmm15']
    # Lookup table for memory
    mems = ['[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]',
            '[rip+PI]']
    # Lookup table for immediates
    imds = ['1', '2', '13', '22', '8', '78', '159', '222', '3', '9', '5', '55', '173', '317',
            '254', '255']
    # TODO Differentiate between AVX512 (with additional xmm16-31) and the rest
    # ...
    # ...
    # end TODO

    # Maps the operand kind strings produced by __define_operands to the lookup tables
    ops = {'gpr64': gprs64, 'gpr32': gprs32, 'gpr16': gprs16, 'gpr8': gprs8, 'fpu': fpus,
           'mmx': mmxs, 'k': ks, 'bnd': bnds, 'xmm': xmms, 'ymm': ymms, 'zmm': zmms, 'mem': mems,
           'imd': imds}

    # Create Single Precision 1.0
    sp1 = ('\t\t# create SP 1.0\n'
           '\t\tvpcmpeqw xmm0, xmm0, xmm0\n'
           '\t\tvpslld xmm0, xmm0, 25\t\t\t# logical left shift: 11111110..0 (25=32-(8-1))\n'
           '\t\tvpsrld xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading '
           'mantissa bit is zero\n'
           '\t\t# copy SP 1.0\n')
    # Create Double Precision 1.0
    dp1 = ('\t\t# create DP 1.0\n'
           '\t\tvpcmpeqw xmm0, xmm0, xmm0\t\t# all ones\n'
           '\t\tvpsllq xmm0, xmm0, 54\t\t\t# logical left shift: 11111110..0 (54=64-(10-1))\n'
           '\t\tvpsrlq xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading '
           'mantissa bit is zero\n')
    # Create epilogue
    done = ('done:\n'
            '\t\tmov\trsp, rbp\n'
            '\t\tpop\trbp\n'
            '\t\tret\n'
            '.size latency, .-latency')
    # ----------------------------------------------------------------

    # Constructor
    def __init__(self, _mnemonic, _param_list, _num_instr='32'):
        """
        Pre-render all benchmark parts for one instruction form.

        Parameters
        ----------
        _mnemonic : str
            Instruction mnemonic; stored lower-cased
        _param_list : list
            Operands of the instruction form (Register, MemAddr or Parameter objects)
        _num_instr : str or int
            Number of instructions per loop body, rounded up to the next even number
            (default '32')
        """
        self.instr = _mnemonic.lower()
        self.param_list = _param_list
        # num_instr must be an even number
        self.num_instr = str(ceil(int(_num_instr)/2)*2)
        # Check for the number of operands and initialise the GPRs if necessary
        self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = \
            self.__define_operands()
        self.num_operands = len(self.param_list)

        # Create asm header
        self.def_instr, self.ninstr, self.init, self.expand = self.__define_header()
        # Create latency and throughput loop
        self.loop_lat = self.__define_loop_lat()
        self.loop_thrpt = self.__define_loop_thrpt()
        # Create extension for testcase name
        sep0 = '-' if (self.num_operands > 0) else ''
        sep1 = '_' if (self.num_operands > 1) else ''
        sep2 = '_' if (self.num_operands > 2) else ''

        def short(kind):
            # 'gpr64' -> 'r64'; every other kind is used unchanged
            return kind if ('gpr' not in kind) else 'r' + kind[3:]

        self.extension = (sep0 + short(self.op_a) + sep1 + short(self.op_b)
                          + sep2 + short(self.op_c))

    def write_testcase(self, tp=True, lt=True):
        """
        Write testcase for class attributes in a file.

        The files are placed in ``~/.osaca/benchmarks``, which is created if missing.

        Parameters
        ----------
        tp : bool
            Controls if throughput testcase should be written
            (default True)

        lt : bool
            Controls if latency testcase should be written
            (default True)
        """
        osaca_dir = os.path.expanduser('~') + '/.osaca/'
        if lt:
            # Write latency file
            self.__write_benchmark(osaca_dir + 'benchmarks/' + self.instr + self.extension + '.S',
                                   self.loop_lat)
        if tp:
            # Write throughput file
            self.__write_benchmark(osaca_dir + 'benchmarks/' + self.instr + self.extension
                                   + '-TP.S', self.loop_thrpt)

    def __write_benchmark(self, path, loop):
        """Assemble the benchmark around ``loop`` and write it to ``path``."""
        # Create the target directory without spawning an external `mkdir` process
        os.makedirs(os.path.dirname(path), exist_ok=True)
        data = (self.def_instr + self.ninstr + self.init + self.dp1 + self.expand + self.gprPush
                + self.zeroGPR + self.copy + loop + self.gprPop + self.done)
        # Context manager guarantees the file is closed even if the write fails
        with open(path, 'w') as f:
            f.write(data)

    @staticmethod
    def __operand_kind(operand):
        """Return the base kind string of one operand ('' if it is not recognised)."""
        if isinstance(operand, Register):
            return operand.reg_type.lower()
        if isinstance(operand, MemAddr):
            return 'mem'
        if isinstance(operand, Parameter) and str(operand) == 'IMD':
            return 'imd'
        return ''

    # Check operands
    def __define_operands(self):
        """
        Check for the number of operands and initialise the GPRs if necessary.

        Returns
        -------
        (str, str, str, str, str, str, str)
            String tuple containing the kinds of the up to three operands, the push, pop and
            zeroing operations for the general purpose regs (empty if no GPR operand is used)
            and the instructions copying the created values into registers.
        """
        operands = self.param_list
        op_a, op_b, op_c = ('', '', '')
        gpr_push, gpr_pop, zero_gpr = ('', '', '')
        if len(operands) > 0:
            op_a = self.__operand_kind(operands[0])
            if op_a == 'gpr':
                gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs()
                op_a += str(operands[0].size)
        if len(operands) > 1:
            op_b = self.__operand_kind(operands[1])
            if op_b == 'gpr':
                op_b += str(operands[1].size)
                # GPRs only need to be set up once
                if 'gpr' not in op_a:
                    gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs()
        if len(operands) == 3:
            op_c = self.__operand_kind(operands[2])
            if op_c == 'gpr':
                op_c += str(operands[2].size)
                if ('gpr' not in op_a) and ('gpr' not in op_b):
                    gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs()
        if len(operands) == 1 and isinstance(operands[0], Register):
            copy = self.__copy_regs(operands[0])
        elif len(operands) > 1 and isinstance(operands[1], Register):
            copy = self.__copy_regs(operands[1])
        elif len(operands) > 2 and isinstance(operands[2], Register):
            # Use the operand that was just checked; operands[1] is known NOT to be a
            # Register in this branch
            copy = self.__copy_regs(operands[2])
        else:
            copy = ''
        return op_a, op_b, op_c, gpr_push, gpr_pop, zero_gpr, copy

    def __initialise_gprs(self):
        """
        Initialize eleven general purpose registers and set them to zero.

        Returns
        -------
        (str, str, str)
            String tuple for push, pop and initalisation operations
        """
        gpr_push = ''
        gpr_pop = ''
        zero_gpr = ''
        for reg in self.gprs64:
            gpr_push += '\t\tpush {}\n'.format(reg)
        # Pop in reverse order to mirror the pushes
        for reg in reversed(self.gprs64):
            gpr_pop += '\t\tpop {}\n'.format(reg)
        for reg in self.gprs64:
            zero_gpr += '\t\txor {}, {}\n'.format(reg, reg)
        return gpr_push, gpr_pop, zero_gpr

    # Copy created values in specific register
    def __copy_regs(self, reg):
        """
        Copy created values in specific register.

        Parameters
        ----------
        reg : Register
            Register for copying the value

        Returns
        -------
        str
            String containing the copy instructions ('' for unsupported register types)
        """
        copy = '\t\t# copy DP 1.0\n'
        # Different handling for GPR, MMX and SSE/AVX registers
        if reg.reg_type == 'GPR':
            copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0])
            copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][1])
            copy += '\t\t# Create DP 2.0\n'
            copy += '\t\tadd {}, {}\n'.format(self.ops['gpr64'][1], self.ops['gpr64'][0])
            copy += '\t\t# Create DP 0.5\n'
            copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0])
            copy += '\t\tmovq {}, {}\n'.format(self.ops['gpr64'][2], self.ops['gpr64'][0])
            copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0])
        elif reg.reg_type == 'MMX':
            copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][0])
            copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][1])
            copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0])
            copy += '\t\t# Create DP 2.0\n'
            copy += '\t\tadd {}, {}\n'.format(self.ops['mmx'][1], self.ops['mmx'][0])
            copy += '\t\t# Create DP 0.5\n'
            copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0])
            copy += '\t\tmovq {}, {}\n'.format(self.ops['mmx'][2], self.ops['gpr64'][0])
        elif reg.reg_type == 'XMM' or reg.reg_type == 'YMM' or reg.reg_type == 'ZMM':
            key = reg.reg_type.lower()
            copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][0], self.ops[key][0])
            copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][1], self.ops[key][0])
            copy += '\t\t# Create DP 2.0\n'
            copy += '\t\tvaddpd {}, {}, {}\n'.format(self.ops[key][1], self.ops[key][1],
                                                    self.ops[key][1])
            copy += '\t\t# Create DP 0.5\n'
            copy += '\t\tvdivpd {}, {}, {}\n'.format(self.ops[key][2], self.ops[key][0],
                                                    self.ops[key][1])
        else:
            copy = ''
        return copy

    def __define_header(self):
        """
        Define header.

        Returns
        -------
        (str, str, str, str)
            String tuple containing the header, value initalisations and extensions
        """
        def_instr = '#define INSTR '+self.instr+'\n'
        ninstr = '#define NINST '+self.num_instr+'\n'
        pi = ('PI:\n'
              '.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, '  # 128 bit
              '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, '  # 256 bit
              '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, '  # 384 bit
              '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9\n')  # 512 bit
        init = ('#define N edi\n'
                '#define i r8d\n\n\n'
                '.intel_syntax noprefix\n'
                '.globl ninst\n'
                '.data\n'
                'ninst:\n'
                '.long NINST\n'
                '.align 32\n'
                + pi +
                '.text\n'
                '.globl latency\n'
                '.type latency, @function\n'
                '.align 32\n'
                'latency:\n'
                '\t\tpush rbp\n'
                '\t\tmov rbp, rsp\n'
                '\t\txor i, i\n'
                '\t\ttest N, N\n'
                '\t\tjle done\n')
        # Expand to AVX(512) if necessary
        expand = ''
        if self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm':
            expand = ('\t\t# expand from SSE to AVX\n'
                      '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n')
        if self.op_a == 'zmm' or self.op_b == 'zmm' or self.op_c == 'zmm':
            expand = ('\t\t# expand from SSE to AVX\n'
                      '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n'
                      '\t\t# expand from AVX to AVX512\n'
                      '\t\tvinsert64x4 zmm0, zmm0, ymm0, 0x1\n')
        return def_instr, ninstr, init, expand

    def __define_loop_lat(self):
        """
        Create latency loop.

        Returns
        -------
        str
            Latency loop as string
        """
        loop_lat = ('loop:\n'
                    '\t\tinc i\n')
        # NOTE(review): three-operand forms where op_a equals neither op_b nor op_c fall
        # through every branch below and yield an empty loop body -- confirm this is intended.
        if self.num_operands == 0:
            for i in range(0, int(self.num_instr)):
                loop_lat += '\t\tINSTR\n'
        if self.num_operands == 1:
            for i in range(0, int(self.num_instr)):
                loop_lat += '\t\tINSTR {}\n'.format(self.ops[self.op_a][0])
        elif self.num_operands == 2 and self.op_a == self.op_b:
            # Two instructions per iteration, hence the step of 2
            for i in range(0, int(self.num_instr), 2):
                loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0],
                                                       self.ops[self.op_b][1])
                loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_b][1],
                                                       self.ops[self.op_b][0])
        elif self.num_operands == 2 and self.op_a != self.op_b:
            for i in range(0, int(self.num_instr), 2):
                loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0],
                                                       self.ops[self.op_b][0])
                loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0],
                                                       self.ops[self.op_b][0])
        elif self.num_operands == 3 and self.op_a == self.op_b:
            for i in range(0, int(self.num_instr), 2):
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0],
                                                           self.ops[self.op_b][1],
                                                           self.ops[self.op_c][0])
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1],
                                                           self.ops[self.op_b][0],
                                                           self.ops[self.op_c][0])
        elif self.num_operands == 3 and self.op_a == self.op_c:
            for i in range(0, int(self.num_instr), 2):
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0],
                                                           self.ops[self.op_b][0],
                                                           self.ops[self.op_c][0])
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1],
                                                           self.ops[self.op_b][0],
                                                           self.ops[self.op_c][0])
        loop_lat += ('\t\tcmp i, N\n'
                     '\t\tjl loop\n')
        return loop_lat

    def __define_loop_thrpt(self):
        """
        Create throughput loop.

        Returns
        -------
        str
            Throughput loop as string
        """
        loop_thrpt = ('loop:\n'
                      '\t\tinc i\n')
        ext = ''
        ext1 = False
        ext2 = False
        if self.num_operands == 2:
            ext1 = True
        if self.num_operands == 3:
            ext1 = True
            ext2 = True
        for i in range(0, int(self.num_instr)):
            if self.num_operands == 0:
                loop_thrpt += '\t\tINSTR\n'
                continue
            if ext1:
                ext = ', {}'.format(self.ops[self.op_b][i % 3])
            if ext2:
                ext += ', {}'.format(self.ops[self.op_c][i % 3])
            # First operand cycles through table entries 3.. so it never collides with the
            # entries 0-2 used for the remaining operands
            reg_num = (i % (len(self.ops[self.op_a]) - 3)) + 3
            loop_thrpt += '\t\tINSTR {}{}\n'.format(self.ops[self.op_a][reg_num], ext)
        loop_thrpt += ('\t\tcmp i, N\n'
                       '\t\tjl loop\n')
        return loop_thrpt

    def is_in_dir(self):
        """
        Check if testcases with the same name already exist in testcase
        directory.

        Returns
        -------
        (bool, bool)
            True if file is in directory
            False if file is not in directory
            While the first value stands for the throughput testcase
            and the second value stands for the latency testcase
        """
        tp = False
        lt = False
        name = self.instr+self.extension
        # NOTE(review): this walks the 'benchmarks' folder next to this module, while
        # write_testcase writes to ~/.osaca/benchmarks -- confirm which one is intended.
        for root, dirs, files in os.walk(os.path.dirname(__file__)+'/benchmarks'):
            # write_testcase uses the upper-case '-TP.S' suffix; the lower-case spelling is
            # still accepted for files created under the old name
            if (name + '-TP.S') in files or (name + '-tp.S') in files:
                tp = True
            if name+'.S' in files:
                lt = True
        return tp, lt

    def get_entryname(self):
        """
        Return the name of the entry the instruction form would be the data file

        Returns
        -------
        str
            The composited string out of instruction mnemonic and operands
        """
        name = self.instr+self.extension
        return name
|
||||
13
osaca/utils.py
Normal file
13
osaca/utils.py
Normal file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env python3
|
||||
import os.path
|
||||
|
||||
|
||||
def find_file(name):
    """
    Locate ``name`` in the user data folder or, failing that, the package data folder.

    :param name: file name (or relative path) to look for
    :returns: path of the first existing candidate; ``~/.osaca/data`` is tried first
    :raises FileNotFoundError: if neither folder contains ``name``
    """
    user_data = os.path.expanduser('~/.osaca/data')
    package_data = os.path.join(os.path.dirname(__file__), 'data')
    search_paths = [user_data, package_data]
    candidates = (os.path.join(folder, name) for folder in search_paths)
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        raise FileNotFoundError("Could not find {!r} in {!r}.".format(name, search_paths))
    return found
|
||||
@@ -1,5 +1,5 @@
|
||||
[pep8]
|
||||
max-line-length=100
|
||||
max-line-length=99
|
||||
|
||||
[metadata]
|
||||
license-file=LICENSE
|
||||
|
||||
6
setup.py
6
setup.py
@@ -75,6 +75,7 @@ setup(
|
||||
# that you indicate whether you support Python 2, Python 3 or both.
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
],
|
||||
|
||||
# What does your project relate to?
|
||||
@@ -89,9 +90,10 @@ setup(
|
||||
# requirements files see:
|
||||
# https://packaging.python.org/en/latest/requirements.html
|
||||
install_requires=[
|
||||
'numpy',
|
||||
'pandas',
|
||||
'kerncraft',
|
||||
'networkx',
|
||||
'pyparsing',
|
||||
'pygraphviz',
|
||||
],
|
||||
python_requires='>=3.5',
|
||||
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
#!/usr//bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
sys.path[0:0] = ['.', '..']
|
||||
suite = unittest.TestLoader().loadTestsFromNames(
|
||||
[
|
||||
'test_osaca'
|
||||
'test_base_parser',
|
||||
'test_parser_x86att',
|
||||
'test_parser_AArch64v81',
|
||||
'test_marker_utils',
|
||||
'test_semantics',
|
||||
'test_frontend',
|
||||
'test_db_interface',
|
||||
'test_kerncraftAPI',
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
76
tests/test_base_parser.py
Executable file
76
tests/test_base_parser.py
Executable file
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for base assembly parser
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
|
||||
|
||||
class TestBaseParser(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
try:
|
||||
self.parser = BaseParser()
|
||||
except NotImplementedError:
|
||||
pass
|
||||
with open(self._find_file('triad-x86-iaca.s')) as f:
|
||||
self.triad_code = f.read()
|
||||
|
||||
##################
|
||||
# Test
|
||||
##################
|
||||
|
||||
def test_parse_file(self):
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.parse_file(self.triad_code)
|
||||
|
||||
def test_parse_line(self):
|
||||
line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.parse_line(line_instruction)
|
||||
|
||||
def test_parse_instruction(self):
|
||||
instr1 = '\t\tvcvtsi2ss %edx, %xmm2, %xmm2\t\t\t#12.27'
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.parse_instruction(instr1)
|
||||
|
||||
def test_register_funcs(self):
|
||||
reg_a1 = AttrDict({'name': 'rax'})
|
||||
reg_a2 = AttrDict({'name': 'eax'})
|
||||
register_string = 'v1.2d'
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.is_reg_dependend_of(reg_a1, reg_a2)
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.parse_register(register_string)
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.is_gpr(reg_a1)
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.is_vector_register(reg_a1)
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.process_operand(reg_a1)
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.get_full_reg_name(reg_a1)
|
||||
|
||||
def test_normalize_imd(self):
|
||||
imd_hex_1 = {'value': '0x4f'}
|
||||
with self.assertRaises(NotImplementedError):
|
||||
self.parser.normalize_imd(imd_hex_1)
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
name = os.path.join(testdir, 'test_files', name)
|
||||
assert os.path.exists(name)
|
||||
return name
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run only this module's test case, with verbose per-test output
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(unittest.TestLoader().loadTestsFromTestCase(TestBaseParser))
|
||||
86
tests/test_db_interface.py
Executable file
86
tests/test_db_interface.py
Executable file
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for DB interface
|
||||
"""
|
||||
|
||||
import unittest
|
||||
|
||||
from osaca.db_interface import sanity_check
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
class TestDBInterface(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
sample_entry = {
|
||||
'name': 'DoItRightAndDoItFast',
|
||||
'operands': [
|
||||
{'class': 'memory', 'offset': 'imd', 'base': 'gpr', 'index': 'gpr', 'scale': 8},
|
||||
{'class': 'register', 'name': 'xmm'},
|
||||
],
|
||||
'throughput': 1.25,
|
||||
'latency': 125,
|
||||
'uops': 6,
|
||||
}
|
||||
self.entry_csx = sample_entry.copy()
|
||||
self.entry_tx2 = sample_entry.copy()
|
||||
self.entry_zen1 = sample_entry.copy()
|
||||
|
||||
# self.entry_csx['port_pressure'] = [1.25, 0, 1.25, 0.5, 0.5, 0.5, 0.5, 0, 1.25, 1.25, 0]
|
||||
self.entry_csx['port_pressure'] = [[5, '0156'], [1, '23'], [1, ['2D', '3D']]]
|
||||
# self.entry_tx2['port_pressure'] = [2.5, 2.5, 0, 0, 0.5, 0.5]
|
||||
self.entry_tx2['port_pressure'] = [[5, '01'], [1, '45']]
|
||||
del self.entry_tx2['operands'][1]['name']
|
||||
self.entry_tx2['operands'][1]['prefix'] = 'x'
|
||||
# self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
|
||||
self.entry_zen1['port_pressure'] = [[4, '0123'], [1, '4'], [1, '89'], [2, ['8D', '9D']]]
|
||||
|
||||
###########
|
||||
# Tests
|
||||
###########
|
||||
|
||||
def test_add_single_entry(self):
|
||||
mm_csx = MachineModel('csx')
|
||||
mm_tx2 = MachineModel('tx2')
|
||||
mm_zen1 = MachineModel('zen1')
|
||||
num_entries_csx = len(mm_csx['instruction_forms'])
|
||||
num_entries_tx2 = len(mm_tx2['instruction_forms'])
|
||||
num_entries_zen1 = len(mm_zen1['instruction_forms'])
|
||||
|
||||
mm_csx.set_instruction_entry(self.entry_csx)
|
||||
mm_tx2.set_instruction_entry(self.entry_tx2)
|
||||
mm_zen1.set_instruction_entry({'name': 'empty_operation'})
|
||||
|
||||
num_entries_csx = len(mm_csx['instruction_forms']) - num_entries_csx
|
||||
num_entries_tx2 = len(mm_tx2['instruction_forms']) - num_entries_tx2
|
||||
num_entries_zen1 = len(mm_zen1['instruction_forms']) - num_entries_zen1
|
||||
|
||||
self.assertEqual(num_entries_csx, 1)
|
||||
self.assertEqual(num_entries_tx2, 1)
|
||||
self.assertEqual(num_entries_zen1, 1)
|
||||
|
||||
def test_invalid_add(self):
|
||||
entry = {}
|
||||
with self.assertRaises(KeyError):
|
||||
MachineModel('csx').set_instruction_entry(entry)
|
||||
with self.assertRaises(TypeError):
|
||||
MachineModel('csx').set_instruction()
|
||||
|
||||
def test_sanity_check(self):
|
||||
# non-verbose
|
||||
sanity_check('csx', verbose=False)
|
||||
sanity_check('tx2', verbose=False)
|
||||
sanity_check('zen1', verbose=False)
|
||||
# verbose
|
||||
sanity_check('csx', verbose=True)
|
||||
sanity_check('tx2', verbose=True)
|
||||
sanity_check('zen1', verbose=True)
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run only this module's test case, with verbose per-test output
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(unittest.TestLoader().loadTestsFromTestCase(TestDBInterface))
|
||||
539
tests/test_files/hidden_load_machine_model.yml
Normal file
539
tests/test_files/hidden_load_machine_model.yml
Normal file
@@ -0,0 +1,539 @@
|
||||
osaca_version: 0.3.1
|
||||
micro_architecture: AMD Zen (family 17h)
|
||||
arch_code: ZEN1
|
||||
isa: x86
|
||||
load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0}
|
||||
load_throughput_multiplier: {gpr: 1.0, xmm: 1.0, ymm: 2.0}
|
||||
load_throughput:
|
||||
- {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: ~, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: ~, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: ~, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
hidden_loads: true
|
||||
ports: ['0', '1', '2', '3', 3DV, '4', '5', '6', '7', '8', '9', 8D, 9D, ST]
|
||||
port_model_scheme: |
|
||||
┌--------------------------------------┐ ┌-----------------------------------------------┐
|
||||
| 96 entries OoO scheduler | | 84 entries OoO scheduler |
|
||||
└--------------------------------------┘ └-----------------------------------------------┘
|
||||
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|
||||
▼ ▼ ▼ ▼ ▼ ▼ ▼ ▼ ▼ ▼
|
||||
┌-------┐ ┌-------┐ ┌-------┐ ┌-------┐ ┌------┐ ┌-----┐ ┌-----┐ ┌------┐ ┌-----┐ ┌-----┐
|
||||
|SSE ALU| |SSE ALU| |SSE ALU| |SSE ALU| | ALU | | ALU | | ALU | | ALU | | AGU | | AGU |
|
||||
└-------┘ └-------┘ └-------┘ └-------┘ └------┘ └-----┘ └-----┘ └------┘ └-----┘ └-----┘
|
||||
┌-------┐ ┌-------┐ ┌-------┐ ┌-------┐ ┌------┐ ┌-----┐ ┌-----┐ ┌------┐ | |
|
||||
|SSE MUL| |SSE MUL| |SSE ADD| |SSE ADD| |BRANCH| | MUL | | MUL | |BRANCH| ▼ ▼
|
||||
└-------┘ └-------┘ └-------┘ └-------┘ └------┘ └-----┘ └-----┘ └------┘ ┌-------------┐
|
||||
┌-------┐ ┌-------┐ ┌-------┐ ┌-------┐ | LOAD |
|
||||
|SSE FMA| |SSE FMA| | SSE | |SSE DIV| └-------------┘
|
||||
└-------┘ └-------┘ | SHUF | └-------┘ ┌-------------┐
|
||||
┌-------┐ └-------┘ | LOAD |
|
||||
| SSE | └-------------┘
|
||||
| SHUF | ┌-------------┐
|
||||
└-------┘ | STORE |
|
||||
└-------------┘
|
||||
instruction_forms:
|
||||
- name: add
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: add
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: addl
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: addq
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: cmpl
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: ~ # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: cmpq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: ~ # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: incq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: ja
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: ~
|
||||
port_pressure: []
|
||||
- name: jb
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: ~
|
||||
port_pressure: []
|
||||
- name: jne
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.0
|
||||
latency: ~
|
||||
port_pressure: []
|
||||
- name: leaq
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.5
|
||||
latency: ~ # 1*p89
|
||||
port_pressure: [[1, '89']]
|
||||
- name: movl
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: mulsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: mulss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: rcpss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: ~ #1.0
|
||||
latency: 5.0
|
||||
port_pressure: []
|
||||
- name: sqrtsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: ~ #8.0
|
||||
latency: 23.0
|
||||
port_pressure: []
|
||||
- name: sqrtss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: ~ #5.0
|
||||
latency: 17.0
|
||||
port_pressure: []
|
||||
- name: subq
|
||||
operands:
|
||||
- class: register
|
||||
name: gpr
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: subq
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: gpr
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p4567
|
||||
port_pressure: [[1, '4567']]
|
||||
- name: vaddpd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 3.0 # 2*p23
|
||||
port_pressure: [[2, '23']]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p23
|
||||
port_pressure: [[1, '23']]
|
||||
- name: vaddss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p23
|
||||
port_pressure: [[1, '23']]
|
||||
- name: vdivsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 4.0
|
||||
latency: 13.0 # 1*p3+4*p3DV
|
||||
port_pressure: [[1, '3'], [4.0, [3DV]]]
|
||||
- name: vdivss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 3.0
|
||||
latency: 10.0
|
||||
port_pressure: [[1, '3'], [3.0, [3DV]]]
|
||||
- name: vfmadd213pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vfmadd231pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vfmadd132pd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vmulsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulss
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01+1*p89+1*p8D9D
|
||||
port_pressure: [[1, '01'], [1, '89'], [1, [8D, 9D]]]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 2*p01
|
||||
port_pressure: [[2, '01']]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovaps
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovaps
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 2.0
|
||||
latency: 3.0 # 2*p89+2*pST
|
||||
port_pressure: [[2, '89'], [2, [ST]]]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 1*p89+1*p8D9D
|
||||
port_pressure: [[1, '89'], [1, [8D, 9D]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: ~
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: memory
|
||||
base: gpr
|
||||
offset: imd
|
||||
index: gpr
|
||||
scale: 1
|
||||
throughput: 1.0
|
||||
latency: 4.0 # 1*p89+1*pST
|
||||
port_pressure: [[1, '89'], [1, [ST]]]
|
||||
27
tests/test_files/kernel-AArch64.s
Normal file
27
tests/test_files/kernel-AArch64.s
Normal file
@@ -0,0 +1,27 @@
|
||||
// mov x1, #111
|
||||
// .byte 213,3,32,31
|
||||
.LBB0_32:
|
||||
ldp q4, q5, [x9, #-32]
|
||||
ldp q6, q7, [x9], #64
|
||||
ldp q16, q17, [x11, #-32]!
|
||||
ldp q18, q19, [x11], #64
|
||||
fmul v4.2d, v4.2d, v16.2d
|
||||
fmul v5.2d, v5.2d, v17.2d
|
||||
fmul v6.2d, v6.2d, v18.2d
|
||||
fmul v7.2d, v7.2d, v19.2d
|
||||
ldp q0, q1, [x8, #-32]
|
||||
ldp q2, q3, [x8], #64
|
||||
fadd v0.2d, v0.2d, v4.2d
|
||||
fadd v1.2d, v1.2d, v5.2d
|
||||
stp q0, q1, [x10, #-32]
|
||||
fadd v2.2d, v2.2d, v6.2d
|
||||
fadd v3.2d, v3.2d, v7.2d
|
||||
stp q2, q3, [x10]
|
||||
add x10, x10, #64 // =64
|
||||
adds x12, x12, #1 // =1
|
||||
fmov s0, -1.0e+0
|
||||
fmov s1, #2.0e+2f
|
||||
prfm pldl1keep, [x26, #2112]
|
||||
b.ne .LBB0_32
|
||||
// mov x1, #222
|
||||
// .byte 213,3,32,31
|
||||
13
tests/test_files/kernel-x86.s
Normal file
13
tests/test_files/kernel-x86.s
Normal file
@@ -0,0 +1,13 @@
|
||||
#movl $111,%ebx
|
||||
#.byte 100,103,144
|
||||
.L10:
|
||||
vmovapd (%r15,%rax), %ymm0
|
||||
vmovapd (%r12,%rax), %ymm3
|
||||
addl $1, %ecx
|
||||
vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0
|
||||
vmovapd %ymm0, (%r14,%rax)
|
||||
addq $32, %rax
|
||||
cmpl %ecx, %r10d
|
||||
ja .L10
|
||||
#movl $222,%ebx
|
||||
#.byte 100,103,144
|
||||
645
tests/test_files/triad-arm-iaca.s
Normal file
645
tests/test_files/triad-arm-iaca.s
Normal file
@@ -0,0 +1,645 @@
|
||||
.text
|
||||
.file "triad.c"
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.p2align 3 // -- Begin function triad
|
||||
.LCPI0_0:
|
||||
.xword 4596373779694328218 // double 0.20000000000000001
|
||||
.LCPI0_1:
|
||||
.xword 4652007308841189376 // double 1000
|
||||
.LCPI0_2:
|
||||
.xword 4517329193108106637 // double 9.9999999999999995E-7
|
||||
.LCPI0_3:
|
||||
.xword 4629700416936869888 // double 32
|
||||
.LCPI0_4:
|
||||
.xword 4562146422526312448 // double 9.765625E-4
|
||||
.text
|
||||
.globl triad
|
||||
.p2align 6
|
||||
.type triad,@function
|
||||
triad: // @triad
|
||||
.cfi_startproc
|
||||
// %bb.0:
|
||||
sub sp, sp, #224 // =224
|
||||
str d8, [sp, #112] // 8-byte Folded Spill
|
||||
stp x28, x27, [sp, #128] // 16-byte Folded Spill
|
||||
stp x26, x25, [sp, #144] // 16-byte Folded Spill
|
||||
stp x24, x23, [sp, #160] // 16-byte Folded Spill
|
||||
stp x22, x21, [sp, #176] // 16-byte Folded Spill
|
||||
stp x20, x19, [sp, #192] // 16-byte Folded Spill
|
||||
stp x29, x30, [sp, #208] // 16-byte Folded Spill
|
||||
add x29, sp, #208 // =208
|
||||
.cfi_def_cfa w29, 16
|
||||
.cfi_offset w30, -8
|
||||
.cfi_offset w29, -16
|
||||
.cfi_offset w19, -24
|
||||
.cfi_offset w20, -32
|
||||
.cfi_offset w21, -40
|
||||
.cfi_offset w22, -48
|
||||
.cfi_offset w23, -56
|
||||
.cfi_offset w24, -64
|
||||
.cfi_offset w25, -72
|
||||
.cfi_offset w26, -80
|
||||
.cfi_offset w27, -88
|
||||
.cfi_offset w28, -96
|
||||
.cfi_offset b8, -112
|
||||
mov w19, w0
|
||||
orr w0, wzr, #0x40
|
||||
sbfiz x23, x19, #3, #32
|
||||
mov x1, x23
|
||||
bl aligned_alloc
|
||||
mov x20, x0
|
||||
orr w0, wzr, #0x40
|
||||
mov x1, x23
|
||||
bl aligned_alloc
|
||||
str x0, [sp, #88] // 8-byte Folded Spill
|
||||
orr w0, wzr, #0x40
|
||||
mov x1, x23
|
||||
bl aligned_alloc
|
||||
mov x22, x0
|
||||
orr w0, wzr, #0x40
|
||||
mov x1, x23
|
||||
bl aligned_alloc
|
||||
mov x23, x0
|
||||
cmp w19, #0 // =0
|
||||
b.le .LBB0_3
|
||||
// %bb.1:
|
||||
mov w24, w19
|
||||
cmp w19, #7 // =7
|
||||
b.hi .LBB0_9
|
||||
// %bb.2:
|
||||
mov x8, xzr
|
||||
b .LBB0_17
|
||||
.LBB0_3:
|
||||
adrp x8, .LCPI0_0
|
||||
orr w25, wzr, #0x1
|
||||
ldr d8, [x8, :lo12:.LCPI0_0]
|
||||
.p2align 6
|
||||
.LBB0_4: // =>This Loop Header: Depth=1
|
||||
// Child Loop BB0_5 Depth 2
|
||||
sub x0, x29, #88 // =88
|
||||
add x1, sp, #96 // =96
|
||||
bl timing
|
||||
mov w21, w25
|
||||
cbz w25, .LBB0_8
|
||||
.p2align 6
|
||||
.LBB0_5: // Parent Loop BB0_4 Depth=1
|
||||
// => This Inner Loop Header: Depth=2
|
||||
ldr d0, [x20]
|
||||
fcmp d0, #0.0
|
||||
b.le .LBB0_7
|
||||
// %bb.6: // in Loop: Header=BB0_5 Depth=2
|
||||
mov x0, x20
|
||||
bl dummy
|
||||
.LBB0_7: // in Loop: Header=BB0_5 Depth=2
|
||||
subs w21, w21, #1 // =1
|
||||
b.ne .LBB0_5
|
||||
.LBB0_8: // in Loop: Header=BB0_4 Depth=1
|
||||
add x0, sp, #104 // =104
|
||||
add x1, sp, #96 // =96
|
||||
bl timing
|
||||
ldr d0, [sp, #104]
|
||||
ldur d1, [x29, #-88]
|
||||
fsub d1, d0, d1
|
||||
lsl w25, w25, #1
|
||||
fcmp d1, d8
|
||||
b.mi .LBB0_4
|
||||
b .LBB0_38
|
||||
.LBB0_9:
|
||||
and x8, x24, #0xfffffff8
|
||||
sub x10, x8, #8 // =8
|
||||
lsr x11, x10, #3
|
||||
add w9, w11, #1 // =1
|
||||
and x9, x9, #0x3
|
||||
cmp x10, #24 // =24
|
||||
b.hs .LBB0_11
|
||||
// %bb.10:
|
||||
orr w13, wzr, #0x20
|
||||
cbnz x9, .LBB0_14
|
||||
b .LBB0_16
|
||||
.LBB0_11:
|
||||
mov x16, #28286
|
||||
movk x16, #29109, lsl #16
|
||||
ldr x15, [sp, #88] // 8-byte Folded Reload
|
||||
movk x16, #34426, lsl #32
|
||||
movk x16, #16000, lsl #48
|
||||
dup v0.2d, x16
|
||||
mvn x11, x11
|
||||
mov x10, xzr
|
||||
add x11, x9, x11
|
||||
add x12, x23, #128 // =128
|
||||
add x13, x20, #128 // =128
|
||||
add x14, x22, #128 // =128
|
||||
add x15, x15, #128 // =128
|
||||
.p2align 6
|
||||
.LBB0_12: // =>This Inner Loop Header: Depth=1
|
||||
stp q0, q0, [x12]
|
||||
stp q0, q0, [x12, #-128]
|
||||
stp q0, q0, [x12, #32]
|
||||
stp q0, q0, [x12, #-96]
|
||||
stp q0, q0, [x14]
|
||||
add x10, x10, #32 // =32
|
||||
stp q0, q0, [x14, #-128]
|
||||
stp q0, q0, [x14, #32]
|
||||
stp q0, q0, [x14, #-96]
|
||||
stp q0, q0, [x15]
|
||||
stp q0, q0, [x15, #-128]
|
||||
stp q0, q0, [x15, #32]
|
||||
stp q0, q0, [x15, #-96]
|
||||
stp q0, q0, [x13]
|
||||
stp q0, q0, [x13, #-128]
|
||||
stp q0, q0, [x13, #32]
|
||||
stp q0, q0, [x13, #-96]
|
||||
stp q0, q0, [x12, #64]
|
||||
stp q0, q0, [x12, #-64]
|
||||
stp q0, q0, [x12, #96]
|
||||
stp q0, q0, [x12, #-32]
|
||||
add x12, x12, #256 // =256
|
||||
stp q0, q0, [x14, #64]
|
||||
stp q0, q0, [x14, #-64]
|
||||
stp q0, q0, [x14, #96]
|
||||
stp q0, q0, [x14, #-32]
|
||||
add x14, x14, #256 // =256
|
||||
stp q0, q0, [x15, #64]
|
||||
stp q0, q0, [x15, #-64]
|
||||
stp q0, q0, [x15, #96]
|
||||
stp q0, q0, [x15, #-32]
|
||||
add x15, x15, #256 // =256
|
||||
stp q0, q0, [x13, #64]
|
||||
stp q0, q0, [x13, #-64]
|
||||
stp q0, q0, [x13, #96]
|
||||
stp q0, q0, [x13, #-32]
|
||||
add x13, x13, #256 // =256
|
||||
adds x11, x11, #4 // =4
|
||||
b.ne .LBB0_12
|
||||
// %bb.13:
|
||||
lsl x10, x10, #3
|
||||
orr x13, x10, #0x20
|
||||
cbz x9, .LBB0_16
|
||||
.LBB0_14:
|
||||
ldr x14, [sp, #88] // 8-byte Folded Reload
|
||||
add x10, x23, x13
|
||||
add x11, x22, x13
|
||||
add x12, x20, x13
|
||||
add x13, x14, x13
|
||||
mov x14, #28286
|
||||
movk x14, #29109, lsl #16
|
||||
movk x14, #34426, lsl #32
|
||||
movk x14, #16000, lsl #48
|
||||
dup v0.2d, x14
|
||||
neg x9, x9
|
||||
.p2align 6
|
||||
.LBB0_15: // =>This Inner Loop Header: Depth=1
|
||||
stp q0, q0, [x10]
|
||||
stp q0, q0, [x11]
|
||||
stp q0, q0, [x10, #-32]
|
||||
stp q0, q0, [x13]
|
||||
stp q0, q0, [x11, #-32]
|
||||
add x10, x10, #64 // =64
|
||||
stp q0, q0, [x12]
|
||||
stp q0, q0, [x13, #-32]
|
||||
add x11, x11, #64 // =64
|
||||
stp q0, q0, [x12, #-32]
|
||||
add x12, x12, #64 // =64
|
||||
add x13, x13, #64 // =64
|
||||
adds x9, x9, #1 // =1
|
||||
b.ne .LBB0_15
|
||||
.LBB0_16:
|
||||
cmp x8, x24
|
||||
b.eq .LBB0_19
|
||||
.LBB0_17:
|
||||
ldr x10, [sp, #88] // 8-byte Folded Reload
|
||||
mov x13, #28286
|
||||
movk x13, #29109, lsl #16
|
||||
lsl x12, x8, #3
|
||||
movk x13, #34426, lsl #32
|
||||
add x9, x20, x12
|
||||
movk x13, #16000, lsl #48
|
||||
add x10, x10, x12
|
||||
add x11, x22, x12
|
||||
add x12, x23, x12
|
||||
sub x8, x24, x8
|
||||
.p2align 6
|
||||
.LBB0_18: // =>This Inner Loop Header: Depth=1
|
||||
str x13, [x12], #8
|
||||
str x13, [x11], #8
|
||||
str x13, [x10], #8
|
||||
str x13, [x9], #8
|
||||
subs x8, x8, #1 // =1
|
||||
b.ne .LBB0_18
|
||||
.LBB0_19:
|
||||
ldr x10, [sp, #88] // 8-byte Folded Reload
|
||||
add x8, x20, #256 // =256
|
||||
and x26, x24, #0xfffffff8
|
||||
str x8, [sp, #40] // 8-byte Folded Spill
|
||||
add x8, x23, #256 // =256
|
||||
sub x27, x26, #8 // =8
|
||||
str x8, [sp, #32] // 8-byte Folded Spill
|
||||
add x8, x22, #256 // =256
|
||||
orr w25, wzr, #0x1
|
||||
str x8, [sp, #24] // 8-byte Folded Spill
|
||||
add x8, x10, #256 // =256
|
||||
str x8, [sp, #16] // 8-byte Folded Spill
|
||||
lsr x8, x27, #3
|
||||
add w9, w8, #1 // =1
|
||||
mvn x8, x8
|
||||
and x28, x9, #0x7
|
||||
add x8, x28, x8
|
||||
str x8, [sp, #8] // 8-byte Folded Spill
|
||||
neg x8, x28
|
||||
str x8, [sp, #80] // 8-byte Folded Spill
|
||||
add x8, x10, #32 // =32
|
||||
str x8, [sp, #72] // 8-byte Folded Spill
|
||||
add x8, x22, #32 // =32
|
||||
str x8, [sp, #64] // 8-byte Folded Spill
|
||||
add x8, x20, #32 // =32
|
||||
str x8, [sp, #56] // 8-byte Folded Spill
|
||||
add x8, x23, #32 // =32
|
||||
str x8, [sp, #48] // 8-byte Folded Spill
|
||||
adrp x8, .LCPI0_0
|
||||
ldr d8, [x8, :lo12:.LCPI0_0]
|
||||
.p2align 6
|
||||
.LBB0_20: // =>This Loop Header: Depth=1
|
||||
// Child Loop BB0_22 Depth 2
|
||||
// Child Loop BB0_29 Depth 3
|
||||
// Child Loop BB0_32 Depth 3
|
||||
// Child Loop BB0_35 Depth 3
|
||||
sub x0, x29, #88 // =88
|
||||
add x1, sp, #96 // =96
|
||||
bl timing
|
||||
cbz w25, .LBB0_37
|
||||
// %bb.21: // in Loop: Header=BB0_20 Depth=1
|
||||
mov w21, wzr
|
||||
.p2align 6
|
||||
.LBB0_22: // Parent Loop BB0_20 Depth=1
|
||||
// => This Loop Header: Depth=2
|
||||
// Child Loop BB0_29 Depth 3
|
||||
// Child Loop BB0_32 Depth 3
|
||||
// Child Loop BB0_35 Depth 3
|
||||
ldr d0, [x20]
|
||||
fcmp d0, #0.0
|
||||
b.le .LBB0_24
|
||||
// %bb.23: // in Loop: Header=BB0_22 Depth=2
|
||||
mov x0, x20
|
||||
bl dummy
|
||||
.LBB0_24: // in Loop: Header=BB0_22 Depth=2
|
||||
cmp w19, #7 // =7
|
||||
b.hi .LBB0_26
|
||||
// %bb.25: // in Loop: Header=BB0_22 Depth=2
|
||||
mov x12, xzr
|
||||
b .LBB0_34
|
||||
.p2align 6
|
||||
.LBB0_26: // in Loop: Header=BB0_22 Depth=2
|
||||
cmp x27, #56 // =56
|
||||
b.hs .LBB0_28
|
||||
// %bb.27: // in Loop: Header=BB0_22 Depth=2
|
||||
mov x8, xzr
|
||||
cbnz x28, .LBB0_31
|
||||
b .LBB0_33
|
||||
.p2align 6
|
||||
.LBB0_28: // in Loop: Header=BB0_22 Depth=2
|
||||
ldp x9, x10, [sp, #16] // 8-byte Folded Reload
|
||||
ldp x11, x12, [sp, #32] // 8-byte Folded Reload
|
||||
ldr x13, [sp, #8] // 8-byte Folded Reload
|
||||
mov x8, xzr
|
||||
.p2align 6
|
||||
mov x1, #111 // OSACA START
|
||||
.byte 213,3,32,31 // OSACA START
|
||||
.LBB0_29: // Parent Loop BB0_20 Depth=1
|
||||
// Parent Loop BB0_22 Depth=2
|
||||
// => This Inner Loop Header: Depth=3
|
||||
ldp q2, q5, [x10, #-256]
|
||||
ldp q6, q7, [x10, #-224]
|
||||
ldp q16, q17, [x11, #-256]
|
||||
ldp q18, q19, [x11, #-224]
|
||||
fmul v2.2d, v2.2d, v16.2d
|
||||
fmul v5.2d, v5.2d, v17.2d
|
||||
fmul v6.2d, v6.2d, v18.2d
|
||||
ldp q0, q1, [x9, #-256]
|
||||
ldp q3, q4, [x9, #-224]
|
||||
fmul v7.2d, v7.2d, v19.2d
|
||||
fadd v0.2d, v0.2d, v2.2d
|
||||
fadd v2.2d, v1.2d, v5.2d
|
||||
stp q0, q2, [x12, #-256]
|
||||
fadd v1.2d, v3.2d, v6.2d
|
||||
ldp q6, q17, [x10, #-192]
|
||||
ldp q18, q19, [x10, #-160]
|
||||
ldp q20, q21, [x11, #-192]
|
||||
ldp q22, q23, [x11, #-160]
|
||||
fmul v6.2d, v6.2d, v20.2d
|
||||
fmul v17.2d, v17.2d, v21.2d
|
||||
fmul v18.2d, v18.2d, v22.2d
|
||||
fadd v3.2d, v4.2d, v7.2d
|
||||
stp q1, q3, [x12, #-224]
|
||||
ldp q4, q5, [x9, #-192]
|
||||
ldp q7, q16, [x9, #-160]
|
||||
fmul v19.2d, v19.2d, v23.2d
|
||||
fadd v4.2d, v4.2d, v6.2d
|
||||
fadd v6.2d, v5.2d, v17.2d
|
||||
stp q4, q6, [x12, #-192]
|
||||
fadd v5.2d, v7.2d, v18.2d
|
||||
ldp q18, q21, [x10, #-128]
|
||||
ldp q22, q23, [x10, #-96]
|
||||
ldp q24, q25, [x11, #-128]
|
||||
ldp q26, q27, [x11, #-96]
|
||||
fmul v18.2d, v18.2d, v24.2d
|
||||
fmul v21.2d, v21.2d, v25.2d
|
||||
fmul v22.2d, v22.2d, v26.2d
|
||||
fadd v7.2d, v16.2d, v19.2d
|
||||
stp q5, q7, [x12, #-160]
|
||||
ldp q16, q17, [x9, #-128]
|
||||
ldp q19, q20, [x9, #-96]
|
||||
fadd v16.2d, v16.2d, v18.2d
|
||||
fadd v18.2d, v17.2d, v21.2d
|
||||
stp q16, q18, [x12, #-128]
|
||||
fadd v17.2d, v19.2d, v22.2d
|
||||
ldp q22, q25, [x10, #-64]
|
||||
ldp q28, q29, [x11, #-64]
|
||||
fmul v23.2d, v23.2d, v27.2d
|
||||
ldp q26, q27, [x10, #-32]
|
||||
fmul v22.2d, v22.2d, v28.2d
|
||||
fmul v25.2d, v25.2d, v29.2d
|
||||
ldp q28, q29, [x11, #-32]
|
||||
fmul v26.2d, v26.2d, v28.2d
|
||||
fmul v27.2d, v27.2d, v29.2d
|
||||
fadd v19.2d, v20.2d, v23.2d
|
||||
stp q17, q19, [x12, #-96]
|
||||
ldp q20, q21, [x9, #-64]
|
||||
ldp q23, q24, [x9, #-32]
|
||||
fadd v20.2d, v20.2d, v22.2d
|
||||
fadd v22.2d, v21.2d, v25.2d
|
||||
stp q20, q22, [x12, #-64]
|
||||
fadd v21.2d, v23.2d, v26.2d
|
||||
fadd v23.2d, v24.2d, v27.2d
|
||||
stp q21, q23, [x12, #-32]
|
||||
ldp q24, q25, [x10]
|
||||
ldp q28, q29, [x11]
|
||||
ldp q26, q27, [x10, #32]
|
||||
fmul v24.2d, v24.2d, v28.2d
|
||||
fmul v25.2d, v25.2d, v29.2d
|
||||
ldp q28, q29, [x11, #32]
|
||||
fmul v26.2d, v26.2d, v28.2d
|
||||
fmul v27.2d, v27.2d, v29.2d
|
||||
ldp q28, q29, [x9]
|
||||
fadd v24.2d, v28.2d, v24.2d
|
||||
fadd v25.2d, v29.2d, v25.2d
|
||||
stp q24, q25, [x12]
|
||||
ldp q28, q29, [x9, #32]
|
||||
fadd v26.2d, v28.2d, v26.2d
|
||||
fadd v27.2d, v29.2d, v27.2d
|
||||
stp q26, q27, [x12, #32]
|
||||
ldp q24, q25, [x10, #64]
|
||||
ldp q28, q29, [x11, #64]
|
||||
ldp q26, q27, [x10, #96]
|
||||
fmul v24.2d, v24.2d, v28.2d
|
||||
fmul v25.2d, v25.2d, v29.2d
|
||||
ldp q28, q29, [x11, #96]
|
||||
fmul v26.2d, v26.2d, v28.2d
|
||||
fmul v27.2d, v27.2d, v29.2d
|
||||
ldp q28, q29, [x9, #64]
|
||||
fadd v24.2d, v28.2d, v24.2d
|
||||
fadd v25.2d, v29.2d, v25.2d
|
||||
stp q24, q25, [x12, #64]
|
||||
ldp q28, q29, [x9, #96]
|
||||
fadd v26.2d, v28.2d, v26.2d
|
||||
fadd v27.2d, v29.2d, v27.2d
|
||||
stp q26, q27, [x12, #96]
|
||||
ldp q24, q25, [x10, #128]
|
||||
ldp q28, q29, [x11, #128]
|
||||
ldp q26, q27, [x10, #160]
|
||||
fmul v24.2d, v24.2d, v28.2d
|
||||
fmul v25.2d, v25.2d, v29.2d
|
||||
ldp q28, q29, [x11, #160]
|
||||
fmul v26.2d, v26.2d, v28.2d
|
||||
fmul v27.2d, v27.2d, v29.2d
|
||||
ldp q28, q29, [x9, #128]
|
||||
fadd v24.2d, v28.2d, v24.2d
|
||||
fadd v25.2d, v29.2d, v25.2d
|
||||
stp q24, q25, [x12, #128]
|
||||
ldp q28, q29, [x9, #160]
|
||||
fadd v26.2d, v28.2d, v26.2d
|
||||
fadd v27.2d, v29.2d, v27.2d
|
||||
stp q26, q27, [x12, #160]
|
||||
ldp q24, q25, [x10, #192]
|
||||
ldp q26, q27, [x11, #192]
|
||||
fmul v24.2d, v24.2d, v26.2d
|
||||
ldp q26, q28, [x10, #224]
|
||||
fmul v25.2d, v25.2d, v27.2d
|
||||
ldp q27, q0, [x11, #224]
|
||||
fmul v2.2d, v26.2d, v27.2d
|
||||
fmul v0.2d, v28.2d, v0.2d
|
||||
ldp q1, q3, [x9, #192]
|
||||
ldp q4, q5, [x9, #224]
|
||||
fadd v1.2d, v1.2d, v24.2d
|
||||
fadd v3.2d, v3.2d, v25.2d
|
||||
stp q1, q3, [x12, #192]
|
||||
fadd v2.2d, v4.2d, v2.2d
|
||||
fadd v0.2d, v5.2d, v0.2d
|
||||
stp q2, q0, [x12, #224]
|
||||
add x8, x8, #64 // =64
|
||||
add x12, x12, #512 // =512
|
||||
add x11, x11, #512 // =512
|
||||
add x10, x10, #512 // =512
|
||||
add x9, x9, #512 // =512
|
||||
adds x13, x13, #8 // =8
|
||||
b.ne .LBB0_29
|
||||
mov x1, #222 // OSACA END
|
||||
.byte 213,3,32,31 // OSACA END
|
||||
// %bb.30: // in Loop: Header=BB0_22 Depth=2
|
||||
cbz x28, .LBB0_33
|
||||
.LBB0_31: // in Loop: Header=BB0_22 Depth=2
|
||||
lsl x11, x8, #3
|
||||
ldp x9, x8, [sp, #64] // 8-byte Folded Reload
|
||||
ldp x12, x10, [sp, #48] // 8-byte Folded Reload
|
||||
add x8, x8, x11
|
||||
add x9, x9, x11
|
||||
add x10, x10, x11
|
||||
add x11, x12, x11
|
||||
ldr x12, [sp, #80] // 8-byte Folded Reload
|
||||
.p2align 6
|
||||
.LBB0_32: // Parent Loop BB0_20 Depth=1
|
||||
// Parent Loop BB0_22 Depth=2
|
||||
// => This Inner Loop Header: Depth=3
|
||||
ldp q4, q5, [x9, #-32]
|
||||
ldp q6, q7, [x9], #64
|
||||
ldp q16, q17, [x11, #-32]
|
||||
ldp q18, q19, [x11], #64
|
||||
fmul v4.2d, v4.2d, v16.2d
|
||||
fmul v5.2d, v5.2d, v17.2d
|
||||
fmul v6.2d, v6.2d, v18.2d
|
||||
fmul v7.2d, v7.2d, v19.2d
|
||||
ldp q0, q1, [x8, #-32]
|
||||
ldp q2, q3, [x8], #64
|
||||
fadd v0.2d, v0.2d, v4.2d
|
||||
fadd v1.2d, v1.2d, v5.2d
|
||||
stp q0, q1, [x10, #-32]
|
||||
fadd v2.2d, v2.2d, v6.2d
|
||||
fadd v3.2d, v3.2d, v7.2d
|
||||
stp q2, q3, [x10]
|
||||
add x10, x10, #64 // =64
|
||||
adds x12, x12, #1 // =1
|
||||
b.ne .LBB0_32
|
||||
.LBB0_33: // in Loop: Header=BB0_22 Depth=2
|
||||
mov x12, x26
|
||||
cmp x26, x24
|
||||
b.eq .LBB0_36
|
||||
.LBB0_34: // in Loop: Header=BB0_22 Depth=2
|
||||
ldr x8, [sp, #88] // 8-byte Folded Reload
|
||||
lsl x11, x12, #3
|
||||
sub x12, x24, x12
|
||||
add x8, x8, x11
|
||||
add x9, x22, x11
|
||||
add x10, x23, x11
|
||||
add x11, x20, x11
|
||||
.p2align 6
|
||||
.LBB0_35: // Parent Loop BB0_20 Depth=1
|
||||
// Parent Loop BB0_22 Depth=2
|
||||
// => This Inner Loop Header: Depth=3
|
||||
ldr d0, [x8], #8
|
||||
ldr d1, [x9], #8
|
||||
ldr d2, [x10], #8
|
||||
fmul d1, d1, d2
|
||||
fadd d0, d0, d1
|
||||
str d0, [x11], #8
|
||||
subs x12, x12, #1 // =1
|
||||
b.ne .LBB0_35
|
||||
.LBB0_36: // in Loop: Header=BB0_22 Depth=2
|
||||
add w21, w21, #1 // =1
|
||||
cmp w21, w25
|
||||
b.ne .LBB0_22
|
||||
.LBB0_37: // in Loop: Header=BB0_20 Depth=1
|
||||
add x0, sp, #104 // =104
|
||||
add x1, sp, #96 // =96
|
||||
bl timing
|
||||
ldr d0, [sp, #104]
|
||||
ldur d1, [x29, #-88]
|
||||
fsub d1, d0, d1
|
||||
lsl w25, w25, #1
|
||||
fcmp d1, d8
|
||||
b.mi .LBB0_20
|
||||
.LBB0_38:
|
||||
scvtf d4, w19
|
||||
lsr w1, w25, #1
|
||||
adrp x8, .LCPI0_1
|
||||
scvtf d6, w1
|
||||
fadd d2, d4, d4
|
||||
ldr d5, [x8, :lo12:.LCPI0_1]
|
||||
adrp x8, .LCPI0_2
|
||||
fmov d0, #8.00000000
|
||||
fmul d2, d2, d6
|
||||
ldr d3, [x8, :lo12:.LCPI0_2]
|
||||
adrp x8, .LCPI0_3
|
||||
adrp x0, .L.str
|
||||
fmul d2, d2, d3
|
||||
ldr d3, [x8, :lo12:.LCPI0_3]
|
||||
adrp x8, .LCPI0_4
|
||||
add x0, x0, :lo12:.L.str
|
||||
fmul d3, d6, d3
|
||||
fmul d0, d4, d0
|
||||
fmul d3, d3, d4
|
||||
fmul d4, d4, d6
|
||||
fdiv d3, d3, d1
|
||||
fdiv d4, d4, d1
|
||||
fdiv d4, d4, d5
|
||||
fdiv d0, d0, d5
|
||||
fdiv d2, d2, d1
|
||||
ldr d7, [x8, :lo12:.LCPI0_4]
|
||||
fmul d3, d3, d7
|
||||
fdiv d4, d4, d5
|
||||
fmul d3, d3, d7
|
||||
mov w2, w19
|
||||
bl printf
|
||||
mov x0, x20
|
||||
bl free
|
||||
ldr x0, [sp, #88] // 8-byte Folded Reload
|
||||
bl free
|
||||
mov x0, x22
|
||||
bl free
|
||||
mov x0, x23
|
||||
bl free
|
||||
ldp x29, x30, [sp, #208] // 16-byte Folded Reload
|
||||
ldp x20, x19, [sp, #192] // 16-byte Folded Reload
|
||||
ldp x22, x21, [sp, #176] // 16-byte Folded Reload
|
||||
ldp x24, x23, [sp, #160] // 16-byte Folded Reload
|
||||
ldp x26, x25, [sp, #144] // 16-byte Folded Reload
|
||||
ldp x28, x27, [sp, #128] // 16-byte Folded Reload
|
||||
ldr d8, [sp, #112] // 8-byte Folded Reload
|
||||
add sp, sp, #224 // =224
|
||||
ret
|
||||
.Lfunc_end0:
|
||||
.size triad, .Lfunc_end0-triad
|
||||
.cfi_endproc
|
||||
// -- End function
|
||||
.globl main // -- Begin function main
|
||||
.p2align 6
|
||||
.type main,@function
|
||||
main: // @main
|
||||
.cfi_startproc
|
||||
// %bb.0:
|
||||
stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
||||
mov x29, sp
|
||||
.cfi_def_cfa w29, 16
|
||||
.cfi_offset w30, -8
|
||||
.cfi_offset w29, -16
|
||||
adrp x0, .Lstr
|
||||
add x0, x0, :lo12:.Lstr
|
||||
bl puts
|
||||
adrp x0, .Lstr.3
|
||||
add x0, x0, :lo12:.Lstr.3
|
||||
bl puts
|
||||
mov w0, #190
|
||||
bl triad
|
||||
mov w0, #247
|
||||
bl triad
|
||||
mov w0, #321
|
||||
bl triad
|
||||
mov w0, #417
|
||||
bl triad
|
||||
mov w0, #542
|
||||
bl triad
|
||||
mov w0, #705
|
||||
bl triad
|
||||
mov w0, #917
|
||||
bl triad
|
||||
mov w0, #1192
|
||||
bl triad
|
||||
mov w0, #1550
|
||||
bl triad
|
||||
mov w0, #2015
|
||||
bl triad
|
||||
mov w0, #2619
|
||||
bl triad
|
||||
mov w0, #3405
|
||||
bl triad
|
||||
mov w0, #4427
|
||||
bl triad
|
||||
mov w0, #5756
|
||||
bl triad
|
||||
mov w0, #7482
|
||||
bl triad
|
||||
mov w0, #9727
|
||||
bl triad
|
||||
mov w0, wzr
|
||||
ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
||||
ret
|
||||
.Lfunc_end1:
|
||||
.size main, .Lfunc_end1-main
|
||||
.cfi_endproc
|
||||
.type .L.str,@object // @.str
|
||||
.section .rodata.str1.1,"aMS",@progbits,1
|
||||
.L.str:
|
||||
.asciz "%12.1f | %9.8f | %9.3f | %7.1f | %7.1f | %7d | %4d \n"
|
||||
.size .L.str, 53
|
||||
.type .Lstr,@object // @str
|
||||
.section .rodata.str1.16,"aMS",@progbits,1
|
||||
.p2align 4
|
||||
.Lstr:
|
||||
.asciz "TRIAD a[i] = b[i]+c[i]*d[i], 32 byte/it, 2 Flop/it"
|
||||
.size .Lstr, 51
|
||||
.type .Lstr.3,@object // @str.3
|
||||
.p2align 4
|
||||
.Lstr.3:
|
||||
.asciz "Size (KByte) | runtime | MFlop/s | MB/s | MLUP/s | repeat | size"
|
||||
.size .Lstr.3, 74
|
||||
.ident "Arm C/C++/Fortran Compiler version 19.0 (build number 69) (based on LLVM 7.0.2)"
|
||||
.section ".note.GNU-stack","",@progbits
|
||||
.addrsig
|
||||
353
tests/test_files/triad-x86-iaca.s
Normal file
353
tests/test_files/triad-x86-iaca.s
Normal file
@@ -0,0 +1,353 @@
|
||||
.file "triad.c"
|
||||
.section .rodata.str1.8,"aMS",@progbits,1
|
||||
.align 8
|
||||
.LC9:
|
||||
.string "%12.1f | %9.8f | %9.3f | %7.1f | %7.1f | %7d | %4d \n"
|
||||
.text
|
||||
.p2align 4,,15
|
||||
.globl triad
|
||||
.type triad, @function
|
||||
triad:
|
||||
.LFB24:
|
||||
.cfi_startproc
|
||||
pushq %r13
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset 13, -16
|
||||
movslq %edi, %rax
|
||||
movl $64, %edi
|
||||
leaq 16(%rsp), %r13
|
||||
.cfi_def_cfa 13, 0
|
||||
andq $-32, %rsp
|
||||
pushq -8(%r13)
|
||||
pushq %rbp
|
||||
.cfi_escape 0x10,0x6,0x2,0x76,0
|
||||
movq %rsp, %rbp
|
||||
pushq %r15
|
||||
.cfi_escape 0x10,0xf,0x2,0x76,0x78
|
||||
leaq 0(,%rax,8), %r15
|
||||
pushq %r14
|
||||
movq %r15, %rsi
|
||||
pushq %r13
|
||||
.cfi_escape 0xf,0x3,0x76,0x68,0x6
|
||||
.cfi_escape 0x10,0xe,0x2,0x76,0x70
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
.cfi_escape 0x10,0xc,0x2,0x76,0x60
|
||||
.cfi_escape 0x10,0x3,0x2,0x76,0x58
|
||||
movq %rax, %rbx
|
||||
subq $72, %rsp
|
||||
call aligned_alloc
|
||||
movq %r15, %rsi
|
||||
movl $64, %edi
|
||||
movq %rax, %r14
|
||||
call aligned_alloc
|
||||
movq %r15, %rsi
|
||||
movl $64, %edi
|
||||
movq %rax, %r12
|
||||
call aligned_alloc
|
||||
movq %r15, %rsi
|
||||
movl $64, %edi
|
||||
movq %rax, %r13
|
||||
call aligned_alloc
|
||||
movq %rax, %r15
|
||||
leal -1(%rbx), %eax
|
||||
movl %eax, -96(%rbp)
|
||||
testl %ebx, %ebx
|
||||
jle .L2
|
||||
cmpl $2, %eax
|
||||
jbe .L14
|
||||
movl %ebx, %esi
|
||||
vmovapd .LC0(%rip), %ymm0
|
||||
xorl %eax, %eax
|
||||
xorl %ecx, %ecx
|
||||
shrl $2, %esi
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L4:
|
||||
addl $1, %ecx
|
||||
vmovapd %ymm0, (%r15,%rax)
|
||||
vmovapd %ymm0, 0(%r13,%rax)
|
||||
vmovapd %ymm0, (%r12,%rax)
|
||||
vmovapd %ymm0, (%r14,%rax)
|
||||
addq $32, %rax
|
||||
cmpl %ecx, %esi
|
||||
ja .L4
|
||||
movl %ebx, %eax
|
||||
andl $-4, %eax
|
||||
cmpl %eax, %ebx
|
||||
je .L26
|
||||
vzeroupper
|
||||
.L3:
|
||||
vmovsd .LC1(%rip), %xmm0
|
||||
movslq %eax, %rcx
|
||||
vmovsd %xmm0, (%r15,%rcx,8)
|
||||
vmovsd %xmm0, 0(%r13,%rcx,8)
|
||||
vmovsd %xmm0, (%r12,%rcx,8)
|
||||
vmovsd %xmm0, (%r14,%rcx,8)
|
||||
leal 1(%rax), %ecx
|
||||
cmpl %ecx, %ebx
|
||||
jle .L2
|
||||
movslq %ecx, %rcx
|
||||
addl $2, %eax
|
||||
vmovsd %xmm0, (%r15,%rcx,8)
|
||||
vmovsd %xmm0, 0(%r13,%rcx,8)
|
||||
vmovsd %xmm0, (%r12,%rcx,8)
|
||||
vmovsd %xmm0, (%r14,%rcx,8)
|
||||
cmpl %eax, %ebx
|
||||
jle .L2
|
||||
cltq
|
||||
vmovsd %xmm0, (%r15,%rax,8)
|
||||
vmovsd %xmm0, 0(%r13,%rax,8)
|
||||
vmovsd %xmm0, (%r12,%rax,8)
|
||||
vmovsd %xmm0, (%r14,%rax,8)
|
||||
.L2:
|
||||
movl %ebx, %eax
|
||||
movl $1, -84(%rbp)
|
||||
movl %ebx, %r10d
|
||||
andl $-4, %eax
|
||||
shrl $2, %r10d
|
||||
movl %eax, -100(%rbp)
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L13:
|
||||
leaq -56(%rbp), %rsi
|
||||
leaq -72(%rbp), %rdi
|
||||
movl %r10d, -88(%rbp)
|
||||
call timing
|
||||
movl -88(%rbp), %r10d
|
||||
xorl %r11d, %r11d
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L12:
|
||||
vmovsd (%r14), %xmm0
|
||||
vxorpd %xmm7, %xmm7, %xmm7
|
||||
vucomisd %xmm7, %xmm0
|
||||
jbe .L6
|
||||
movq %r14, %rdi
|
||||
movl %r11d, -92(%rbp)
|
||||
movl %r10d, -88(%rbp)
|
||||
vzeroupper
|
||||
call dummy
|
||||
movl -92(%rbp), %r11d
|
||||
movl -88(%rbp), %r10d
|
||||
.L6:
|
||||
testl %ebx, %ebx
|
||||
jle .L8
|
||||
cmpl $2, -96(%rbp)
|
||||
jbe .L15
|
||||
xorl %eax, %eax
|
||||
xorl %ecx, %ecx
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
movl $111, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.L10:
|
||||
vmovapd (%r15,%rax), %ymm0
|
||||
vmovapd (%r12,%rax), %ymm3
|
||||
addl $1, %ecx
|
||||
vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0
|
||||
vmovapd %ymm0, (%r14,%rax)
|
||||
addq $32, %rax
|
||||
cmpl %ecx, %r10d
|
||||
ja .L10
|
||||
movl $222, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
movl -100(%rbp), %eax
|
||||
cmpl %ebx, %eax
|
||||
je .L8
|
||||
.L9:
|
||||
movslq %eax, %rcx
|
||||
vmovsd 0(%r13,%rcx,8), %xmm0
|
||||
vmovsd (%r12,%rcx,8), %xmm5
|
||||
vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0
|
||||
vmovsd %xmm0, (%r14,%rcx,8)
|
||||
leal 1(%rax), %ecx
|
||||
cmpl %ebx, %ecx
|
||||
jge .L8
|
||||
movslq %ecx, %rcx
|
||||
addl $2, %eax
|
||||
vmovsd 0(%r13,%rcx,8), %xmm0
|
||||
vmovsd (%r12,%rcx,8), %xmm6
|
||||
vfmadd132sd (%r15,%rcx,8), %xmm6, %xmm0
|
||||
vmovsd %xmm0, (%r14,%rcx,8)
|
||||
cmpl %eax, %ebx
|
||||
jle .L8
|
||||
cltq
|
||||
vmovsd (%r15,%rax,8), %xmm0
|
||||
vmovsd (%r12,%rax,8), %xmm4
|
||||
vfmadd132sd 0(%r13,%rax,8), %xmm4, %xmm0
|
||||
vmovsd %xmm0, (%r14,%rax,8)
|
||||
.L8:
|
||||
addl $1, %r11d
|
||||
cmpl -84(%rbp), %r11d
|
||||
jne .L12
|
||||
leaq -56(%rbp), %rsi
|
||||
leaq -64(%rbp), %rdi
|
||||
movl %r11d, -84(%rbp)
|
||||
movl %r10d, -88(%rbp)
|
||||
vzeroupper
|
||||
call timing
|
||||
vmovsd -64(%rbp), %xmm1
|
||||
vsubsd -72(%rbp), %xmm1, %xmm1
|
||||
vmovsd .LC3(%rip), %xmm2
|
||||
movl -84(%rbp), %r11d
|
||||
movl -88(%rbp), %r10d
|
||||
vucomisd %xmm1, %xmm2
|
||||
leal (%r11,%r11), %eax
|
||||
movl %eax, -84(%rbp)
|
||||
ja .L13
|
||||
movl %eax, %esi
|
||||
vxorpd %xmm6, %xmm6, %xmm6
|
||||
vxorpd %xmm0, %xmm0, %xmm0
|
||||
movl %ebx, %edx
|
||||
sarl %esi
|
||||
vcvtsi2sd %ebx, %xmm0, %xmm0
|
||||
movl $.LC9, %edi
|
||||
movl $5, %eax
|
||||
vcvtsi2sd %esi, %xmm6, %xmm6
|
||||
vmulsd .LC5(%rip), %xmm6, %xmm2
|
||||
vmovsd .LC4(%rip), %xmm5
|
||||
vmovsd .LC6(%rip), %xmm7
|
||||
vmulsd %xmm0, %xmm6, %xmm4
|
||||
vmulsd %xmm0, %xmm2, %xmm2
|
||||
vdivsd %xmm1, %xmm4, %xmm4
|
||||
vdivsd %xmm1, %xmm2, %xmm2
|
||||
vdivsd %xmm5, %xmm4, %xmm4
|
||||
vmulsd %xmm7, %xmm2, %xmm3
|
||||
vaddsd %xmm0, %xmm0, %xmm2
|
||||
vmulsd .LC8(%rip), %xmm0, %xmm0
|
||||
vmulsd %xmm6, %xmm2, %xmm2
|
||||
vmulsd .LC7(%rip), %xmm2, %xmm2
|
||||
vmulsd %xmm7, %xmm3, %xmm3
|
||||
vdivsd %xmm5, %xmm0, %xmm0
|
||||
vdivsd %xmm5, %xmm4, %xmm4
|
||||
vdivsd %xmm1, %xmm2, %xmm2
|
||||
call printf
|
||||
movq %r14, %rdi
|
||||
call free
|
||||
movq %r12, %rdi
|
||||
call free
|
||||
movq %r13, %rdi
|
||||
call free
|
||||
addq $72, %rsp
|
||||
movq %r15, %rdi
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
.cfi_remember_state
|
||||
.cfi_def_cfa 13, 0
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
leaq -16(%r13), %rsp
|
||||
.cfi_def_cfa 7, 16
|
||||
popq %r13
|
||||
.cfi_def_cfa_offset 8
|
||||
jmp free
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L15:
|
||||
.cfi_restore_state
|
||||
xorl %eax, %eax
|
||||
jmp .L9
|
||||
.L26:
|
||||
vzeroupper
|
||||
jmp .L2
|
||||
.L14:
|
||||
xorl %eax, %eax
|
||||
jmp .L3
|
||||
.cfi_endproc
|
||||
.LFE24:
|
||||
.size triad, .-triad
|
||||
.section .rodata.str1.8
|
||||
.align 8
|
||||
.LC10:
|
||||
.string "TRIAD a[i] = b[i]+c[i]*d[i], 32 byte/it, 2 Flop/it"
|
||||
.align 8
|
||||
.LC11:
|
||||
.string "Size (KByte) | runtime | MFlop/s | MB/s | MLUP/s | repeat | size"
|
||||
.section .text.startup,"ax",@progbits
|
||||
.p2align 4,,15
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
.LFB25:
|
||||
.cfi_startproc
|
||||
pushq %rbx
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset 3, -16
|
||||
movl $.LC10, %edi
|
||||
movl $20, %ebx
|
||||
call puts
|
||||
movl $.LC11, %edi
|
||||
call puts
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L28:
|
||||
vxorpd %xmm1, %xmm1, %xmm1
|
||||
movq .LC12(%rip), %rax
|
||||
vcvtsi2sd %ebx, %xmm1, %xmm1
|
||||
addl $1, %ebx
|
||||
vmovq %rax, %xmm0
|
||||
call pow
|
||||
vcvttsd2si %xmm0, %edi
|
||||
call triad
|
||||
cmpl $36, %ebx
|
||||
jne .L28
|
||||
xorl %eax, %eax
|
||||
popq %rbx
|
||||
.cfi_def_cfa_offset 8
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LFE25:
|
||||
.size main, .-main
|
||||
.section .rodata.cst32,"aM",@progbits,32
|
||||
.align 32
|
||||
.LC0:
|
||||
.long 1907715710
|
||||
.long 1048610426
|
||||
.long 1907715710
|
||||
.long 1048610426
|
||||
.long 1907715710
|
||||
.long 1048610426
|
||||
.long 1907715710
|
||||
.long 1048610426
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LC1:
|
||||
.long 1907715710
|
||||
.long 1048610426
|
||||
.align 8
|
||||
.LC3:
|
||||
.long 2576980378
|
||||
.long 1070176665
|
||||
.align 8
|
||||
.LC4:
|
||||
.long 0
|
||||
.long 1083129856
|
||||
.align 8
|
||||
.LC5:
|
||||
.long 0
|
||||
.long 1077936128
|
||||
.align 8
|
||||
.LC6:
|
||||
.long 0
|
||||
.long 1062207488
|
||||
.align 8
|
||||
.LC7:
|
||||
.long 2696277389
|
||||
.long 1051772663
|
||||
.align 8
|
||||
.LC8:
|
||||
.long 0
|
||||
.long 1075838976
|
||||
.align 8
|
||||
.LC12:
|
||||
.long 3435973837
|
||||
.long 1073007820
|
||||
.ident "GCC: (GNU) 7.2.0"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
94
tests/test_frontend.py
Executable file
94
tests/test_frontend.py
Executable file
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for OSACA Frontend
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from osaca.frontend import Frontend
|
||||
from osaca.parser import ParserAArch64v81, ParserX86ATT
|
||||
from osaca.semantics.hw_model import MachineModel
|
||||
from osaca.semantics.kernel_dg import KernelDG
|
||||
from osaca.semantics.semantics_appender import SemanticsAppender
|
||||
|
||||
|
||||
class TestFrontend(unittest.TestCase):
|
||||
MODULE_DATA_DIR = os.path.join(
|
||||
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
# set up parser and kernels
|
||||
self.parser_x86 = ParserX86ATT()
|
||||
self.parser_AArch64 = ParserAArch64v81()
|
||||
with open(self._find_file('kernel-x86.s')) as f:
|
||||
code_x86 = f.read()
|
||||
with open(self._find_file('kernel-AArch64.s')) as f:
|
||||
code_AArch64 = f.read()
|
||||
self.kernel_x86 = self.parser_x86.parse_file(code_x86)
|
||||
self.kernel_AArch64 = self.parser_AArch64.parse_file(code_AArch64)
|
||||
|
||||
# set up machine models
|
||||
self.machine_model_csx = MachineModel(
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
|
||||
)
|
||||
self.machine_model_tx2 = MachineModel(
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
|
||||
)
|
||||
self.semantics_csx = SemanticsAppender(
|
||||
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')
|
||||
)
|
||||
self.semantics_tx2 = SemanticsAppender(
|
||||
self.machine_model_tx2,
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/aarch64.yml'),
|
||||
)
|
||||
for i in range(len(self.kernel_x86)):
|
||||
self.semantics_csx.assign_src_dst(self.kernel_x86[i])
|
||||
self.semantics_csx.assign_tp_lt(self.kernel_x86[i])
|
||||
for i in range(len(self.kernel_AArch64)):
|
||||
self.semantics_tx2.assign_src_dst(self.kernel_AArch64[i])
|
||||
self.semantics_tx2.assign_tp_lt(self.kernel_AArch64[i])
|
||||
|
||||
###########
|
||||
# Tests
|
||||
###########
|
||||
|
||||
def test_frontend_creation(self):
|
||||
with self.assertRaises(ValueError):
|
||||
Frontend()
|
||||
with self.assertRaises(ValueError):
|
||||
Frontend(arch='csx', path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml'))
|
||||
with self.assertRaises(FileNotFoundError):
|
||||
Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'THE_MACHINE.yml'))
|
||||
with self.assertRaises(FileNotFoundError):
|
||||
Frontend(arch='THE_MACHINE')
|
||||
Frontend(arch='zen1')
|
||||
|
||||
def test_frontend_x86(self):
|
||||
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
|
||||
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml'))
|
||||
fe.print_throughput_analysis(self.kernel_x86, show_cmnts=False)
|
||||
fe.print_latency_analysis(dg.get_critical_path())
|
||||
|
||||
def test_frontend_AArch64(self):
|
||||
dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2)
|
||||
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml'))
|
||||
fe.print_full_analysis(self.kernel_AArch64, dg, verbose=True)
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
name = os.path.join(testdir, 'test_files', name)
|
||||
assert os.path.exists(name)
|
||||
return name
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script: run only this module's test case with verbose output.
    frontend_suite = unittest.TestLoader().loadTestsFromTestCase(TestFrontend)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(frontend_suite)
|
||||
90
tests/test_kerncraftAPI.py
Executable file
90
tests/test_kerncraftAPI.py
Executable file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for OSACA Kerncraft API
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from osaca.api import KerncraftAPI
|
||||
from osaca.parser import ParserAArch64v81, ParserX86ATT
|
||||
|
||||
|
||||
class TestKerncraftAPI(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
# set up parser and kernels
|
||||
self.parser_x86 = ParserX86ATT()
|
||||
self.parser_AArch64 = ParserAArch64v81()
|
||||
with open(self._find_file('triad-x86-iaca.s')) as f:
|
||||
self.code_x86 = f.read()
|
||||
with open(self._find_file('triad-arm-iaca.s')) as f:
|
||||
self.code_AArch64 = f.read()
|
||||
|
||||
###########
|
||||
# Tests
|
||||
###########
|
||||
|
||||
def test_kerncraft_API_x86(self):
|
||||
kapi = KerncraftAPI('csx', self.code_x86)
|
||||
|
||||
kapi.create_output()
|
||||
self.assertEqual(kapi.get_unmatched_instruction_ratio(), 0.0)
|
||||
port_occupation = OrderedDict(
|
||||
[
|
||||
('0', 1.25),
|
||||
('0DV', 0.0),
|
||||
('1', 1.25),
|
||||
('2', 2.0),
|
||||
('2D', 1.5),
|
||||
('3', 2.0),
|
||||
('3D', 1.5),
|
||||
('4', 1.0),
|
||||
('5', 0.75),
|
||||
('6', 0.75),
|
||||
('7', 0.0),
|
||||
]
|
||||
)
|
||||
self.assertEqual(kapi.get_port_occupation_cycles(), port_occupation)
|
||||
self.assertEqual(kapi.get_total_throughput(), 2.0)
|
||||
self.assertEqual(kapi.get_latency(), (1.0, 13.0))
|
||||
|
||||
def test_kerncraft_API_AArch64(self):
|
||||
kapi = KerncraftAPI('tx2', self.code_AArch64)
|
||||
|
||||
kapi.create_output()
|
||||
self.assertEqual(kapi.get_unmatched_instruction_ratio(), 0.0)
|
||||
port_occupation = OrderedDict(
|
||||
[
|
||||
('0', 34.0),
|
||||
('0DV', 0.0),
|
||||
('1', 34.0),
|
||||
('1DV', 0.0),
|
||||
('2', 2.0),
|
||||
('3', 64.0),
|
||||
('4', 64.0),
|
||||
('5', 32.0),
|
||||
]
|
||||
)
|
||||
self.assertEqual(kapi.get_port_occupation_cycles(), port_occupation)
|
||||
self.assertEqual(kapi.get_total_throughput(), 64.0)
|
||||
# TODO add missing latency values
|
||||
# self.assertEqual(kapi.get_latency(kernel), 20.0)
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
name = os.path.join(testdir, 'test_files', name)
|
||||
assert os.path.exists(name)
|
||||
return name
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script: run only this module's test case with verbose output.
    kerncraft_suite = unittest.TestLoader().loadTestsFromTestCase(TestKerncraftAPI)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(kerncraft_suite)
|
||||
308
tests/test_marker_utils.py
Executable file
308
tests/test_marker_utils.py
Executable file
@@ -0,0 +1,308 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for IACA/OSACA marker utilities
|
||||
"""
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from osaca.semantics import reduce_to_section
|
||||
from osaca.parser import ParserAArch64v81, ParserX86ATT
|
||||
|
||||
|
||||
class TestMarkerUtils(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
self.parser_AArch = ParserAArch64v81()
|
||||
self.parser_x86 = ParserX86ATT()
|
||||
with open(self._find_file('triad-arm-iaca.s')) as f:
|
||||
triad_code_arm = f.read()
|
||||
with open(self._find_file('triad-x86-iaca.s')) as f:
|
||||
triad_code_x86 = f.read()
|
||||
self.parsed_AArch = self.parser_AArch.parse_file(triad_code_arm)
|
||||
self.parsed_x86 = self.parser_x86.parse_file(triad_code_x86)
|
||||
|
||||
#################
|
||||
# Test
|
||||
#################
|
||||
|
||||
def test_marker_detection_AArch64(self):
|
||||
kernel = reduce_to_section(self.parsed_AArch, 'AArch64')
|
||||
self.assertEquals(len(kernel), 138)
|
||||
self.assertEquals(kernel[0].line_number, 307)
|
||||
self.assertEquals(kernel[-1].line_number, 444)
|
||||
|
||||
def test_marker_detection_x86(self):
|
||||
kernel = reduce_to_section(self.parsed_x86, 'x86')
|
||||
self.assertEquals(len(kernel), 9)
|
||||
self.assertEquals(kernel[0].line_number, 146)
|
||||
self.assertEquals(kernel[-1].line_number, 154)
|
||||
|
||||
def test_marker_matching_AArch64(self):
|
||||
# preparation
|
||||
bytes_1_line = '.byte 213,3,32,31\n'
|
||||
bytes_2_lines_1 = '.byte 213,3,32\n' + '.byte 31\n'
|
||||
bytes_2_lines_2 = '.byte 213,3\n' + '.byte 32,31\n'
|
||||
bytes_2_lines_3 = '.byte 213\n' + '.byte 3,32,31\n'
|
||||
bytes_3_lines_1 = '.byte 213,3\n' + '.byte 32\n' + '.byte 31\n'
|
||||
bytes_3_lines_2 = '.byte 213\n' + '.byte 3,32\n' + '.byte 31\n'
|
||||
bytes_3_lines_3 = '.byte 213\n' + '.byte 3\n' + '.byte 32,31\n'
|
||||
bytes_4_lines = '.byte 213\n' + '.byte 3\n' + '.byte 32\n' + '.byte 31\n'
|
||||
mov_start_1 = 'mov x1, #111\n'
|
||||
mov_start_2 = 'mov x1, 111 // should work as well\n'
|
||||
mov_end_1 = 'mov x1, #222 // preferred way\n'
|
||||
mov_end_2 = 'mov x1, 222\n'
|
||||
prologue = (
|
||||
'mov x12, xzr\n'
|
||||
+ '\tldp x9, x10, [sp, #16] // 8-byte Folded Reload\n'
|
||||
+ ' .p2align 6\n'
|
||||
)
|
||||
kernel = (
|
||||
'.LBB0_28:\n'
|
||||
+ 'fmul v7.2d, v7.2d, v19.2d\n'
|
||||
+ 'stp q0, q1, [x10, #-32]\n'
|
||||
+ 'b.ne .LBB0_28\n'
|
||||
)
|
||||
epilogue = '.LBB0_29: // Parent Loop BB0_20 Depth=1\n' + 'bl dummy\n'
|
||||
kernel_length = len(list(filter(None, kernel.split('\n'))))
|
||||
|
||||
bytes_variations = [
|
||||
bytes_1_line,
|
||||
bytes_2_lines_1,
|
||||
bytes_2_lines_2,
|
||||
bytes_2_lines_3,
|
||||
bytes_3_lines_1,
|
||||
bytes_3_lines_2,
|
||||
bytes_3_lines_3,
|
||||
bytes_4_lines,
|
||||
]
|
||||
mov_start_variations = [mov_start_1, mov_start_2]
|
||||
mov_end_variations = [mov_end_1, mov_end_2]
|
||||
# actual tests
|
||||
for mov_start_var in mov_start_variations:
|
||||
for bytes_var_1 in bytes_variations:
|
||||
for mov_end_var in mov_end_variations:
|
||||
for bytes_var_2 in bytes_variations:
|
||||
sample_code = (
|
||||
prologue
|
||||
+ mov_start_var
|
||||
+ bytes_var_1
|
||||
+ kernel
|
||||
+ mov_end_var
|
||||
+ bytes_var_2
|
||||
+ epilogue
|
||||
)
|
||||
with self.subTest(
|
||||
mov_start=mov_start_var,
|
||||
bytes_start=bytes_var_1,
|
||||
mov_end=mov_end_var,
|
||||
bytes_end=bytes_var_2,
|
||||
):
|
||||
sample_parsed = self.parser_AArch.parse_file(sample_code)
|
||||
sample_kernel = reduce_to_section(sample_parsed, 'AArch64')
|
||||
self.assertEquals(len(sample_kernel), kernel_length)
|
||||
kernel_start = len(
|
||||
list(
|
||||
filter(
|
||||
None, (prologue + mov_start_var + bytes_var_1).split('\n')
|
||||
)
|
||||
)
|
||||
)
|
||||
parsed_kernel = self.parser_AArch.parse_file(
|
||||
kernel, start_line=kernel_start
|
||||
)
|
||||
self.assertEquals(sample_kernel, parsed_kernel)
|
||||
|
||||
def test_marker_matching_x86(self):
|
||||
# preparation
|
||||
bytes_1_line = '.byte 100,103,144\n'
|
||||
bytes_2_lines_1 = '.byte 100,103\n' + '.byte 144\n'
|
||||
bytes_2_lines_2 = '.byte 100\n' + '.byte 103,144\n'
|
||||
bytes_3_lines = (
|
||||
'.byte 100 # IACA MARKER UTILITY\n'
|
||||
+ '.byte 103 # IACA MARKER UTILITY\n'
|
||||
+ '.byte 144 # IACA MARKER UTILITY\n'
|
||||
)
|
||||
mov_start_1 = 'movl $111, %ebx # IACA START\n'
|
||||
mov_start_2 = 'mov $111, %ebx # IACA START\n'
|
||||
mov_end_1 = 'movl $222, %ebx # IACA END\n'
|
||||
mov_end_2 = 'mov $222, %ebx # IACA END\n'
|
||||
prologue = 'movl -92(%rbp), %r11d\n' + 'movl $111, %ebx\n'
|
||||
kernel = (
|
||||
'vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0\n'
|
||||
+ 'vmovsd %xmm0, (%r14,%rcx,8)\n'
|
||||
+ 'cmpl %ebx, %ecx\n'
|
||||
+ 'jge .L8\n'
|
||||
)
|
||||
epilogue = '.LE9:\t\t#12.2\n' 'call dummy\n'
|
||||
kernel_length = len(list(filter(None, kernel.split('\n'))))
|
||||
|
||||
bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines]
|
||||
mov_start_variations = [mov_start_1, mov_start_2]
|
||||
mov_end_variations = [mov_end_1, mov_end_2]
|
||||
# actual tests
|
||||
for mov_start_var in mov_start_variations:
|
||||
for bytes_var_1 in bytes_variations:
|
||||
for mov_end_var in mov_end_variations:
|
||||
for bytes_var_2 in bytes_variations:
|
||||
sample_code = (
|
||||
prologue
|
||||
+ mov_start_var
|
||||
+ bytes_var_1
|
||||
+ kernel
|
||||
+ mov_end_var
|
||||
+ bytes_var_2
|
||||
+ epilogue
|
||||
)
|
||||
with self.subTest(
|
||||
mov_start=mov_start_var,
|
||||
bytes_start=bytes_var_1,
|
||||
mov_end=mov_end_var,
|
||||
bytes_end=bytes_var_2,
|
||||
):
|
||||
sample_parsed = self.parser_x86.parse_file(sample_code)
|
||||
sample_kernel = reduce_to_section(sample_parsed, 'x86')
|
||||
self.assertEquals(len(sample_kernel), kernel_length)
|
||||
kernel_start = len(
|
||||
list(
|
||||
filter(
|
||||
None, (prologue + mov_start_var + bytes_var_1).split('\n')
|
||||
)
|
||||
)
|
||||
)
|
||||
parsed_kernel = self.parser_x86.parse_file(
|
||||
kernel, start_line=kernel_start
|
||||
)
|
||||
self.assertEquals(sample_kernel, parsed_kernel)
|
||||
|
||||
def test_marker_special_cases_AArch(self):
|
||||
bytes_line = '.byte 213,3,32,31\n'
|
||||
mov_start = 'mov x1, #111\n'
|
||||
mov_end = 'mov x1, #222\n'
|
||||
prologue = 'dup v0.2d, x14\n' + ' neg x9, x9\n' + ' .p2align 6\n'
|
||||
kernel = (
|
||||
'.LBB0_28:\n'
|
||||
+ 'fmul v7.2d, v7.2d, v19.2d\n'
|
||||
+ 'stp q0, q1, [x10, #-32]\n'
|
||||
+ 'b.ne .LBB0_28\n'
|
||||
)
|
||||
epilogue = '.LBB0_29: // Parent Loop BB0_20 Depth=1\n' + 'bl dummy\n'
|
||||
kernel_length = len(list(filter(None, kernel.split('\n'))))
|
||||
|
||||
# marker directly at the beginning
|
||||
code_beginning = mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
|
||||
beginning_parsed = self.parser_AArch.parse_file(code_beginning)
|
||||
test_kernel = reduce_to_section(beginning_parsed, 'AArch64')
|
||||
self.assertEquals(len(test_kernel), kernel_length)
|
||||
kernel_start = len(list(filter(None, (mov_start + bytes_line).split('\n'))))
|
||||
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
|
||||
self.assertEquals(test_kernel, parsed_kernel)
|
||||
|
||||
# marker at the end
|
||||
code_end = prologue + mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
|
||||
end_parsed = self.parser_AArch.parse_file(code_end)
|
||||
test_kernel = reduce_to_section(end_parsed, 'AArch64')
|
||||
self.assertEquals(len(test_kernel), kernel_length)
|
||||
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
|
||||
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
|
||||
self.assertEquals(test_kernel, parsed_kernel)
|
||||
|
||||
# no kernel
|
||||
code_empty = prologue + mov_start + bytes_line + mov_end + bytes_line + epilogue
|
||||
empty_parsed = self.parser_AArch.parse_file(code_empty)
|
||||
test_kernel = reduce_to_section(empty_parsed, 'AArch64')
|
||||
self.assertEquals(len(test_kernel), 0)
|
||||
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
|
||||
self.assertEquals(test_kernel, [])
|
||||
|
||||
# no start marker
|
||||
code_no_start = prologue + bytes_line + kernel + mov_end + bytes_line + epilogue
|
||||
no_start_parsed = self.parser_AArch.parse_file(code_no_start)
|
||||
with self.assertRaises(LookupError):
|
||||
reduce_to_section(no_start_parsed, 'AArch64')
|
||||
|
||||
# no end marker
|
||||
code_no_end = prologue + mov_start + bytes_line + kernel + mov_end + epilogue
|
||||
no_end_parsed = self.parser_AArch.parse_file(code_no_end)
|
||||
with self.assertRaises(LookupError):
|
||||
reduce_to_section(no_end_parsed, 'AArch64')
|
||||
|
||||
# no marker at all
|
||||
code_no_marker = prologue + kernel + epilogue
|
||||
no_marker_parsed = self.parser_AArch.parse_file(code_no_marker)
|
||||
with self.assertRaises(LookupError):
|
||||
reduce_to_section(no_marker_parsed, 'AArch64')
|
||||
|
||||
def test_marker_special_cases_x86(self):
|
||||
bytes_line = '.byte 100\n.byte 103\n.byte 144\n'
|
||||
mov_start = 'movl $111, %ebx\n'
|
||||
mov_end = 'movl $222, %ebx\n'
|
||||
prologue = 'movl -88(%rbp), %r10d\n' + 'xorl %r11d, %r11d\n' + '.p2align 4,,10\n'
|
||||
kernel = (
|
||||
'.L3: #L3\n'
|
||||
+ 'vmovsd .LC1(%rip), %xmm0\n'
|
||||
+ 'vmovsd %xmm0, (%r15,%rcx,8)\n'
|
||||
+ 'cmpl %ecx, %ebx\n'
|
||||
+ 'jle .L3\n'
|
||||
)
|
||||
epilogue = 'leaq -56(%rbp), %rsi\n' + 'movl %r10d, -88(%rbp)\n' + 'call timing\n'
|
||||
kernel_length = len(list(filter(None, kernel.split('\n'))))
|
||||
|
||||
# marker directly at the beginning
|
||||
code_beginning = mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
|
||||
beginning_parsed = self.parser_x86.parse_file(code_beginning)
|
||||
test_kernel = reduce_to_section(beginning_parsed, 'x86')
|
||||
self.assertEquals(len(test_kernel), kernel_length)
|
||||
kernel_start = len(list(filter(None, (mov_start + bytes_line).split('\n'))))
|
||||
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
|
||||
self.assertEquals(test_kernel, parsed_kernel)
|
||||
|
||||
# marker at the end
|
||||
code_end = prologue + mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
|
||||
end_parsed = self.parser_x86.parse_file(code_end)
|
||||
test_kernel = reduce_to_section(end_parsed, 'x86')
|
||||
self.assertEquals(len(test_kernel), kernel_length)
|
||||
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
|
||||
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
|
||||
self.assertEquals(test_kernel, parsed_kernel)
|
||||
|
||||
# no kernel
|
||||
code_empty = prologue + mov_start + bytes_line + mov_end + bytes_line + epilogue
|
||||
empty_parsed = self.parser_x86.parse_file(code_empty)
|
||||
test_kernel = reduce_to_section(empty_parsed, 'x86')
|
||||
self.assertEquals(len(test_kernel), 0)
|
||||
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
|
||||
self.assertEquals(test_kernel, [])
|
||||
|
||||
# no start marker
|
||||
code_no_start = prologue + bytes_line + kernel + mov_end + bytes_line + epilogue
|
||||
no_start_parsed = self.parser_x86.parse_file(code_no_start)
|
||||
with self.assertRaises(LookupError):
|
||||
reduce_to_section(no_start_parsed, 'x86')
|
||||
|
||||
# no end marker
|
||||
code_no_end = prologue + mov_start + bytes_line + kernel + mov_end + epilogue
|
||||
no_end_parsed = self.parser_x86.parse_file(code_no_end)
|
||||
with self.assertRaises(LookupError):
|
||||
reduce_to_section(no_end_parsed, 'x86')
|
||||
|
||||
# no marker at all
|
||||
code_no_marker = prologue + kernel + epilogue
|
||||
no_marker_parsed = self.parser_x86.parse_file(code_no_marker)
|
||||
with self.assertRaises(LookupError):
|
||||
reduce_to_section(no_marker_parsed, 'x86')
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
name = os.path.join(testdir, 'test_files', name)
|
||||
assert os.path.exists(name)
|
||||
return name
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script: run only this module's test case with verbose output.
    marker_suite = unittest.TestLoader().loadTestsFromTestCase(TestMarkerUtils)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(marker_suite)
|
||||
@@ -1,69 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
from io import StringIO
|
||||
import os
|
||||
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, '..')
|
||||
from osaca import osaca
|
||||
|
||||
|
||||
class TestOsaca(unittest.TestCase):
|
||||
maxDiff = None
|
||||
|
||||
def setUp(self):
|
||||
self.curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
|
||||
|
||||
@unittest.skip("Binary analysis is error prone and currently not working with FSF's objdump")
|
||||
def testIACABinary(self):
|
||||
assembly = osaca.get_assembly_from_binary(self.curr_dir + '/testfiles/taxCalc-ivb-iaca')
|
||||
osa = osaca.OSACA('IVB', assembly)
|
||||
result = osa.generate_text_output()
|
||||
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
|
||||
with open(self.curr_dir + '/test_osaca_iaca.out', encoding='utf-8') as f:
|
||||
assertion = f.read()
|
||||
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
|
||||
|
||||
# Test ASM file with IACA marker in two lines
|
||||
def testIACAasm1(self):
|
||||
with open(self.curr_dir + '/testfiles/taxCalc-ivb-iaca.S') as f:
|
||||
osa = osaca.OSACA('IVB', f.read())
|
||||
result = osa.generate_text_output()
|
||||
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
|
||||
with open(self.curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
|
||||
assertion = f.read()
|
||||
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
|
||||
|
||||
# Test ASM file with IACA marker in four lines
|
||||
def testIACAasm2(self):
|
||||
with open(self.curr_dir + '/testfiles/taxCalc-ivb-iaca2.S') as f:
|
||||
osa = osaca.OSACA('IVB', f.read())
|
||||
result = osa.generate_text_output()
|
||||
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
|
||||
with open(self.curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
|
||||
assertion = f.read()
|
||||
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
|
||||
|
||||
#@unittest.skip("Skip until required instructions are supported.")
|
||||
def test_asm_API(self):
|
||||
with open(self.curr_dir + '/testfiles/3d-7pt.icc.skx.avx512.iaca_marked.s') as f:
|
||||
osa = osaca.OSACA('SKX', f.read())
|
||||
|
||||
text_output = osa.create_output()
|
||||
print(text_output)
|
||||
# Derived from IACA (and manually considering OSACAs equal distribution to ports)
|
||||
self.assertEqual(dict(osa.get_port_occupation_cycles()),
|
||||
{'0': 4.0,
|
||||
'0DV': 0.0,
|
||||
'1': 3.5,
|
||||
'2': 3.5,
|
||||
'3': 3.5,
|
||||
'4': 1.0,
|
||||
'5': 4.5,
|
||||
'6': 3.5,
|
||||
'7': 0.0})
|
||||
# TODO consider frontend bottleneck -> 6.25 cy
|
||||
self.assertEqual(osa.get_total_throughput(),
|
||||
4.5)
|
||||
@@ -1,26 +0,0 @@
|
||||
Port Binding in Cycles Per Iteration:
|
||||
-------------------------------------------------
|
||||
| Port | 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------------
|
||||
| Cycles | 3.67 | 5.67 | 1.0 | 1.0 | 2.0 | 3.67 |
|
||||
-------------------------------------------------
|
||||
|
||||
|
||||
Ports Pressure in cycles
|
||||
| 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------
|
||||
| 0.50 | 0.50 | | | | | lea 1(%rax,%rax),%edx
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %edx,%xmm2,%xmm2
|
||||
| 1.00 | | | | | | vmulss %xmm2,%xmm0,%xmm3
|
||||
| 0.50 | 0.50 | | | | | lea 2(%rax,%rax),%ecx
|
||||
| | 1.00 | | | | | vaddss %xmm3,%xmm1,%xmm4
|
||||
| | | | | | 1.00 | vxorps %xmm1,%xmm1,%xmm1
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %ecx,%xmm1,%xmm1
|
||||
| 1.00 | | | | | | vmulss %xmm1,%xmm0,%xmm5
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm4,4(%rsp,%rax,8)
|
||||
| | 1.00 | | | | | vaddss %xmm5,%xmm4,%xmm1
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm1,8(%rsp,%rax,8)
|
||||
| 0.33 | 0.33 | | | | 0.33 | inc %rax
|
||||
| 0.33 | 0.33 | | | | 0.33 | cmp $499,%rax
|
||||
| | | | | | | X jb main_98
|
||||
Total number of estimated throughput: 5.67
|
||||
@@ -1,26 +0,0 @@
|
||||
Port Binding in Cycles Per Iteration:
|
||||
-------------------------------------------------
|
||||
| Port | 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------------
|
||||
| Cycles | 3.67 | 5.67 | 1.0 | 1.0 | 2.0 | 3.67 |
|
||||
-------------------------------------------------
|
||||
|
||||
|
||||
Ports Pressure in cycles
|
||||
| 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------
|
||||
| 0.50 | 0.50 | | | | | lea 1(%rax,%rax), %edx
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %edx, %xmm2, %xmm2
|
||||
| 1.00 | | | | | | vmulss %xmm2, %xmm0, %xmm3
|
||||
| 0.50 | 0.50 | | | | | lea 2(%rax,%rax), %ecx
|
||||
| | 1.00 | | | | | vaddss %xmm3, %xmm1, %xmm4
|
||||
| | | | | | 1.00 | vxorps %xmm1, %xmm1, %xmm1
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %ecx, %xmm1, %xmm1
|
||||
| 1.00 | | | | | | vmulss %xmm1, %xmm0, %xmm5
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm4, 4(%rsp,%rax,8)
|
||||
| | 1.00 | | | | | vaddss %xmm5, %xmm4, %xmm1
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm1, 8(%rsp,%rax,8)
|
||||
| 0.33 | 0.33 | | | | 0.33 | incq %rax
|
||||
| 0.33 | 0.33 | | | | 0.33 | cmpq $499, %rax
|
||||
| | | | | | | jb ..B1.4
|
||||
Total number of estimated throughput: 5.67
|
||||
413
tests/test_parser_AArch64v81.py
Executable file
413
tests/test_parser_AArch64v81.py
Executable file
@@ -0,0 +1,413 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for ARMv8 AArch64 assembly parser
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from pyparsing import ParseException
|
||||
|
||||
from osaca.parser import AttrDict, ParserAArch64v81
|
||||
|
||||
|
||||
class TestParserAArch64v81(unittest.TestCase):
|
||||
@classmethod
def setUpClass(cls):
    # Runs once for the whole test class: build the parser under test and
    # read the triad kernel used by test_parse_file. Both are stored on the
    # class so every test method can reach them through ``self``.
    cls.parser = ParserAArch64v81()
    with open(cls._find_file('triad-arm-iaca.s')) as f:
        cls.triad_code = f.read()
|
||||
|
||||
##################
|
||||
# Test
|
||||
##################
|
||||
|
||||
def test_comment_parser(self):
|
||||
self.assertEqual(self._get_comment(self.parser, '// some comments'), 'some comments')
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, '\t\t//AA BB CC \t end \t'), 'AA BB CC end'
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, '\t//// comment //// comment'),
|
||||
'// comment //// comment',
|
||||
)
|
||||
|
||||
def test_label_parser(self):
    # (label line, expected label name)
    name_cases = (
        ('main:', 'main'),
        ('..B1.10:', '..B1.10'),
        ('.2.3_2_pack.3:', '.2.3_2_pack.3'),
        ('.L1:\t\t\t//label1', '.L1'),
    )
    for label_line, expected_name in name_cases:
        self.assertEqual(self._get_label(self.parser, label_line).name, expected_name)

    # A trailing comment is exposed as a token list on the label.
    commented_label = self._get_label(self.parser, '.L1:\t\t\t//label1')
    self.assertEqual(' '.join(commented_label.comment), 'label1')

    # A directive is not a label and must be rejected by the label parser.
    self.assertRaises(ParseException, self._get_label, self.parser, '\t.cfi_startproc')
|
||||
|
||||
def test_directive_parser(self):
|
||||
self.assertEqual(self._get_directive(self.parser, '\t.text').name, 'text')
|
||||
self.assertEqual(len(self._get_directive(self.parser, '\t.text').parameters), 0)
|
||||
self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90').name, 'align')
|
||||
self.assertEqual(len(self._get_directive(self.parser, '\t.align\t16,0x90').parameters), 2)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, '\t.align\t16,0x90').parameters[1], '0x90'
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, ' .byte 100,103,144 //IACA START')[
|
||||
'name'
|
||||
],
|
||||
'byte',
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, ' .byte 100,103,144 //IACA START')[
|
||||
'parameters'
|
||||
][2],
|
||||
'144',
|
||||
)
|
||||
self.assertEqual(
|
||||
' '.join(
|
||||
self._get_directive(self.parser, ' .byte 100,103,144 //IACA START')[
|
||||
'comment'
|
||||
]
|
||||
),
|
||||
'IACA START',
|
||||
)
|
||||
|
||||
def test_parse_instruction(self):
|
||||
instr1 = '\t\tvcvt.F32.S32 w1, w2\t\t\t//12.27'
|
||||
instr2 = 'b.lo ..B1.4 \t'
|
||||
instr3 = ' mov x2,#0x222 //NOT IACA END'
|
||||
instr4 = 'str x28, [sp, x1, lsl #4] //12.9'
|
||||
instr5 = 'ldr x0, [x0, #:got_lo12:q2c]'
|
||||
instr6 = 'adrp x0, :got:visited'
|
||||
instr7 = 'fadd v17.2d, v16.2d, v1.2d'
|
||||
|
||||
parsed_1 = self.parser.parse_instruction(instr1)
|
||||
parsed_2 = self.parser.parse_instruction(instr2)
|
||||
parsed_3 = self.parser.parse_instruction(instr3)
|
||||
parsed_4 = self.parser.parse_instruction(instr4)
|
||||
parsed_5 = self.parser.parse_instruction(instr5)
|
||||
parsed_6 = self.parser.parse_instruction(instr6)
|
||||
parsed_7 = self.parser.parse_instruction(instr7)
|
||||
|
||||
self.assertEqual(parsed_1.instruction, 'vcvt.F32.S32')
|
||||
self.assertEqual(parsed_1.operands[0].register.name, '1')
|
||||
self.assertEqual(parsed_1.operands[0].register.prefix, 'w')
|
||||
self.assertEqual(parsed_1.operands[1].register.name, '2')
|
||||
self.assertEqual(parsed_1.operands[1].register.prefix, 'w')
|
||||
self.assertEqual(parsed_1.comment, '12.27')
|
||||
|
||||
self.assertEqual(parsed_2.instruction, 'b.lo')
|
||||
self.assertEqual(parsed_2.operands[0].identifier.name, '..B1.4')
|
||||
self.assertEqual(len(parsed_2.operands), 1)
|
||||
self.assertIsNone(parsed_2.comment)
|
||||
|
||||
self.assertEqual(parsed_3.instruction, 'mov')
|
||||
self.assertEqual(parsed_3.operands[0].register.name, '2')
|
||||
self.assertEqual(parsed_3.operands[0].register.prefix, 'x')
|
||||
self.assertEqual(parsed_3.operands[1].immediate.value, '0x222')
|
||||
self.assertEqual(parsed_3.comment, 'NOT IACA END')
|
||||
|
||||
self.assertEqual(parsed_4.instruction, 'str')
|
||||
self.assertIsNone(parsed_4.operands[1].memory.offset)
|
||||
self.assertEqual(parsed_4.operands[1].memory.base.name, 'sp')
|
||||
self.assertEqual(parsed_4.operands[1].memory.base.prefix, 'x')
|
||||
self.assertEqual(parsed_4.operands[1].memory.index.name, '1')
|
||||
self.assertEqual(parsed_4.operands[1].memory.index.prefix, 'x')
|
||||
self.assertEqual(parsed_4.operands[1].memory.scale, 16)
|
||||
self.assertEqual(parsed_4.operands[0].register.name, '28')
|
||||
self.assertEqual(parsed_4.operands[0].register.prefix, 'x')
|
||||
self.assertEqual(parsed_4.comment, '12.9')
|
||||
|
||||
self.assertEqual(parsed_5.instruction, 'ldr')
|
||||
self.assertEqual(parsed_5.operands[0].register.name, '0')
|
||||
self.assertEqual(parsed_5.operands[0].register.prefix, 'x')
|
||||
self.assertEqual(parsed_5.operands[1].memory.offset.identifier.name, 'q2c')
|
||||
self.assertEqual(parsed_5.operands[1].memory.offset.identifier.relocation, ':got_lo12:')
|
||||
self.assertEqual(parsed_5.operands[1].memory.base.name, '0')
|
||||
self.assertEqual(parsed_5.operands[1].memory.base.prefix, 'x')
|
||||
self.assertIsNone(parsed_5.operands[1].memory.index)
|
||||
self.assertEqual(parsed_5.operands[1].memory.scale, 1)
|
||||
|
||||
self.assertEqual(parsed_6.instruction, 'adrp')
|
||||
self.assertEqual(parsed_6.operands[0].register.name, '0')
|
||||
self.assertEqual(parsed_6.operands[0].register.prefix, 'x')
|
||||
self.assertEqual(parsed_6.operands[1].identifier.relocation, ':got:')
|
||||
self.assertEqual(parsed_6.operands[1].identifier.name, 'visited')
|
||||
|
||||
self.assertEqual(parsed_7.instruction, 'fadd')
|
||||
self.assertEqual(parsed_7.operands[0].register.name, '17')
|
||||
self.assertEqual(parsed_7.operands[0].register.prefix, 'v')
|
||||
self.assertEqual(parsed_7.operands[0].register.lanes, '2')
|
||||
self.assertEqual(parsed_7.operands[0].register.shape, 'd')
|
||||
self.assertEqual(self.parser.get_full_reg_name(parsed_7.operands[2].register), 'v1.2d')
|
||||
|
||||
def test_parse_line(self):
|
||||
line_comment = '// -- Begin main'
|
||||
line_label = '.LBB0_1: // =>This Inner Loop Header: Depth=1'
|
||||
line_directive = '\t.cfi_def_cfa w29, -16'
|
||||
line_instruction = '\tldr s0, [x11, w10, sxtw #2]\t\t// = <<2'
|
||||
line_prefetch = 'prfm pldl1keep, [x26, #2048] //HPL'
|
||||
line_preindexed = 'stp x29, x30, [sp, #-16]!'
|
||||
line_postindexed = 'ldp q2, q3, [x11], #64'
|
||||
|
||||
instruction_form_1 = {
|
||||
'instruction': None,
|
||||
'operands': None,
|
||||
'directive': None,
|
||||
'comment': '-- Begin main',
|
||||
'label': None,
|
||||
'line': '// -- Begin main',
|
||||
'line_number': 1,
|
||||
}
|
||||
|
||||
instruction_form_2 = {
|
||||
'instruction': None,
|
||||
'operands': None,
|
||||
'directive': None,
|
||||
'comment': '=>This Inner Loop Header: Depth=1',
|
||||
'label': '.LBB0_1',
|
||||
'line': '.LBB0_1: // =>This Inner Loop Header: Depth=1',
|
||||
'line_number': 2,
|
||||
}
|
||||
instruction_form_3 = {
|
||||
'instruction': None,
|
||||
'operands': None,
|
||||
'directive': {'name': 'cfi_def_cfa', 'parameters': ['w29', '-16']},
|
||||
'comment': None,
|
||||
'label': None,
|
||||
'line': '.cfi_def_cfa w29, -16',
|
||||
'line_number': 3,
|
||||
}
|
||||
instruction_form_4 = {
|
||||
'instruction': 'ldr',
|
||||
'operands': [
|
||||
{'register': {'prefix': 's', 'name': '0'}},
|
||||
{
|
||||
'memory': {
|
||||
'offset': None,
|
||||
'base': {'prefix': 'x', 'name': '11'},
|
||||
'index': {
|
||||
'prefix': 'w',
|
||||
'name': '10',
|
||||
'shift_op': 'sxtw',
|
||||
'shift': {'value': '2'},
|
||||
},
|
||||
'scale': 4,
|
||||
}
|
||||
},
|
||||
],
|
||||
'directive': None,
|
||||
'comment': '= <<2',
|
||||
'label': None,
|
||||
'line': 'ldr s0, [x11, w10, sxtw #2]\t\t// = <<2',
|
||||
'line_number': 4,
|
||||
}
|
||||
instruction_form_5 = {
|
||||
'instruction': 'prfm',
|
||||
'operands': [
|
||||
{'prfop': {'type': ['PLD'], 'target': ['L1'], 'policy': ['KEEP']}},
|
||||
{
|
||||
'memory': {
|
||||
'offset': {'value': '2048'},
|
||||
'base': {'prefix': 'x', 'name': '26'},
|
||||
'index': None,
|
||||
'scale': 1,
|
||||
}
|
||||
},
|
||||
],
|
||||
'directive': None,
|
||||
'comment': 'HPL',
|
||||
'label': None,
|
||||
'line': 'prfm pldl1keep, [x26, #2048] //HPL',
|
||||
'line_number': 5,
|
||||
}
|
||||
instruction_form_6 = {
|
||||
'instruction': 'stp',
|
||||
'operands': [
|
||||
{'register': {'prefix': 'x', 'name': '29'}},
|
||||
{'register': {'prefix': 'x', 'name': '30'}},
|
||||
{
|
||||
'memory': {
|
||||
'offset': {'value': '-16'},
|
||||
'base': {'name': 'sp', 'prefix': 'x'},
|
||||
'index': None,
|
||||
'scale': 1,
|
||||
'pre_indexed': True,
|
||||
}
|
||||
},
|
||||
],
|
||||
'directive': None,
|
||||
'comment': None,
|
||||
'label': None,
|
||||
'line': 'stp x29, x30, [sp, #-16]!',
|
||||
'line_number': 6,
|
||||
}
|
||||
instruction_form_7 = {
|
||||
'instruction': 'ldp',
|
||||
'operands': [
|
||||
{'register': {'prefix': 'q', 'name': '2'}},
|
||||
{'register': {'prefix': 'q', 'name': '3'}},
|
||||
{
|
||||
'memory': {
|
||||
'offset': None,
|
||||
'base': {'prefix': 'x', 'name': '11'},
|
||||
'index': None,
|
||||
'scale': 1,
|
||||
'post_indexed': {'value': '64'},
|
||||
}
|
||||
},
|
||||
],
|
||||
'directive': None,
|
||||
'comment': None,
|
||||
'label': None,
|
||||
'line': 'ldp q2, q3, [x11], #64',
|
||||
'line_number': 7,
|
||||
}
|
||||
parsed_1 = self.parser.parse_line(line_comment, 1)
|
||||
parsed_2 = self.parser.parse_line(line_label, 2)
|
||||
parsed_3 = self.parser.parse_line(line_directive, 3)
|
||||
parsed_4 = self.parser.parse_line(line_instruction, 4)
|
||||
parsed_5 = self.parser.parse_line(line_prefetch, 5)
|
||||
parsed_6 = self.parser.parse_line(line_preindexed, 6)
|
||||
parsed_7 = self.parser.parse_line(line_postindexed, 7)
|
||||
|
||||
self.assertEqual(parsed_1, instruction_form_1)
|
||||
self.assertEqual(parsed_2, instruction_form_2)
|
||||
self.assertEqual(parsed_3, instruction_form_3)
|
||||
self.assertEqual(parsed_4, instruction_form_4)
|
||||
self.assertEqual(parsed_5, instruction_form_5)
|
||||
self.assertEqual(parsed_6, instruction_form_6)
|
||||
self.assertEqual(parsed_7, instruction_form_7)
|
||||
|
||||
def test_parse_file(self):
|
||||
parsed = self.parser.parse_file(self.triad_code)
|
||||
self.assertEqual(parsed[0].line_number, 1)
|
||||
self.assertEqual(len(parsed), 645)
|
||||
|
||||
def test_normalize_imd(self):
|
||||
imd_decimal_1 = {'value': '79'}
|
||||
imd_hex_1 = {'value': '0x4f'}
|
||||
imd_decimal_2 = {'value': '8'}
|
||||
imd_hex_2 = {'value': '0x8'}
|
||||
imd_float_11 = {'float': {'mantissa': '0.79', 'e_sign': '+', 'exponent': '2'}}
|
||||
imd_float_12 = {'float': {'mantissa': '790.0', 'e_sign': '-', 'exponent': '1'}}
|
||||
imd_double_11 = {'double': {'mantissa': '0.79', 'e_sign': '+', 'exponent': '2'}}
|
||||
imd_double_12 = {'double': {'mantissa': '790.0', 'e_sign': '-', 'exponent': '1'}}
|
||||
identifier = {'identifier': {'name': '..B1.4'}}
|
||||
|
||||
value1 = self.parser.normalize_imd(imd_decimal_1)
|
||||
self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
||||
)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_double_11), value1)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_double_12), value1)
|
||||
self.assertEqual(self.parser.normalize_imd(identifier), identifier)
|
||||
|
||||
def test_multiple_regs(self):
    # Brace-enclosed operands: 'r5-r7' is expected as a 'range' of its two
    # end points, 'r5, r7, r9' as a 'list' of all three registers.
    def reg(number):
        return {'prefix': 'r', 'name': number}

    expected_range = AttrDict(
        {'register': {'range': [reg('5'), reg('7')], 'index': None}}
    )
    expected_list = AttrDict(
        {'register': {'list': [reg('5'), reg('7'), reg('9')], 'index': None}}
    )

    parsed_range = self.parser.parse_line('PUSH {r5-r7}')
    parsed_list = self.parser.parse_line('POP {r5, r7, r9}')

    self.assertEqual(parsed_range.operands[0], expected_range)
    self.assertEqual(parsed_list.operands[0], expected_list)
|
||||
|
||||
def test_reg_dependency(self):
    # Checks parser.is_reg_dependend_of() for every (ri, rj) pair: registers
    # of one group below are expected to depend on each other, registers of
    # different groups are not.

    # group 1: b/h/s/d/q registers, all named '1'
    reg_1_1 = AttrDict({'prefix': 'b', 'name': '1'})
    reg_1_2 = AttrDict({'prefix': 'h', 'name': '1'})
    reg_1_3 = AttrDict({'prefix': 's', 'name': '1'})
    reg_1_4 = AttrDict({'prefix': 'd', 'name': '1'})
    # The 'q' register used to be bound to reg_1_4 as well, which overwrote
    # the 'd' register above so that it was never tested.
    reg_1_5 = AttrDict({'prefix': 'q', 'name': '1'})
    # group 2: w/x registers, both named '2'
    reg_2_1 = AttrDict({'prefix': 'w', 'name': '2'})
    reg_2_2 = AttrDict({'prefix': 'x', 'name': '2'})
    # group v: vector register '11' with different lane/shape arrangements
    reg_v1_1 = AttrDict({'prefix': 'v', 'name': '11', 'lanes': '16', 'shape': 'b'})
    reg_v1_2 = AttrDict({'prefix': 'v', 'name': '11', 'lanes': '8', 'shape': 'h'})
    reg_v1_3 = AttrDict({'prefix': 'v', 'name': '11', 'lanes': '4', 'shape': 's'})
    reg_v1_4 = AttrDict({'prefix': 'v', 'name': '11', 'lanes': '2', 'shape': 'd'})

    # registers that belong to none of the groups above
    reg_b5 = AttrDict({'prefix': 'b', 'name': '5'})
    reg_q15 = AttrDict({'prefix': 'q', 'name': '15'})
    reg_v10 = AttrDict({'prefix': 'v', 'name': '10', 'lanes': '2', 'shape': 's'})
    reg_v20 = AttrDict({'prefix': 'v', 'name': '20', 'lanes': '2', 'shape': 'd'})

    reg_1 = [reg_1_1, reg_1_2, reg_1_3, reg_1_4, reg_1_5]
    reg_2 = [reg_2_1, reg_2_2]
    reg_v = [reg_v1_1, reg_v1_2, reg_v1_3, reg_v1_4]
    reg_others = [reg_b5, reg_q15, reg_v10, reg_v20]
    regs = reg_1 + reg_2 + reg_v + reg_others

    # test each register against each other:
    # inside a group, dependency is expected exactly for members of that group
    for group in (reg_1, reg_2, reg_v):
        for ri in group:
            for rj in regs:
                assert_value = rj in group
                with self.subTest(reg_a=ri, reg_b=rj, assert_val=assert_value):
                    self.assertEqual(self.parser.is_reg_dependend_of(ri, rj), assert_value)
    # the remaining registers are expected to depend only on themselves
    for ri in reg_others:
        for rj in regs:
            assert_value = rj == ri
            with self.subTest(reg_a=ri, reg_b=rj, assert_val=assert_value):
                self.assertEqual(self.parser.is_reg_dependend_of(ri, rj), assert_value)
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
def _get_comment(self, parser, comment):
    # Run the parser's comment grammar over the complete string, pass the
    # result through process_operand and return the comment tokens joined
    # by single spaces.
    parse_result = parser.comment.parseString(comment, parseAll=True)
    processed = parser.process_operand(parse_result.asDict())
    tokens = AttrDict.convert_dict(processed).comment
    return ' '.join(tokens)
|
||||
|
||||
def _get_label(self, parser, label):
    # Run the parser's label grammar over the complete string and return the
    # 'label' entry of the processed result.
    parse_result = parser.label.parseString(label, parseAll=True)
    processed = parser.process_operand(parse_result.asDict())
    return AttrDict.convert_dict(processed).label
|
||||
|
||||
def _get_directive(self, parser, directive):
    # Run the parser's directive grammar over the complete string and return
    # the 'directive' entry of the processed result.
    parse_result = parser.directive.parseString(directive, parseAll=True)
    processed = parser.process_operand(parse_result.asDict())
    return AttrDict.convert_dict(processed).directive
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
name = os.path.join(testdir, 'test_files', name)
|
||||
assert os.path.exists(name)
|
||||
return name
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
suite = unittest.TestLoader().loadTestsFromTestCase(TestParserAArch64v81)
|
||||
unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
306
tests/test_parser_x86att.py
Executable file
306
tests/test_parser_x86att.py
Executable file
@@ -0,0 +1,306 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for x86 AT&T assembly parser
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from pyparsing import ParseException
|
||||
|
||||
from osaca.parser import AttrDict, ParserX86ATT
|
||||
|
||||
|
||||
class TestParserX86ATT(unittest.TestCase):
|
||||
@classmethod
def setUpClass(cls):
    # Runs once for the whole test class: build the parser under test and
    # read the triad kernel used by test_parse_file. Both are stored on the
    # class so every test method can reach them through ``self``.
    cls.parser = ParserX86ATT()
    with open(cls._find_file('triad-x86-iaca.s')) as f:
        cls.triad_code = f.read()
|
||||
|
||||
##################
|
||||
# Test
|
||||
##################
|
||||
|
||||
def test_comment_parser(self):
|
||||
self.assertEqual(self._get_comment(self.parser, '# some comments'), 'some comments')
|
||||
self.assertEqual(self._get_comment(self.parser, '\t\t#AA BB CC \t end \t'), 'AA BB CC end')
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, '\t## comment ## comment'), '# comment ## comment'
|
||||
)
|
||||
|
||||
def test_label_parser(self):
    # (label line, expected label name)
    name_cases = (
        ('main:', 'main'),
        ('..B1.10:', '..B1.10'),
        ('.2.3_2_pack.3:', '.2.3_2_pack.3'),
        ('.L1:\t\t\t#label1', '.L1'),
    )
    for label_line, expected_name in name_cases:
        self.assertEqual(self._get_label(self.parser, label_line).name, expected_name)

    # A trailing comment is exposed as a token list on the label.
    commented_label = self._get_label(self.parser, '.L1:\t\t\t#label1')
    self.assertEqual(' '.join(commented_label.comment), 'label1')

    # A directive is not a label and must be rejected by the label parser.
    self.assertRaises(ParseException, self._get_label, self.parser, '\t.cfi_startproc')
|
||||
|
||||
def test_directive_parser(self):
|
||||
self.assertEqual(self._get_directive(self.parser, '\t.text').name, 'text')
|
||||
self.assertEqual(len(self._get_directive(self.parser, '\t.text').parameters), 0)
|
||||
self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90').name, 'align')
|
||||
self.assertEqual(len(self._get_directive(self.parser, '\t.align\t16,0x90').parameters), 2)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, '\t.align\t16,0x90').parameters[1], '0x90'
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, ' .byte 100,103,144 #IACA START')[
|
||||
'name'
|
||||
],
|
||||
'byte',
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, ' .byte 100,103,144 #IACA START')[
|
||||
'parameters'
|
||||
][2],
|
||||
'144',
|
||||
)
|
||||
self.assertEqual(
|
||||
' '.join(
|
||||
self._get_directive(self.parser, ' .byte 100,103,144 #IACA START')[
|
||||
'comment'
|
||||
]
|
||||
),
|
||||
'IACA START',
|
||||
)
|
||||
|
||||
def test_parse_instruction(self):
|
||||
instr1 = '\t\tvcvtsi2ss %edx, %xmm2, %xmm2\t\t\t#12.27'
|
||||
instr2 = 'jb ..B1.4 \t'
|
||||
instr3 = ' movl $222,%ebx #IACA END'
|
||||
instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9'
|
||||
instr5 = 'mov %ebx,var(,1)'
|
||||
instr6 = 'lea (,%rax,8),%rbx'
|
||||
instr7 = 'vinsertf128 $0x1, %xmm0, %ymm1, %ymm1'
|
||||
|
||||
parsed_1 = self.parser.parse_instruction(instr1)
|
||||
parsed_2 = self.parser.parse_instruction(instr2)
|
||||
parsed_3 = self.parser.parse_instruction(instr3)
|
||||
parsed_4 = self.parser.parse_instruction(instr4)
|
||||
parsed_5 = self.parser.parse_instruction(instr5)
|
||||
parsed_6 = self.parser.parse_instruction(instr6)
|
||||
parsed_7 = self.parser.parse_instruction(instr7)
|
||||
|
||||
self.assertEqual(parsed_1.instruction, 'vcvtsi2ss')
|
||||
self.assertEqual(parsed_1.operands[0].register.name, 'edx')
|
||||
self.assertEqual(parsed_1.operands[1].register.name, 'xmm2')
|
||||
self.assertEqual(parsed_1.comment, '12.27')
|
||||
|
||||
self.assertEqual(parsed_2.instruction, 'jb')
|
||||
self.assertEqual(parsed_2.operands[0].identifier.name, '..B1.4')
|
||||
self.assertEqual(len(parsed_2.operands), 1)
|
||||
self.assertIsNone(parsed_2.comment)
|
||||
|
||||
self.assertEqual(parsed_3.instruction, 'movl')
|
||||
self.assertEqual(parsed_3.operands[0].immediate.value, '222')
|
||||
self.assertEqual(parsed_3.operands[1].register.name, 'ebx')
|
||||
self.assertEqual(parsed_3.comment, 'IACA END')
|
||||
|
||||
self.assertEqual(parsed_4.instruction, 'vmovss')
|
||||
self.assertEqual(parsed_4.operands[1].memory.offset.value, '-4')
|
||||
self.assertEqual(parsed_4.operands[1].memory.base.name, 'rsp')
|
||||
self.assertEqual(parsed_4.operands[1].memory.index.name, 'rax')
|
||||
self.assertEqual(parsed_4.operands[1].memory.scale, 8)
|
||||
self.assertEqual(parsed_4.operands[0].register.name, 'xmm4')
|
||||
self.assertEqual(parsed_4.comment, '12.9')
|
||||
|
||||
self.assertEqual(parsed_5.instruction, 'mov')
|
||||
self.assertEqual(parsed_5.operands[1].memory.offset.identifier.name, 'var')
|
||||
self.assertIsNone(parsed_5.operands[1].memory.base)
|
||||
self.assertIsNone(parsed_5.operands[1].memory.index)
|
||||
self.assertEqual(parsed_5.operands[1].memory.scale, 1)
|
||||
self.assertEqual(parsed_5.operands[0].register.name, 'ebx')
|
||||
|
||||
self.assertEqual(parsed_6.instruction, 'lea')
|
||||
self.assertIsNone(parsed_6.operands[0].memory.offset)
|
||||
self.assertIsNone(parsed_6.operands[0].memory.base)
|
||||
self.assertEqual(parsed_6.operands[0].memory.index.name, 'rax')
|
||||
self.assertEqual(parsed_6.operands[0].memory.scale, 8)
|
||||
self.assertEqual(parsed_6.operands[1].register.name, 'rbx')
|
||||
|
||||
self.assertEqual(parsed_7.operands[0].immediate.value, '0x1')
|
||||
self.assertEqual(parsed_7.operands[1].register.name, 'xmm0')
|
||||
self.assertEqual(parsed_7.operands[2].register.name, 'ymm1')
|
||||
self.assertEqual(parsed_7.operands[3].register.name, 'ymm1')
|
||||
|
||||
def test_parse_line(self):
|
||||
line_comment = '# -- Begin main'
|
||||
line_label = '..B1.7: # Preds ..B1.6'
|
||||
line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed'
|
||||
line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'
|
||||
|
||||
instruction_form_1 = {
|
||||
'instruction': None,
|
||||
'operands': None,
|
||||
'directive': None,
|
||||
'comment': '-- Begin main',
|
||||
'label': None,
|
||||
'line': '# -- Begin main',
|
||||
'line_number': 1,
|
||||
}
|
||||
instruction_form_2 = {
|
||||
'instruction': None,
|
||||
'operands': None,
|
||||
'directive': None,
|
||||
'comment': 'Preds ..B1.6',
|
||||
'label': '..B1.7',
|
||||
'line': '..B1.7: # Preds ..B1.6',
|
||||
'line_number': 2,
|
||||
}
|
||||
instruction_form_3 = {
|
||||
'instruction': None,
|
||||
'operands': None,
|
||||
'directive': {'name': 'quad', 'parameters': ['.2.3_2__kmpc_loc_pack.2']},
|
||||
'comment': 'qed',
|
||||
'label': None,
|
||||
'line': '.quad .2.3_2__kmpc_loc_pack.2 #qed',
|
||||
'line_number': 3,
|
||||
}
|
||||
instruction_form_4 = {
|
||||
'instruction': 'lea',
|
||||
'operands': [
|
||||
{
|
||||
'memory': {
|
||||
'offset': {'value': '2'},
|
||||
'base': {'name': 'rax'},
|
||||
'index': {'name': 'rax'},
|
||||
'scale': 1,
|
||||
}
|
||||
},
|
||||
{
|
||||
'register': {'name': 'ecx'}
|
||||
}
|
||||
],
|
||||
'directive': None,
|
||||
'comment': '12.9',
|
||||
'label': None,
|
||||
'line': 'lea 2(%rax,%rax), %ecx #12.9',
|
||||
'line_number': 4,
|
||||
}
|
||||
|
||||
parsed_1 = self.parser.parse_line(line_comment, 1)
|
||||
parsed_2 = self.parser.parse_line(line_label, 2)
|
||||
parsed_3 = self.parser.parse_line(line_directive, 3)
|
||||
parsed_4 = self.parser.parse_line(line_instruction, 4)
|
||||
|
||||
self.assertEqual(parsed_1, instruction_form_1)
|
||||
self.assertEqual(parsed_2, instruction_form_2)
|
||||
self.assertEqual(parsed_3, instruction_form_3)
|
||||
self.assertEqual(parsed_4, instruction_form_4)
|
||||
|
||||
def test_parse_file(self):
|
||||
parsed = self.parser.parse_file(self.triad_code)
|
||||
self.assertEqual(parsed[0].line_number, 1)
|
||||
self.assertEqual(len(parsed), 353)
|
||||
|
||||
def test_parse_register(self):
|
||||
register_str_1 = '%rax'
|
||||
register_str_2 = '%r9'
|
||||
register_str_3 = '%xmm1'
|
||||
register_str_4 = '%rip'
|
||||
|
||||
parsed_reg_1 = {'register': {'name': 'rax'}}
|
||||
parsed_reg_2 = {'register': {'name': 'r9'}}
|
||||
parsed_reg_3 = {'register': {'name': 'xmm1'}}
|
||||
parsed_reg_4 = {'register': {'name': 'rip'}}
|
||||
|
||||
self.assertEqual(self.parser.parse_register(register_str_1), parsed_reg_1)
|
||||
self.assertEqual(self.parser.parse_register(register_str_2), parsed_reg_2)
|
||||
self.assertEqual(self.parser.parse_register(register_str_3), parsed_reg_3)
|
||||
self.assertEqual(self.parser.parse_register(register_str_4), parsed_reg_4)
|
||||
self.assertIsNone(self.parser.parse_register('rax'))
|
||||
|
||||
def test_normalize_imd(self):
|
||||
imd_decimal_1 = {'value': '79'}
|
||||
imd_hex_1 = {'value': '0x4f'}
|
||||
imd_decimal_2 = {'value': '8'}
|
||||
imd_hex_2 = {'value': '0x8'}
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
||||
)
|
||||
|
||||
def test_reg_dependency(self):
    # Checks parser.is_reg_dependend_of() for every (ri, rj) pair: registers
    # of one group below are expected to depend on each other, registers of
    # different groups are not.
    def named(*names):
        return [AttrDict({'name': reg_name}) for reg_name in names]

    reg_a = named('rax', 'eax', 'ax', 'al')
    reg_r = named('r11', 'r11b', 'r11d', 'r11w')
    reg_vec_1 = named('xmm1', 'ymm1', 'zmm1')
    reg_others = named('rbx', 'r15', 'xmm2', 'ymm3')
    regs = reg_a + reg_r + reg_vec_1 + reg_others

    # test each register against each other:
    # inside a group, dependency is expected exactly for members of that group
    for group in (reg_a, reg_r, reg_vec_1):
        for ri in group:
            for rj in regs:
                assert_value = True if rj in group else False
                with self.subTest(reg_a=ri, reg_b=rj, assert_val=assert_value):
                    self.assertEqual(self.parser.is_reg_dependend_of(ri, rj), assert_value)
    # the remaining registers are expected to depend only on themselves
    for ri in reg_others:
        for rj in regs:
            assert_value = True if rj == ri else False
            with self.subTest(reg_a=ri, reg_b=rj, assert_val=assert_value):
                self.assertEqual(self.parser.is_reg_dependend_of(ri, rj), assert_value)
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
def _get_comment(self, parser, comment):
    # Run the parser's comment grammar over the complete string, pass the
    # result through process_operand and return the comment tokens joined
    # by single spaces.
    parse_result = parser.comment.parseString(comment, parseAll=True)
    processed = parser.process_operand(parse_result.asDict())
    tokens = AttrDict.convert_dict(processed).comment
    return ' '.join(tokens)
|
||||
|
||||
def _get_label(self, parser, label):
    # Run the parser's label grammar over the complete string and return the
    # 'label' entry of the processed result.
    parse_result = parser.label.parseString(label, parseAll=True)
    processed = parser.process_operand(parse_result.asDict())
    return AttrDict.convert_dict(processed).label
|
||||
|
||||
def _get_directive(self, parser, directive):
    # Run the parser's directive grammar over the complete string and return
    # the 'directive' entry of the processed result.
    parse_result = parser.directive.parseString(directive, parseAll=True)
    processed = parser.process_operand(parse_result.asDict())
    return AttrDict.convert_dict(processed).directive
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
name = os.path.join(testdir, 'test_files', name)
|
||||
assert os.path.exists(name)
|
||||
return name
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
suite = unittest.TestLoader().loadTestsFromTestCase(TestParserX86ATT)
|
||||
unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
359
tests/test_semantics.py
Executable file
359
tests/test_semantics.py
Executable file
@@ -0,0 +1,359 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for Semantic Analysis
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
from subprocess import call
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT
|
||||
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel,
|
||||
SemanticsAppender)
|
||||
|
||||
|
||||
class TestSemanticTools(unittest.TestCase):
|
||||
MODULE_DATA_DIR = os.path.join(
|
||||
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
|
||||
)
|
||||
USER_DATA_DIR = os.path.join(os.path.expanduser('~'), '.osaca/')
|
||||
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
# copy db files in user directory
|
||||
if not os.path.isdir(os.path.join(self.USER_DATA_DIR, 'data')):
|
||||
os.makedirs(os.path.join(self.USER_DATA_DIR, 'data'))
|
||||
call(['cp', '-r', self.MODULE_DATA_DIR, self.USER_DATA_DIR])
|
||||
# set up parser and kernels
|
||||
self.parser_x86 = ParserX86ATT()
|
||||
self.parser_AArch64 = ParserAArch64v81()
|
||||
with open(self._find_file('kernel-x86.s')) as f:
|
||||
self.code_x86 = f.read()
|
||||
with open(self._find_file('kernel-AArch64.s')) as f:
|
||||
self.code_AArch64 = f.read()
|
||||
self.kernel_x86 = self.parser_x86.parse_file(self.code_x86)
|
||||
self.kernel_AArch64 = self.parser_AArch64.parse_file(self.code_AArch64)
|
||||
|
||||
# set up machine models
|
||||
self.machine_model_csx = MachineModel(
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
|
||||
)
|
||||
self.machine_model_tx2 = MachineModel(
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
|
||||
)
|
||||
self.semantics_csx = SemanticsAppender(
|
||||
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')
|
||||
)
|
||||
self.semantics_tx2 = SemanticsAppender(
|
||||
self.machine_model_tx2,
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/aarch64.yml'),
|
||||
)
|
||||
self.machine_model_zen = MachineModel(arch='zen1')
|
||||
|
||||
for i in range(len(self.kernel_x86)):
|
||||
self.semantics_csx.assign_src_dst(self.kernel_x86[i])
|
||||
self.semantics_csx.assign_tp_lt(self.kernel_x86[i])
|
||||
for i in range(len(self.kernel_AArch64)):
|
||||
self.semantics_tx2.assign_src_dst(self.kernel_AArch64[i])
|
||||
self.semantics_tx2.assign_tp_lt(self.kernel_AArch64[i])
|
||||
|
||||
###########
|
||||
# Tests
|
||||
###########
|
||||
|
||||
def test_creation_by_name(self):
    # Building a machine model by architecture name and wrapping it in a
    # SemanticsAppender must not raise a ValueError.
    try:
        SemanticsAppender(MachineModel(arch='CSX'))
    except ValueError:
        self.fail()
|
||||
|
||||
def test_src_dst_assignment_x86(self):
|
||||
for instruction_form in self.kernel_x86:
|
||||
with self.subTest(instruction_form=instruction_form):
|
||||
if instruction_form['operands'] is not None:
|
||||
self.assertTrue('source' in instruction_form['operands'])
|
||||
self.assertTrue('destination' in instruction_form['operands'])
|
||||
self.assertTrue('src_dst' in instruction_form['operands'])
|
||||
|
||||
def test_src_dst_assignment_AArch64(self):
|
||||
for instruction_form in self.kernel_AArch64:
|
||||
with self.subTest(instruction_form=instruction_form):
|
||||
if instruction_form['operands'] is not None:
|
||||
self.assertTrue('source' in instruction_form['operands'])
|
||||
self.assertTrue('destination' in instruction_form['operands'])
|
||||
self.assertTrue('src_dst' in instruction_form['operands'])
|
||||
|
||||
def test_tp_lt_assignment_x86(self):
|
||||
self.assertTrue('ports' in self.machine_model_csx)
|
||||
port_num = len(self.machine_model_csx['ports'])
|
||||
for instruction_form in self.kernel_x86:
|
||||
with self.subTest(instruction_form=instruction_form):
|
||||
self.assertTrue('throughput' in instruction_form)
|
||||
self.assertTrue('latency' in instruction_form)
|
||||
self.assertIsInstance(instruction_form['port_pressure'], list)
|
||||
self.assertEqual(len(instruction_form['port_pressure']), port_num)
|
||||
|
||||
def test_tp_lt_assignment_AArch64(self):
|
||||
self.assertTrue('ports' in self.machine_model_tx2)
|
||||
port_num = len(self.machine_model_tx2['ports'])
|
||||
for instruction_form in self.kernel_AArch64:
|
||||
with self.subTest(instruction_form=instruction_form):
|
||||
self.assertTrue('throughput' in instruction_form)
|
||||
self.assertTrue('latency' in instruction_form)
|
||||
self.assertIsInstance(instruction_form['port_pressure'], list)
|
||||
self.assertEqual(len(instruction_form['port_pressure']), port_num)
|
||||
|
||||
def test_kernelDG_x86(self):
    # Dependency edges asserted below (numbers are kernel line numbers):
    #
    #  5
    #   \___>7__>8
    #   /
    #  4
    #     6_______>10
    #
    # Lines 8 and 9 must not have any dependent instruction forms.
    dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
    self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
    # lines with exactly one dependent instruction form: (line, dependent)
    for line, dependent in ((4, 7), (5, 7), (6, 10), (7, 8)):
        self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=line))), 1)
        self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=line)), dependent)
    # lines without dependents
    for line in (8, 9):
        self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=line))), 0)
    # calling without any argument is an error
    with self.assertRaises(ValueError):
        dg.get_dependent_instruction_forms()
|
||||
|
||||
def test_kernelDG_AArch64(self):
    # Checks the dependents of the AArch64 kernel lines 4-21.
    # Each entry is (line number, expectation):
    #   set  -> the complete set of dependents must match
    #   int  -> the first dependent yielded must match
    #   None -> the line must not have any dependents
    dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2)
    self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
    expectations = (
        (4, {8, 9}),
        (5, {10, 11}),
        (6, {7, 8, 9}),
        (7, {10, 11}),
        (8, 14),
        (9, 15),
        (10, 17),
        (11, 18),
        (12, {14, 15}),
        (13, {17, 18}),
        (14, 16),
        (15, 16),
        (16, None),
        (17, 19),
        (18, 19),
        (19, None),
        (20, None),
        (21, None),
    )
    for line, expected in expectations:
        dependents = dg.get_dependent_instruction_forms(line_number=line)
        if expected is None:
            self.assertEqual(len(list(dependents)), 0)
        elif isinstance(expected, set):
            self.assertEqual(set(dependents), expected)
        else:
            self.assertEqual(next(dependents), expected)
    # calling without any argument is an error
    with self.assertRaises(ValueError):
        dg.get_dependent_instruction_forms()
|
||||
|
||||
def test_hidden_load(self):
    # Uses a machine model that reports hidden loads and checks how many
    # instruction forms get flagged with INSTR_FLAGS.HIDDEN_LD for the full
    # x86 kernel (1), its last three forms (0) and the slice [5:8] (1).
    machine_model_hld = MachineModel(
        path_to_yaml=self._find_file('hidden_load_machine_model.yml')
    )
    self.assertTrue(machine_model_hld.has_hidden_loads())
    semantics_hld = SemanticsAppender(machine_model_hld)
    # each kernel is parsed separately so that add_semantics works on its
    # own instruction form objects (the former duplicate, immediately
    # overwritten parse for kernel_hld_2 was a dead store and is gone)
    kernel_hld = self.parser_x86.parse_file(self.code_x86)
    kernel_hld_2 = self.parser_x86.parse_file(self.code_x86)[-3:]
    kernel_hld_3 = self.parser_x86.parse_file(self.code_x86)[5:8]
    semantics_hld.add_semantics(kernel_hld)
    semantics_hld.add_semantics(kernel_hld_2)
    semantics_hld.add_semantics(kernel_hld_3)

    def count_hidden_loads(kernel):
        # number of instruction forms flagged as hidden load
        return sum(1 for form in kernel if INSTR_FLAGS.HIDDEN_LD in form['flags'])

    self.assertEqual(count_hidden_loads(kernel_hld), 1)
    self.assertEqual(count_hidden_loads(kernel_hld_2), 0)
    self.assertEqual(count_hidden_loads(kernel_hld_3), 1)
|
||||
|
||||
def test_cyclic_dag(self):
    # After inserting the cycle 100 -> 101 -> 102 -> 100 into the graph,
    # critical path and loop-carried dependency analysis must both raise
    # NotImplementedError.
    dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
    cycle_edges = ((100, 101, 1.0), (101, 102, 2.0), (102, 100, 3.0))
    for start, end, latency in cycle_edges:
        dg.dg.add_edge(start, end, latency=latency)
    for analysis in (dg.get_critical_path, dg.get_loopcarried_dependencies):
        with self.assertRaises(NotImplementedError):
            analysis()
|
||||
|
||||
def test_loop_carried_dependency_x86(self):
    # The x86 kernel must have exactly two loop-carried dependencies, keyed
    # by the IDs 9 and 6. For each, the root and the single dependency must
    # equal the instruction form stored in the graph node with that ID.
    dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
    lc_deps = dg.get_loopcarried_dependencies()
    self.assertEqual(len(lc_deps), 2)

    def node_form(node_id):
        # instruction form attached to the graph node `node_id`
        return dg.dg.nodes(data=True)[node_id]['instruction_form']

    for lcd_id in (9, 6):
        self.assertEqual(lc_deps[lcd_id]['root'], node_form(lcd_id))
        self.assertEqual(len(lc_deps[lcd_id]['dependencies']), 1)
        self.assertEqual(lc_deps[lcd_id]['dependencies'][0], node_form(lcd_id))
|
||||
|
||||
def test_is_read_is_written_x86(self):
    # independent from HW model (KernelDG is built without a machine model)
    dag = KernelDG(self.kernel_x86, self.parser_x86, None)
    reg_rcx = AttrDict({'name': 'rcx'})
    reg_ymm1 = AttrDict({'name': 'ymm1'})

    def annotated(line):
        # parse a single assembly line and assign its source/destination operands
        instruction_form = self.parser_x86.parse_line(line)
        self.semantics_csx.assign_src_dst(instruction_form)
        return instruction_form

    # forms that read rcx, do not touch rcx, and write rcx
    reads_rcx = annotated('vmovsd %xmm0, (%r15,%rcx,8)')
    ignores_rcx = annotated('movl %xmm0, (%r15,%rax,8)')
    writes_rcx = annotated('movi $0x05ACA, %rcx')

    # forms that read and write ymm1, and one that only reads it
    reads_writes_ymm1_a = annotated('vinsertf128 $0x1, %xmm1, %ymm0, %ymm1')
    reads_writes_ymm1_b = annotated('vinsertf128 $0x1, %xmm0, %ymm1, %ymm1')
    reads_ymm1 = annotated('vmovapd %ymm1, %ymm0')

    self.assertTrue(dag.is_read(reg_rcx, reads_rcx))
    self.assertFalse(dag.is_read(reg_rcx, ignores_rcx))
    self.assertFalse(dag.is_read(reg_rcx, writes_rcx))
    self.assertTrue(dag.is_written(reg_rcx, writes_rcx))
    self.assertFalse(dag.is_written(reg_rcx, reads_rcx))

    self.assertTrue(dag.is_read(reg_ymm1, reads_writes_ymm1_a))
    self.assertTrue(dag.is_read(reg_ymm1, reads_writes_ymm1_b))
    self.assertTrue(dag.is_read(reg_ymm1, reads_ymm1))
    self.assertTrue(dag.is_written(reg_ymm1, reads_writes_ymm1_a))
    self.assertTrue(dag.is_written(reg_ymm1, reads_writes_ymm1_b))
    self.assertFalse(dag.is_written(reg_ymm1, reads_ymm1))
|
||||
|
||||
def test_is_read_is_written_AArch64(self):
    # independent from HW model (KernelDG is built without a machine model)
    dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None)
    reg_x1 = AttrDict({'prefix': 'x', 'name': '1'})
    reg_w1 = AttrDict({'prefix': 'w', 'name': '1'})
    reg_d1 = AttrDict({'prefix': 'd', 'name': '1'})
    reg_q1 = AttrDict({'prefix': 'q', 'name': '1'})
    reg_v1 = AttrDict({'prefix': 'v', 'name': '1', 'lanes': '2', 'shape': 'd'})
    # d1/q1/v1 are expected to behave alike, and so are w1/x1
    regs = [reg_d1, reg_q1, reg_v1]
    regs_gp = [reg_w1, reg_x1]

    def annotated(line):
        # parse a single assembly line and assign its source/destination operands
        instruction_form = self.parser_AArch64.parse_line(line)
        self.semantics_tx2.assign_src_dst(instruction_form)
        return instruction_form

    instr_form_r_1 = annotated('stp q1, q3, [x12, #192]')
    instr_form_r_2 = annotated('fadd v2.2d, v1.2d, v0.2d')
    instr_form_w_1 = annotated('ldr d1, [x1, #:got_lo12:q2c]')
    instr_form_non_w_1 = annotated('ldr x1, [x1, #:got_lo12:q2c]')
    instr_form_rw_1 = annotated('fmul v1.2d, v1.2d, v0.2d')
    instr_form_rw_2 = annotated('ldp q2, q4, [x1, #64]!')
    instr_form_rw_3 = annotated('str x4, [x1], #64')
    instr_form_non_rw_1 = annotated('adds x1, x11')

    # NOTE(review): the original asserted is_written(reg, instr_form_non_rw_1)
    # twice in a row in both loops; the verbatim duplicate is dropped here.
    # If the second one was meant to check something else (e.g. is_read),
    # that check still needs to be added.
    for reg in regs:
        with self.subTest(reg=reg):
            self.assertTrue(dag.is_read(reg, instr_form_r_1))
            self.assertTrue(dag.is_read(reg, instr_form_r_2))
            self.assertTrue(dag.is_read(reg, instr_form_rw_1))
            self.assertFalse(dag.is_read(reg, instr_form_rw_2))
            self.assertFalse(dag.is_read(reg, instr_form_rw_3))
            self.assertFalse(dag.is_read(reg, instr_form_w_1))
            self.assertTrue(dag.is_written(reg, instr_form_w_1))
            self.assertTrue(dag.is_written(reg, instr_form_rw_1))
            self.assertFalse(dag.is_written(reg, instr_form_non_w_1))
            self.assertFalse(dag.is_written(reg, instr_form_rw_2))
            self.assertFalse(dag.is_written(reg, instr_form_rw_3))
            self.assertFalse(dag.is_written(reg, instr_form_non_rw_1))
    for reg in regs_gp:
        with self.subTest(reg=reg):
            self.assertFalse(dag.is_read(reg, instr_form_r_1))
            self.assertFalse(dag.is_read(reg, instr_form_r_2))
            self.assertFalse(dag.is_read(reg, instr_form_rw_1))
            self.assertTrue(dag.is_read(reg, instr_form_rw_2))
            self.assertTrue(dag.is_read(reg, instr_form_rw_3))
            self.assertTrue(dag.is_read(reg, instr_form_w_1))
            self.assertFalse(dag.is_written(reg, instr_form_w_1))
            self.assertFalse(dag.is_written(reg, instr_form_rw_1))
            self.assertTrue(dag.is_written(reg, instr_form_non_w_1))
            self.assertTrue(dag.is_written(reg, instr_form_rw_2))
            self.assertTrue(dag.is_written(reg, instr_form_rw_3))
            self.assertTrue(dag.is_written(reg, instr_form_non_rw_1))
|
||||
|
||||
def test_invalid_MachineModel(self):
    # Invalid constructor arguments must be rejected: neither or both of
    # arch/path_to_yaml -> ValueError, unknown arch or missing YAML file ->
    # FileNotFoundError.
    invalid_cases = (
        (ValueError, {}),
        (
            ValueError,
            {'arch': 'CSX', 'path_to_yaml': os.path.join(self.MODULE_DATA_DIR, 'csx.yml')},
        ),
        (FileNotFoundError, {'arch': 'THE_MACHINE'}),
        (
            FileNotFoundError,
            {'path_to_yaml': os.path.join(self.MODULE_DATA_DIR, 'THE_MACHINE.yml')},
        ),
    )
    for expected_error, kwargs in invalid_cases:
        with self.assertRaises(expected_error):
            MachineModel(**kwargs)
|
||||
|
||||
def test_MachineModel_getter(self):
    # Exercises the getters of MachineModel on the CSX and TX2 models.
    sample_operands = [
        {
            'memory': {
                'offset': None,
                'base': {'name': 'r12'},
                'index': {'name': 'rcx'},
                'scale': 8,
            }
        }
    ]
    # an instruction name unknown to both models yields None
    self.assertIsNone(self.machine_model_csx.get_instruction('GETRESULT', sample_operands))
    self.assertIsNone(self.machine_model_tx2.get_instruction('GETRESULT', sample_operands))

    self.assertEqual(self.machine_model_csx.get_arch(), 'csx')
    self.assertEqual(self.machine_model_tx2.get_arch(), 'tx2')

    self.assertEqual(self.machine_model_csx.get_ISA(), 'x86')
    self.assertEqual(self.machine_model_tx2.get_ISA(), 'aarch64')

    ports_csx = ['0', '0DV', '1', '2', '2D', '3', '3D', '4', '5', '6', '7']
    data_ports_csx = ['2D', '3D']
    self.assertEqual(self.machine_model_csx.get_ports(), ports_csx)
    self.assertEqual(self.machine_model_csx.get_data_ports(), data_ports_csx)

    self.assertFalse(self.machine_model_tx2.has_hidden_loads())

    # the arch lookup is expected to be case-insensitive
    self.assertEqual(MachineModel.get_isa_for_arch('CSX'), 'x86')
    self.assertEqual(MachineModel.get_isa_for_arch('tX2'), 'aarch64')
    # an unknown arch must raise; the call is no longer wrapped in
    # assertIsNone, which contradicted the expected exception
    with self.assertRaises(ValueError):
        MachineModel.get_isa_for_arch('THE_MACHINE')
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
|
||||
@staticmethod
def _find_file(name):
    # Resolve `name` inside the 'test_files' directory located next to this
    # file, assert that the resulting path exists and return it.
    path = os.path.join(os.path.dirname(__file__), 'test_files', name)
    assert os.path.exists(path)
    return path
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run only the tests of TestSemanticTools, with verbose output.
    loader = unittest.TestLoader()
    semantic_tools_suite = loader.loadTestsFromTestCase(TestSemanticTools)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(semantic_tools_suite)
|
||||
@@ -1,653 +0,0 @@
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.macosx_version_min 10, 14
|
||||
.globl _main ## -- Begin function main
|
||||
.p2align 4, 0x90
|
||||
_main: ## @main
|
||||
.cfi_startproc
|
||||
## %bb.0:
|
||||
pushq %rbp
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
subq $408, %rsp ## imm = 0x198
|
||||
.cfi_offset %rbx, -56
|
||||
.cfi_offset %r12, -48
|
||||
.cfi_offset %r13, -40
|
||||
.cfi_offset %r14, -32
|
||||
.cfi_offset %r15, -24
|
||||
movq %rsi, %rbx
|
||||
movq 16(%rsi), %rdi
|
||||
callq _atoi
|
||||
movl %eax, %r14d
|
||||
movq 24(%rbx), %rdi
|
||||
callq _atoi
|
||||
## kill: def $eax killed $eax def $rax
|
||||
movq %r14, -96(%rbp) ## 8-byte Spill
|
||||
movl %r14d, %ecx
|
||||
imull %r14d, %ecx
|
||||
movl %ecx, -88(%rbp) ## 4-byte Spill
|
||||
movq %rax, -72(%rbp) ## 8-byte Spill
|
||||
imull %eax, %ecx
|
||||
movslq %ecx, %r13
|
||||
shlq $3, %r13
|
||||
leaq -56(%rbp), %rdi
|
||||
movl $32, %esi
|
||||
movq %r13, %rdx
|
||||
callq _posix_memalign
|
||||
testl %eax, %eax
|
||||
je LBB0_2
|
||||
## %bb.1:
|
||||
movq $0, -56(%rbp)
|
||||
xorl %ebx, %ebx
|
||||
jmp LBB0_3
|
||||
LBB0_2:
|
||||
movq -56(%rbp), %rbx
|
||||
LBB0_3:
|
||||
leaq -56(%rbp), %rdi
|
||||
movl $32, %esi
|
||||
movq %r13, %rdx
|
||||
callq _posix_memalign
|
||||
testl %eax, %eax
|
||||
je LBB0_5
|
||||
## %bb.4:
|
||||
movq $0, -56(%rbp)
|
||||
xorl %eax, %eax
|
||||
jmp LBB0_6
|
||||
LBB0_5:
|
||||
movq -56(%rbp), %rax
|
||||
LBB0_6:
|
||||
movq %rax, -80(%rbp) ## 8-byte Spill
|
||||
movq -96(%rbp), %r9 ## 8-byte Reload
|
||||
movabsq $4602641980904887326, %rax ## imm = 0x3FDFDE7EEC22D41E
|
||||
movq %rax, -56(%rbp)
|
||||
cmpl $3, -72(%rbp) ## 4-byte Folded Reload
|
||||
jl LBB0_15
|
||||
## %bb.7:
|
||||
movabsq $4294967296, %r12 ## imm = 0x100000000
|
||||
leal -1(%r9), %ecx
|
||||
movslq %r9d, %rax
|
||||
movslq -88(%rbp), %rdx ## 4-byte Folded Reload
|
||||
movq %rdx, -160(%rbp) ## 8-byte Spill
|
||||
movq -72(%rbp), %rsi ## 8-byte Reload
|
||||
leal -1(%rsi), %edx
|
||||
leaq 8(%rbx,%rax,8), %rsi
|
||||
movq %rsi, -152(%rbp) ## 8-byte Spill
|
||||
movq -80(%rbp), %rsi ## 8-byte Reload
|
||||
leaq 8(%rsi,%rax,8), %rsi
|
||||
movq %rsi, -144(%rbp) ## 8-byte Spill
|
||||
leaq (,%rax,8), %rsi
|
||||
movq %rsi, -104(%rbp) ## 8-byte Spill
|
||||
leaq 2(%rax), %rsi
|
||||
movq %rsi, -136(%rbp) ## 8-byte Spill
|
||||
shlq $32, %rax
|
||||
movq %rax, -184(%rbp) ## 8-byte Spill
|
||||
addq $-1, %rcx
|
||||
movl %r9d, %eax
|
||||
movq %rax, -176(%rbp) ## 8-byte Spill
|
||||
movl $1, %eax
|
||||
movabsq $4601149042440805838, %rdi ## imm = 0x3FDA90AD19501DCE
|
||||
movq %rdx, -208(%rbp) ## 8-byte Spill
|
||||
.p2align 4, 0x90
|
||||
LBB0_8: ## =>This Loop Header: Depth=1
|
||||
## Child Loop BB0_10 Depth 2
|
||||
## Child Loop BB0_11 Depth 3
|
||||
cmpl $2, %r9d
|
||||
jle LBB0_14
|
||||
## %bb.9: ## in Loop: Header=BB0_8 Depth=1
|
||||
movl %eax, %r14d
|
||||
imull -88(%rbp), %r14d ## 4-byte Folded Reload
|
||||
leaq 1(%rax), %r8
|
||||
movq -160(%rbp), %rdx ## 8-byte Reload
|
||||
movq %rdx, %rsi
|
||||
movq %r8, -168(%rbp) ## 8-byte Spill
|
||||
imulq %r8, %rsi
|
||||
movq -152(%rbp), %r10 ## 8-byte Reload
|
||||
leaq (%r10,%rsi,8), %r8
|
||||
leaq -1(%rax), %rsi
|
||||
imulq %rdx, %rsi
|
||||
leaq (%r10,%rsi,8), %r10
|
||||
movq %rax, %rsi
|
||||
imulq %rdx, %rsi
|
||||
movq -144(%rbp), %rdx ## 8-byte Reload
|
||||
leaq (%rdx,%rsi,8), %r11
|
||||
addl -136(%rbp), %esi ## 4-byte Folded Reload
|
||||
shlq $32, %rsi
|
||||
movl %r9d, %r15d
|
||||
imull %eax, %r15d
|
||||
leal 2(%r15), %r13d
|
||||
imull %r9d, %r13d
|
||||
addl $1, %r13d
|
||||
addq $1, %r14
|
||||
addl $1, %r15d
|
||||
imull %r9d, %r15d
|
||||
movl $1, %eax
|
||||
.p2align 4, 0x90
|
||||
LBB0_10: ## Parent Loop BB0_8 Depth=1
|
||||
## => This Loop Header: Depth=2
|
||||
## Child Loop BB0_11 Depth 3
|
||||
movq %rax, -112(%rbp) ## 8-byte Spill
|
||||
leaq 1(%rax), %rax
|
||||
movq %rax, -192(%rbp) ## 8-byte Spill
|
||||
movq %rsi, -120(%rbp) ## 8-byte Spill
|
||||
xorl %edx, %edx
|
||||
.p2align 4, 0x90
|
||||
LBB0_11: ## Parent Loop BB0_8 Depth=1
|
||||
## Parent Loop BB0_10 Depth=2
|
||||
## => This Inner Loop Header: Depth=3
|
||||
movq %rdi, (%r11,%rdx,8)
|
||||
leal (%r15,%rdx), %r9d
|
||||
movslq %r9d, %rax
|
||||
movq %rdi, (%rbx,%rax,8)
|
||||
movq %rsi, %rax
|
||||
sarq $29, %rax
|
||||
movq %rdi, (%rbx,%rax)
|
||||
leal (%r14,%rdx), %eax
|
||||
cltq
|
||||
movq %rdi, (%rbx,%rax,8)
|
||||
leal (%r13,%rdx), %eax
|
||||
cltq
|
||||
movq %rdi, (%rbx,%rax,8)
|
||||
movq %rdi, (%r10,%rdx,8)
|
||||
movq %rdi, (%r8,%rdx,8)
|
||||
addq $1, %rdx
|
||||
addq %r12, %rsi
|
||||
cmpq %rdx, %rcx
|
||||
jne LBB0_11
|
||||
## %bb.12: ## in Loop: Header=BB0_10 Depth=2
|
||||
movq -104(%rbp), %rax ## 8-byte Reload
|
||||
addq %rax, %r8
|
||||
addq %rax, %r10
|
||||
addq %rax, %r11
|
||||
movq -120(%rbp), %rsi ## 8-byte Reload
|
||||
addq -184(%rbp), %rsi ## 8-byte Folded Reload
|
||||
movq -176(%rbp), %rax ## 8-byte Reload
|
||||
addq %rax, %r13
|
||||
addq %rax, %r14
|
||||
addq %rax, %r15
|
||||
cmpq %rdx, -112(%rbp) ## 8-byte Folded Reload
|
||||
movq -192(%rbp), %rax ## 8-byte Reload
|
||||
jne LBB0_10
|
||||
## %bb.13: ## in Loop: Header=BB0_8 Depth=1
|
||||
movq -168(%rbp), %rsi ## 8-byte Reload
|
||||
movq %rsi, %rax
|
||||
movq -96(%rbp), %r9 ## 8-byte Reload
|
||||
movq -208(%rbp), %rdx ## 8-byte Reload
|
||||
cmpq %rdx, %rsi
|
||||
jne LBB0_8
|
||||
jmp LBB0_15
|
||||
.p2align 4, 0x90
|
||||
LBB0_14: ## in Loop: Header=BB0_8 Depth=1
|
||||
addq $1, %rax
|
||||
movq %rax, %rsi
|
||||
cmpq %rdx, %rsi
|
||||
jne LBB0_8
|
||||
LBB0_15:
|
||||
movq _var_false@GOTPCREL(%rip), %rax
|
||||
cmpl $0, (%rax)
|
||||
je LBB0_17
|
||||
## %bb.16:
|
||||
movq %rbx, %rdi
|
||||
callq _dummy
|
||||
movq -80(%rbp), %rdi ## 8-byte Reload
|
||||
callq _dummy
|
||||
leaq -56(%rbp), %rdi
|
||||
callq _dummy
|
||||
movq -96(%rbp), %r9 ## 8-byte Reload
|
||||
LBB0_17:
|
||||
cmpl $3, -72(%rbp) ## 4-byte Folded Reload
|
||||
jl LBB0_59
|
||||
## %bb.18:
|
||||
movabsq $4294967296, %r14 ## imm = 0x100000000
|
||||
leal -1(%r9), %ecx
|
||||
movslq %r9d, %rsi
|
||||
movslq -88(%rbp), %rax ## 4-byte Folded Reload
|
||||
movq %rax, -312(%rbp) ## 8-byte Spill
|
||||
movq -72(%rbp), %rax ## 8-byte Reload
|
||||
addl $-1, %eax
|
||||
movq %rax, -72(%rbp) ## 8-byte Spill
|
||||
leaq -1(%rcx), %rax
|
||||
leaq -2(%rcx), %rdi
|
||||
movq %rdi, -424(%rbp) ## 8-byte Spill
|
||||
leaq 1(%rsi), %rdi
|
||||
movq %rdi, -224(%rbp) ## 8-byte Spill
|
||||
leaq (%rsi,%rcx), %rdi
|
||||
movq %rdi, -304(%rbp) ## 8-byte Spill
|
||||
movl %r9d, %edi
|
||||
movq %rdi, -256(%rbp) ## 8-byte Spill
|
||||
movq %rcx, -264(%rbp) ## 8-byte Spill
|
||||
leaq (%rbx,%rcx,8), %rcx
|
||||
addq $-8, %rcx
|
||||
movq %rcx, -352(%rbp) ## 8-byte Spill
|
||||
leal 6(%r9), %ecx
|
||||
andl $7, %ecx
|
||||
movq %rax, -448(%rbp) ## 8-byte Spill
|
||||
movq %rcx, -344(%rbp) ## 8-byte Spill
|
||||
subq %rcx, %rax
|
||||
movq %rsi, %rcx
|
||||
shlq $32, %rcx
|
||||
movq %rcx, -440(%rbp) ## 8-byte Spill
|
||||
leaq 1(%rax), %rcx
|
||||
movq %rcx, -328(%rbp) ## 8-byte Spill
|
||||
movq %rax, -336(%rbp) ## 8-byte Spill
|
||||
leal 1(%rax), %eax
|
||||
movl %eax, -212(%rbp) ## 4-byte Spill
|
||||
leaq 2(%rsi), %rax
|
||||
movq %rax, -296(%rbp) ## 8-byte Spill
|
||||
movq -80(%rbp), %rax ## 8-byte Reload
|
||||
leaq 8(%rax,%rsi,8), %rax
|
||||
movq %rax, -288(%rbp) ## 8-byte Spill
|
||||
leaq (,%rsi,8), %rax
|
||||
movq %rax, -432(%rbp) ## 8-byte Spill
|
||||
movq %rsi, -200(%rbp) ## 8-byte Spill
|
||||
leaq (%rbx,%rsi,8), %rax
|
||||
addq $8, %rax
|
||||
movq %rax, -280(%rbp) ## 8-byte Spill
|
||||
movl $1, %eax
|
||||
.p2align 4, 0x90
|
||||
LBB0_19: ## =>This Loop Header: Depth=1
|
||||
## Child Loop BB0_52 Depth 2
|
||||
## Child Loop BB0_37 Depth 3
|
||||
## Child Loop BB0_55 Depth 3
|
||||
cmpl $2, %r9d
|
||||
jle LBB0_58
|
||||
## %bb.20: ## in Loop: Header=BB0_19 Depth=1
|
||||
movq %rax, %rcx
|
||||
movq %rax, %r12
|
||||
movq -312(%rbp), %r15 ## 8-byte Reload
|
||||
imulq %r15, %r12
|
||||
leaq 1(%rax), %rax
|
||||
movl %r9d, %edi
|
||||
imull %ecx, %edi
|
||||
leal 1(%rdi), %r8d
|
||||
imull %r9d, %r8d
|
||||
addl $2, %edi
|
||||
imull %r9d, %edi
|
||||
movq %rax, -320(%rbp) ## 8-byte Spill
|
||||
movq %rax, %r13
|
||||
imulq %r15, %r13
|
||||
movq -224(%rbp), %rdx ## 8-byte Reload
|
||||
leaq (%rdx,%r13), %rax
|
||||
movq %rax, -408(%rbp) ## 8-byte Spill
|
||||
movq -304(%rbp), %rsi ## 8-byte Reload
|
||||
leaq (%rsi,%r13), %rax
|
||||
movq %rax, -400(%rbp) ## 8-byte Spill
|
||||
addq $-1, %rcx
|
||||
imulq %r15, %rcx
|
||||
leaq (%rdx,%rcx), %rax
|
||||
movq %rax, -392(%rbp) ## 8-byte Spill
|
||||
leaq (%rsi,%rcx), %rax
|
||||
movq %rax, -384(%rbp) ## 8-byte Spill
|
||||
movq -296(%rbp), %rax ## 8-byte Reload
|
||||
leal (%rax,%r12), %eax
|
||||
shlq $32, %rax
|
||||
movq %rax, -104(%rbp) ## 8-byte Spill
|
||||
movq -280(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%r13,8), %r10
|
||||
leaq (%rax,%rcx,8), %r11
|
||||
movl %r12d, %edx
|
||||
addq $1, %rdx
|
||||
movq -200(%rbp), %rax ## 8-byte Reload
|
||||
addq %rax, %r13
|
||||
movq %r13, -144(%rbp) ## 8-byte Spill
|
||||
addq %rax, %rcx
|
||||
movq %rcx, -152(%rbp) ## 8-byte Spill
|
||||
leal 2(%r8), %eax
|
||||
movq %rax, -240(%rbp) ## 8-byte Spill
|
||||
leal 1(%r12), %eax
|
||||
movq %rax, -416(%rbp) ## 8-byte Spill
|
||||
movq %rdi, %rax
|
||||
movq %rdi, -112(%rbp) ## 8-byte Spill
|
||||
leal 1(%rdi), %r15d
|
||||
movq -224(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%r12), %rcx
|
||||
leaq (%rsi,%r12), %rax
|
||||
movq %rax, -368(%rbp) ## 8-byte Spill
|
||||
movq -288(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%r12,8), %rsi
|
||||
leaq -8(%rax,%r12,8), %rax
|
||||
movq %rax, -136(%rbp) ## 8-byte Spill
|
||||
movq %r12, -120(%rbp) ## 8-byte Spill
|
||||
leaq 1(%r12), %rax
|
||||
movq %rax, -360(%rbp) ## 8-byte Spill
|
||||
leal -1(%r8), %eax
|
||||
movl %eax, -124(%rbp) ## 4-byte Spill
|
||||
movq %rcx, -376(%rbp) ## 8-byte Spill
|
||||
movq %rcx, -272(%rbp) ## 8-byte Spill
|
||||
movq %r8, -248(%rbp) ## 8-byte Spill
|
||||
movq %r8, %rdi
|
||||
movq %r15, -232(%rbp) ## 8-byte Spill
|
||||
movq %r15, %r8
|
||||
xorl %r12d, %r12d
|
||||
movl $1, %eax
|
||||
jmp LBB0_52
|
||||
.p2align 4, 0x90
|
||||
LBB0_21: ## in Loop: Header=BB0_52 Depth=2
|
||||
movl %r9d, %edx
|
||||
imull %r12d, %edx
|
||||
movq -248(%rbp), %rax ## 8-byte Reload
|
||||
leal (%rax,%rdx), %ecx
|
||||
movq -424(%rbp), %rax ## 8-byte Reload
|
||||
leal (%rcx,%rax), %esi
|
||||
cmpl %ecx, %esi
|
||||
jl LBB0_53
|
||||
## %bb.22: ## in Loop: Header=BB0_52 Depth=2
|
||||
movq %rax, %rcx
|
||||
shrq $32, %rcx
|
||||
jne LBB0_53
|
||||
## %bb.23: ## in Loop: Header=BB0_52 Depth=2
|
||||
movq -240(%rbp), %rsi ## 8-byte Reload
|
||||
leal (%rsi,%rdx), %esi
|
||||
leal (%rsi,%rax), %edi
|
||||
cmpl %esi, %edi
|
||||
jl LBB0_53
|
||||
## %bb.24: ## in Loop: Header=BB0_52 Depth=2
|
||||
testq %rcx, %rcx
|
||||
jne LBB0_53
|
||||
## %bb.25: ## in Loop: Header=BB0_52 Depth=2
|
||||
movq -416(%rbp), %rsi ## 8-byte Reload
|
||||
leal (%rsi,%rdx), %esi
|
||||
leal (%rsi,%rax), %edi
|
||||
cmpl %esi, %edi
|
||||
jl LBB0_53
|
||||
## %bb.26: ## in Loop: Header=BB0_52 Depth=2
|
||||
testq %rcx, %rcx
|
||||
jne LBB0_53
|
||||
## %bb.27: ## in Loop: Header=BB0_52 Depth=2
|
||||
addl -232(%rbp), %edx ## 4-byte Folded Reload
|
||||
leal (%rdx,%rax), %esi
|
||||
cmpl %edx, %esi
|
||||
jl LBB0_53
|
||||
## %bb.28: ## in Loop: Header=BB0_52 Depth=2
|
||||
testq %rcx, %rcx
|
||||
jne LBB0_53
|
||||
## %bb.29: ## in Loop: Header=BB0_52 Depth=2
|
||||
movq -192(%rbp), %rdx ## 8-byte Reload
|
||||
movq %rdx, %rsi
|
||||
imulq -200(%rbp), %rsi ## 8-byte Folded Reload
|
||||
movq -376(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%rsi), %rdi
|
||||
movq -368(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%rsi), %r13
|
||||
movq -408(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%rsi), %r11
|
||||
movq -400(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%rsi), %rcx
|
||||
movq -392(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%rsi), %r10
|
||||
addq -384(%rbp), %rsi ## 8-byte Folded Reload
|
||||
## kill: def $edx killed $edx killed $rdx def $rdx
|
||||
imull -256(%rbp), %edx ## 4-byte Folded Reload
|
||||
movq -232(%rbp), %rax ## 8-byte Reload
|
||||
leal (%rax,%rdx), %r12d
|
||||
movq -360(%rbp), %rax ## 8-byte Reload
|
||||
leal (%rax,%rdx), %r9d
|
||||
movq -240(%rbp), %rax ## 8-byte Reload
|
||||
leal (%rax,%rdx), %eax
|
||||
movl %eax, -60(%rbp) ## 4-byte Spill
|
||||
addl -248(%rbp), %edx ## 4-byte Folded Reload
|
||||
movq -80(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%rdi,8), %rdi
|
||||
leaq (%rbx,%rcx,8), %rcx
|
||||
cmpq %rcx, %rdi
|
||||
leaq (%rax,%r13,8), %rcx
|
||||
leaq (%rbx,%r11,8), %r11
|
||||
setb -45(%rbp) ## 1-byte Folded Spill
|
||||
cmpq %rcx, %r11
|
||||
leaq (%rbx,%r10,8), %r10
|
||||
leaq (%rbx,%rsi,8), %r11
|
||||
movslq %r12d, %rsi
|
||||
setb -44(%rbp) ## 1-byte Folded Spill
|
||||
cmpq %r11, %rdi
|
||||
setb %r12b
|
||||
cmpq %rcx, %r10
|
||||
leaq (%rbx,%rsi,8), %r10
|
||||
movq -352(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%rsi,8), %rsi
|
||||
movslq %r9d, %r9
|
||||
setb -43(%rbp) ## 1-byte Folded Spill
|
||||
cmpq %rsi, %rdi
|
||||
setb %r11b
|
||||
cmpq %rcx, %r10
|
||||
leaq (%rbx,%r9,8), %r10
|
||||
leaq (%rax,%r9,8), %rsi
|
||||
movslq -60(%rbp), %r9 ## 4-byte Folded Reload
|
||||
setb -60(%rbp) ## 1-byte Folded Spill
|
||||
cmpq %rsi, %rdi
|
||||
setb %r13b
|
||||
cmpq %rcx, %r10
|
||||
leaq (%rbx,%r9,8), %r10
|
||||
leaq (%rax,%r9,8), %rsi
|
||||
movslq %edx, %rdx
|
||||
setb -42(%rbp) ## 1-byte Folded Spill
|
||||
cmpq %rsi, %rdi
|
||||
setb %r9b
|
||||
cmpq %rcx, %r10
|
||||
leaq (%rax,%rdx,8), %rsi
|
||||
setb -41(%rbp) ## 1-byte Folded Spill
|
||||
cmpq %rsi, %rdi
|
||||
leaq (%rbx,%rdx,8), %rdx
|
||||
setb %r10b
|
||||
cmpq %rcx, %rdx
|
||||
setb %dl
|
||||
leaq -55(%rbp), %rax
|
||||
cmpq %rdi, %rax
|
||||
seta %dil
|
||||
leaq -56(%rbp), %rax
|
||||
cmpq %rcx, %rax
|
||||
setb %al
|
||||
movb -44(%rbp), %cl ## 1-byte Reload
|
||||
testb %cl, -45(%rbp) ## 1-byte Folded Reload
|
||||
jne LBB0_53
|
||||
## %bb.30: ## in Loop: Header=BB0_52 Depth=2
|
||||
andb -43(%rbp), %r12b ## 1-byte Folded Reload
|
||||
jne LBB0_53
|
||||
## %bb.31: ## in Loop: Header=BB0_52 Depth=2
|
||||
andb -60(%rbp), %r11b ## 1-byte Folded Reload
|
||||
jne LBB0_53
|
||||
## %bb.32: ## in Loop: Header=BB0_52 Depth=2
|
||||
andb -42(%rbp), %r13b ## 1-byte Folded Reload
|
||||
jne LBB0_53
|
||||
## %bb.33: ## in Loop: Header=BB0_52 Depth=2
|
||||
andb -41(%rbp), %r9b ## 1-byte Folded Reload
|
||||
jne LBB0_53
|
||||
## %bb.34: ## in Loop: Header=BB0_52 Depth=2
|
||||
movl $1, %r9d
|
||||
andb %dl, %r10b
|
||||
jne LBB0_54
|
||||
## %bb.35: ## in Loop: Header=BB0_52 Depth=2
|
||||
andb %al, %dil
|
||||
jne LBB0_54
|
||||
## %bb.36: ## in Loop: Header=BB0_52 Depth=2
|
||||
vbroadcastsd -56(%rbp), %zmm0
|
||||
movq -104(%rbp), %rdx ## 8-byte Reload
|
||||
xorl %esi, %esi
|
||||
movq -336(%rbp), %r9 ## 8-byte Reload
|
||||
movabsq $34359738368, %rdi ## imm = 0x800000000
|
||||
movq %rdi, %r10
|
||||
movq -184(%rbp), %r11 ## 8-byte Reload
|
||||
movq -176(%rbp), %r15 ## 8-byte Reload
|
||||
movq -168(%rbp), %r12 ## 8-byte Reload
|
||||
movq -88(%rbp), %rdi ## 8-byte Reload
|
||||
movq -160(%rbp), %rax ## 8-byte Reload
|
||||
.p2align 4, 0x90
|
||||
movl $111, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
LBB0_37: ## Parent Loop BB0_19 Depth=1
|
||||
## Parent Loop BB0_52 Depth=2
|
||||
## => This Inner Loop Header: Depth=3
|
||||
leal (%rax,%rsi), %ecx
|
||||
movslq %ecx, %rcx
|
||||
vmovupd (%rbx,%rcx,8), %zmm1
|
||||
movq %rdx, %rcx
|
||||
sarq $29, %rcx
|
||||
vaddpd (%rbx,%rcx), %zmm1, %zmm1
|
||||
leal (%r12,%rsi), %ecx
|
||||
movslq %ecx, %rcx
|
||||
vaddpd (%rbx,%rcx,8), %zmm1, %zmm1
|
||||
leal (%r8,%rsi), %ecx
|
||||
movslq %ecx, %rcx
|
||||
vaddpd (%rbx,%rcx,8), %zmm1, %zmm1
|
||||
vaddpd (%r15,%rsi,8), %zmm1, %zmm1
|
||||
vaddpd (%r11,%rsi,8), %zmm1, %zmm1
|
||||
vmulpd %zmm0, %zmm1, %zmm1
|
||||
vmovupd %zmm1, (%rdi,%rsi,8)
|
||||
addq $8, %rsi
|
||||
addq %r10, %rdx
|
||||
cmpq %rsi, %r9
|
||||
jne LBB0_37
|
||||
movl $222, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
## %bb.38: ## in Loop: Header=BB0_52 Depth=2
|
||||
movq -328(%rbp), %r9 ## 8-byte Reload
|
||||
movl -212(%rbp), %eax ## 4-byte Reload
|
||||
movl %eax, %r15d
|
||||
cmpl $0, -344(%rbp) ## 4-byte Folded Reload
|
||||
jne LBB0_54
|
||||
jmp LBB0_56
|
||||
.p2align 4, 0x90
|
||||
LBB0_52: ## Parent Loop BB0_19 Depth=1
|
||||
## => This Loop Header: Depth=2
|
||||
## Child Loop BB0_37 Depth 3
|
||||
## Child Loop BB0_55 Depth 3
|
||||
movq %rdx, -168(%rbp) ## 8-byte Spill
|
||||
addq $1, %rax
|
||||
movl $1, %r15d
|
||||
cmpq $8, -448(%rbp) ## 8-byte Folded Reload
|
||||
movq %r10, -184(%rbp) ## 8-byte Spill
|
||||
movq %r11, -176(%rbp) ## 8-byte Spill
|
||||
movq %rsi, -88(%rbp) ## 8-byte Spill
|
||||
movq %rdi, -160(%rbp) ## 8-byte Spill
|
||||
movq %r12, -192(%rbp) ## 8-byte Spill
|
||||
movq %rax, -208(%rbp) ## 8-byte Spill
|
||||
jae LBB0_21
|
||||
LBB0_53: ## in Loop: Header=BB0_52 Depth=2
|
||||
movl $1, %r9d
|
||||
LBB0_54: ## in Loop: Header=BB0_52 Depth=2
|
||||
movq -136(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%rax,%r9,8), %rdx
|
||||
movq -144(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%r9,%rax), %rcx
|
||||
leaq (%rbx,%rcx,8), %r11
|
||||
movq -152(%rbp), %rax ## 8-byte Reload
|
||||
leaq (%r9,%rax), %rcx
|
||||
leaq (%rbx,%rcx,8), %r10
|
||||
movq -272(%rbp), %rax ## 8-byte Reload
|
||||
leal (%r9,%rax), %r12d
|
||||
shlq $32, %r12
|
||||
movq -264(%rbp), %r13 ## 8-byte Reload
|
||||
subq %r9, %r13
|
||||
movq -112(%rbp), %rax ## 8-byte Reload
|
||||
leal (%r15,%rax), %esi
|
||||
movq -120(%rbp), %rax ## 8-byte Reload
|
||||
leal (%r15,%rax), %edi
|
||||
addl -124(%rbp), %r15d ## 4-byte Folded Reload
|
||||
xorl %ecx, %ecx
|
||||
.p2align 4, 0x90
|
||||
LBB0_55: ## Parent Loop BB0_19 Depth=1
|
||||
## Parent Loop BB0_52 Depth=2
|
||||
## => This Inner Loop Header: Depth=3
|
||||
leal (%r15,%rcx), %eax
|
||||
cltq
|
||||
vmovsd (%rbx,%rax,8), %xmm0 ## xmm0 = mem[0],zero
|
||||
movq %r12, %rax
|
||||
sarq $29, %rax
|
||||
vaddsd (%rbx,%rax), %xmm0, %xmm0
|
||||
leal (%rdi,%rcx), %eax
|
||||
cltq
|
||||
vaddsd (%rbx,%rax,8), %xmm0, %xmm0
|
||||
leal (%rsi,%rcx), %eax
|
||||
cltq
|
||||
vaddsd (%rbx,%rax,8), %xmm0, %xmm0
|
||||
vaddsd (%r10,%rcx,8), %xmm0, %xmm0
|
||||
vaddsd (%r11,%rcx,8), %xmm0, %xmm0
|
||||
vmulsd -56(%rbp), %xmm0, %xmm0
|
||||
vmovsd %xmm0, (%rdx,%rcx,8)
|
||||
addq $1, %rcx
|
||||
addq %r14, %r12
|
||||
cmpq %rcx, %r13
|
||||
jne LBB0_55
|
||||
LBB0_56: ## in Loop: Header=BB0_52 Depth=2
|
||||
movq -192(%rbp), %r12 ## 8-byte Reload
|
||||
addq $1, %r12
|
||||
movq -104(%rbp), %rax ## 8-byte Reload
|
||||
addq -440(%rbp), %rax ## 8-byte Folded Reload
|
||||
movq %rax, -104(%rbp) ## 8-byte Spill
|
||||
movq -432(%rbp), %rcx ## 8-byte Reload
|
||||
movq -88(%rbp), %rsi ## 8-byte Reload
|
||||
addq %rcx, %rsi
|
||||
movq -184(%rbp), %r10 ## 8-byte Reload
|
||||
addq %rcx, %r10
|
||||
movq -176(%rbp), %r11 ## 8-byte Reload
|
||||
addq %rcx, %r11
|
||||
movq -256(%rbp), %rax ## 8-byte Reload
|
||||
addq %rax, %r8
|
||||
movq -168(%rbp), %rdx ## 8-byte Reload
|
||||
addq %rax, %rdx
|
||||
movq -160(%rbp), %rdi ## 8-byte Reload
|
||||
addq %rax, %rdi
|
||||
addq %rcx, -136(%rbp) ## 8-byte Folded Spill
|
||||
movq -200(%rbp), %rax ## 8-byte Reload
|
||||
addq %rax, -144(%rbp) ## 8-byte Folded Spill
|
||||
addq %rax, -152(%rbp) ## 8-byte Folded Spill
|
||||
addq %rax, -272(%rbp) ## 8-byte Folded Spill
|
||||
movq -96(%rbp), %r9 ## 8-byte Reload
|
||||
movq -112(%rbp), %rax ## 8-byte Reload
|
||||
addl %r9d, %eax
|
||||
movq %rax, -112(%rbp) ## 8-byte Spill
|
||||
movq -120(%rbp), %rax ## 8-byte Reload
|
||||
addl %r9d, %eax
|
||||
movq %rax, -120(%rbp) ## 8-byte Spill
|
||||
addl %r9d, -124(%rbp) ## 4-byte Folded Spill
|
||||
movq -208(%rbp), %rax ## 8-byte Reload
|
||||
cmpq -264(%rbp), %rax ## 8-byte Folded Reload
|
||||
jne LBB0_52
|
||||
## %bb.57: ## in Loop: Header=BB0_19 Depth=1
|
||||
movq -320(%rbp), %rcx ## 8-byte Reload
|
||||
movq %rcx, %rax
|
||||
cmpq -72(%rbp), %rcx ## 8-byte Folded Reload
|
||||
jne LBB0_19
|
||||
jmp LBB0_59
|
||||
.p2align 4, 0x90
|
||||
LBB0_58: ## in Loop: Header=BB0_19 Depth=1
|
||||
movq %rax, %rcx
|
||||
addq $1, %rcx
|
||||
movq %rcx, %rax
|
||||
cmpq -72(%rbp), %rcx ## 8-byte Folded Reload
|
||||
jne LBB0_19
|
||||
LBB0_59:
|
||||
movq _var_false@GOTPCREL(%rip), %rax
|
||||
cmpl $0, (%rax)
|
||||
je LBB0_61
|
||||
## %bb.60:
|
||||
movq %rbx, %rdi
|
||||
vzeroupper
|
||||
callq _dummy
|
||||
movq -80(%rbp), %rdi ## 8-byte Reload
|
||||
callq _dummy
|
||||
leaq -56(%rbp), %rdi
|
||||
callq _dummy
|
||||
LBB0_61:
|
||||
xorl %eax, %eax
|
||||
addq $408, %rsp ## imm = 0x198
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
vzeroupper
|
||||
retq
|
||||
.cfi_endproc
|
||||
## -- End function
|
||||
|
||||
.subsections_via_symbols
|
||||
Binary file not shown.
@@ -1,196 +0,0 @@
|
||||
# mark_description "Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 16.0.3.210 Build 20160415";
|
||||
# mark_description "-I../../iaca-lin64/include -fno-alias -O3 -fopenmp -xCORE-AVX-I -S -o ivb-asm.S";
|
||||
.file "taxCalc.c"
|
||||
.text
|
||||
..TXTST0:
|
||||
# -- Begin main
|
||||
.text
|
||||
# mark_begin;
|
||||
.align 16,0x90
|
||||
.globl main
|
||||
# --- main(void)
|
||||
main:
|
||||
..B1.1: # Preds ..B1.0
|
||||
.cfi_startproc
|
||||
..___tag_value_main.1:
|
||||
..L2:
|
||||
#4.15
|
||||
pushq %rbp #4.15
|
||||
.cfi_def_cfa_offset 16
|
||||
movq %rsp, %rbp #4.15
|
||||
.cfi_def_cfa 6, 16
|
||||
.cfi_offset 6, -16
|
||||
andq $-128, %rsp #4.15
|
||||
subq $4096, %rsp #4.15
|
||||
movl $104446, %esi #4.15
|
||||
movl $3, %edi #4.15
|
||||
call __intel_new_feature_proc_init #4.15
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.10: # Preds ..B1.1
|
||||
vstmxcsr (%rsp) #4.15
|
||||
movl $.2.3_2_kmpc_loc_struct_pack.3, %edi #4.15
|
||||
xorl %esi, %esi #4.15
|
||||
orl $32832, (%rsp) #4.15
|
||||
xorl %eax, %eax #4.15
|
||||
vldmxcsr (%rsp) #4.15
|
||||
..___tag_value_main.6:
|
||||
call __kmpc_begin #4.15
|
||||
..___tag_value_main.7:
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.2: # Preds ..B1.10
|
||||
movl $il0_peep_printf_format_0, %edi #5.5
|
||||
call puts #5.5
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.3: # Preds ..B1.2
|
||||
vmovss .L_2il0floatpacket.0(%rip), %xmm0 #8.15
|
||||
xorl %eax, %eax #11.5
|
||||
vxorps %xmm1, %xmm1, %xmm1 #9.5
|
||||
vmovss %xmm1, (%rsp) #9.5
|
||||
|
||||
movl $111,%ebx
|
||||
.byte 100,103,144
|
||||
|
||||
..B1.4: # Preds ..B1.4 ..B1.3
|
||||
lea 1(%rax,%rax), %edx #12.9
|
||||
vcvtsi2ss %edx, %xmm2, %xmm2 #12.27
|
||||
vmulss %xmm2, %xmm0, %xmm3 #12.29
|
||||
lea 2(%rax,%rax), %ecx #12.9
|
||||
vaddss %xmm3, %xmm1, %xmm4 #12.29
|
||||
vxorps %xmm1, %xmm1, %xmm1 #12.27
|
||||
vcvtsi2ss %ecx, %xmm1, %xmm1 #12.27
|
||||
vmulss %xmm1, %xmm0, %xmm5 #12.29
|
||||
vmovss %xmm4, 4(%rsp,%rax,8) #12.9
|
||||
vaddss %xmm5, %xmm4, %xmm1 #12.29
|
||||
vmovss %xmm1, 8(%rsp,%rax,8) #12.9
|
||||
incq %rax #11.5
|
||||
cmpq $499, %rax #11.5
|
||||
jb ..B1.4 # Prob 99% #11.5
|
||||
# LOE rax rbx r12 r13 r14 r15 xmm0 xmm1
|
||||
movl $222,%ebx
|
||||
.byte 100,103,144
|
||||
..B1.5: # Preds ..B1.4
|
||||
vmovss 3992(%rsp), %xmm0 #12.18
|
||||
movl $il0_peep_printf_format_1, %edi #15.5
|
||||
vaddss .L_2il0floatpacket.1(%rip), %xmm0, %xmm1 #12.29
|
||||
vmovss %xmm1, 3996(%rsp) #12.9
|
||||
call puts #15.5
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.6: # Preds ..B1.5
|
||||
movl $.2.3_2_kmpc_loc_struct_pack.14, %edi #16.12
|
||||
xorl %eax, %eax #16.12
|
||||
..___tag_value_main.8:
|
||||
call __kmpc_end #16.12
|
||||
..___tag_value_main.9:
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.7: # Preds ..B1.6
|
||||
xorl %eax, %eax #16.12
|
||||
movq %rbp, %rsp #16.12
|
||||
popq %rbp #16.12
|
||||
.cfi_def_cfa 7, 8
|
||||
.cfi_restore 6
|
||||
ret #16.12
|
||||
.align 16,0x90
|
||||
.cfi_endproc
|
||||
# LOE
|
||||
# mark_end;
|
||||
.type main,@function
|
||||
.size main,.-main
|
||||
.data
|
||||
.align 4
|
||||
.align 4
|
||||
.2.3_2_kmpc_loc_struct_pack.3:
|
||||
.long 0
|
||||
.long 2
|
||||
.long 0
|
||||
.long 0
|
||||
.quad .2.3_2__kmpc_loc_pack.2
|
||||
.align 4
|
||||
.2.3_2__kmpc_loc_pack.2:
|
||||
.byte 59
|
||||
.byte 117
|
||||
.byte 110
|
||||
.byte 107
|
||||
.byte 110
|
||||
.byte 111
|
||||
.byte 119
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 109
|
||||
.byte 97
|
||||
.byte 105
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 52
|
||||
.byte 59
|
||||
.byte 52
|
||||
.byte 59
|
||||
.byte 59
|
||||
.space 1, 0x00 # pad
|
||||
.align 4
|
||||
.2.3_2_kmpc_loc_struct_pack.14:
|
||||
.long 0
|
||||
.long 2
|
||||
.long 0
|
||||
.long 0
|
||||
.quad .2.3_2__kmpc_loc_pack.13
|
||||
.align 4
|
||||
.2.3_2__kmpc_loc_pack.13:
|
||||
.byte 59
|
||||
.byte 117
|
||||
.byte 110
|
||||
.byte 107
|
||||
.byte 110
|
||||
.byte 111
|
||||
.byte 119
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 109
|
||||
.byte 97
|
||||
.byte 105
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 49
|
||||
.byte 54
|
||||
.byte 59
|
||||
.byte 49
|
||||
.byte 54
|
||||
.byte 59
|
||||
.byte 59
|
||||
.section .rodata.str1.4, "aMS",@progbits,1
|
||||
.align 4
|
||||
.align 4
|
||||
il0_peep_printf_format_0:
|
||||
.long 1128354639
|
||||
.long 1702109249
|
||||
.long 1931506803
|
||||
.long 1953653108
|
||||
.byte 0
|
||||
.space 3, 0x00 # pad
|
||||
.align 4
|
||||
il0_peep_printf_format_1:
|
||||
.long 1128354639
|
||||
.long 1702109249
|
||||
.long 1696625779
|
||||
.word 25710
|
||||
.byte 0
|
||||
.data
|
||||
# -- End main
|
||||
.section .rodata, "a"
|
||||
.align 4
|
||||
.align 4
|
||||
.L_2il0floatpacket.0:
|
||||
.long 0x3e428f5c
|
||||
.type .L_2il0floatpacket.0,@object
|
||||
.size .L_2il0floatpacket.0,4
|
||||
.align 4
|
||||
.L_2il0floatpacket.1:
|
||||
.long 0x433dcf5c
|
||||
.type .L_2il0floatpacket.1,@object
|
||||
.size .L_2il0floatpacket.1,4
|
||||
.data
|
||||
.section .note.GNU-stack, ""
|
||||
// -- Begin DWARF2 SEGMENT .eh_frame
|
||||
.section .eh_frame,"a",@progbits
|
||||
.eh_frame_seg:
|
||||
.align 8
|
||||
# End
|
||||
@@ -1,201 +0,0 @@
|
||||
# mark_description "Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 16.0.3.210 Build 20160415";
|
||||
# mark_description "-I../../iaca-lin64/include -fno-alias -O3 -fopenmp -xCORE-AVX-I -S -o ivb-asm.S";
|
||||
.file "taxCalc.c"
|
||||
.text
|
||||
..TXTST0:
|
||||
# -- Begin main
|
||||
.text
|
||||
# mark_begin;
|
||||
.align 16,0x90
|
||||
.globl main
|
||||
# --- main(void)
|
||||
main:
|
||||
..B1.1: # Preds ..B1.0
|
||||
.cfi_startproc
|
||||
..___tag_value_main.1:
|
||||
..L2:
|
||||
#4.15
|
||||
pushq %rbp #4.15
|
||||
.cfi_def_cfa_offset 16
|
||||
movq %rsp, %rbp #4.15
|
||||
.cfi_def_cfa 6, 16
|
||||
.cfi_offset 6, -16
|
||||
andq $-128, %rsp #4.15
|
||||
subq $4096, %rsp #4.15
|
||||
movl $104446, %esi #4.15
|
||||
movl $3, %edi #4.15
|
||||
call __intel_new_feature_proc_init #4.15
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.10: # Preds ..B1.1
|
||||
vstmxcsr (%rsp) #4.15
|
||||
movl $.2.3_2_kmpc_loc_struct_pack.3, %edi #4.15
|
||||
xorl %esi, %esi #4.15
|
||||
orl $32832, (%rsp) #4.15
|
||||
xorl %eax, %eax #4.15
|
||||
vldmxcsr (%rsp) #4.15
|
||||
..___tag_value_main.6:
|
||||
call __kmpc_begin #4.15
|
||||
..___tag_value_main.7:
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.2: # Preds ..B1.10
|
||||
movl $il0_peep_printf_format_0, %edi #5.5
|
||||
call puts #5.5
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.3: # Preds ..B1.2
|
||||
vmovss .L_2il0floatpacket.0(%rip), %xmm0 #8.15
|
||||
xorl %eax, %eax #11.5
|
||||
vxorps %xmm1, %xmm1, %xmm1 #9.5
|
||||
vmovss %xmm1, (%rsp) #9.5
|
||||
|
||||
# LOE rax rbx r12 r13 r14 r15 xmm0 xmm1
|
||||
movl $111, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 100
|
||||
.byte 103
|
||||
.byte 144
|
||||
..B1.4: # Preds ..B1.4 ..B1.3
|
||||
lea 1(%rax,%rax), %edx #12.9
|
||||
vcvtsi2ss %edx, %xmm2, %xmm2 #12.27
|
||||
vmulss %xmm2, %xmm0, %xmm3 #12.29
|
||||
lea 2(%rax,%rax), %ecx #12.9
|
||||
vaddss %xmm3, %xmm1, %xmm4 #12.29
|
||||
vxorps %xmm1, %xmm1, %xmm1 #12.27
|
||||
vcvtsi2ss %ecx, %xmm1, %xmm1 #12.27
|
||||
vmulss %xmm1, %xmm0, %xmm5 #12.29
|
||||
vmovss %xmm4, 4(%rsp,%rax,8) #12.9
|
||||
vaddss %xmm5, %xmm4, %xmm1 #12.29
|
||||
vmovss %xmm1, 8(%rsp,%rax,8) #12.9
|
||||
incq %rax #11.5
|
||||
cmpq $499, %rax #11.5
|
||||
jb ..B1.4 # Prob 99% #11.5
|
||||
movl $222, %ebx
|
||||
.byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
.byte 103
|
||||
.byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY
|
||||
# LOE rax rbx r12 r13 r14 r15 xmm0 xmm1
|
||||
|
||||
..B1.5: # Preds ..B1.4
|
||||
vmovss 3992(%rsp), %xmm0 #12.18
|
||||
movl $il0_peep_printf_format_1, %edi #15.5
|
||||
vaddss .L_2il0floatpacket.1(%rip), %xmm0, %xmm1 #12.29
|
||||
vmovss %xmm1, 3996(%rsp) #12.9
|
||||
call puts #15.5
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.6: # Preds ..B1.5
|
||||
movl $.2.3_2_kmpc_loc_struct_pack.14, %edi #16.12
|
||||
xorl %eax, %eax #16.12
|
||||
..___tag_value_main.8:
|
||||
call __kmpc_end #16.12
|
||||
..___tag_value_main.9:
|
||||
# LOE rbx r12 r13 r14 r15
|
||||
..B1.7: # Preds ..B1.6
|
||||
xorl %eax, %eax #16.12
|
||||
movq %rbp, %rsp #16.12
|
||||
popq %rbp #16.12
|
||||
.cfi_def_cfa 7, 8
|
||||
.cfi_restore 6
|
||||
ret #16.12
|
||||
.align 16,0x90
|
||||
.cfi_endproc
|
||||
# LOE
|
||||
# mark_end;
|
||||
.type main,@function
|
||||
.size main,.-main
|
||||
.data
|
||||
.align 4
|
||||
.align 4
|
||||
.2.3_2_kmpc_loc_struct_pack.3:
|
||||
.long 0
|
||||
.long 2
|
||||
.long 0
|
||||
.long 0
|
||||
.quad .2.3_2__kmpc_loc_pack.2
|
||||
.align 4
|
||||
.2.3_2__kmpc_loc_pack.2:
|
||||
.byte 59
|
||||
.byte 117
|
||||
.byte 110
|
||||
.byte 107
|
||||
.byte 110
|
||||
.byte 111
|
||||
.byte 119
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 109
|
||||
.byte 97
|
||||
.byte 105
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 52
|
||||
.byte 59
|
||||
.byte 52
|
||||
.byte 59
|
||||
.byte 59
|
||||
.space 1, 0x00 # pad
|
||||
.align 4
|
||||
.2.3_2_kmpc_loc_struct_pack.14:
|
||||
.long 0
|
||||
.long 2
|
||||
.long 0
|
||||
.long 0
|
||||
.quad .2.3_2__kmpc_loc_pack.13
|
||||
.align 4
|
||||
.2.3_2__kmpc_loc_pack.13:
|
||||
.byte 59
|
||||
.byte 117
|
||||
.byte 110
|
||||
.byte 107
|
||||
.byte 110
|
||||
.byte 111
|
||||
.byte 119
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 109
|
||||
.byte 97
|
||||
.byte 105
|
||||
.byte 110
|
||||
.byte 59
|
||||
.byte 49
|
||||
.byte 54
|
||||
.byte 59
|
||||
.byte 49
|
||||
.byte 54
|
||||
.byte 59
|
||||
.byte 59
|
||||
.section .rodata.str1.4, "aMS",@progbits,1
|
||||
.align 4
|
||||
.align 4
|
||||
il0_peep_printf_format_0:
|
||||
.long 1128354639
|
||||
.long 1702109249
|
||||
.long 1931506803
|
||||
.long 1953653108
|
||||
.byte 0
|
||||
.space 3, 0x00 # pad
|
||||
.align 4
|
||||
il0_peep_printf_format_1:
|
||||
.long 1128354639
|
||||
.long 1702109249
|
||||
.long 1696625779
|
||||
.word 25710
|
||||
.byte 0
|
||||
.data
|
||||
# -- End main
|
||||
.section .rodata, "a"
|
||||
.align 4
|
||||
.align 4
|
||||
.L_2il0floatpacket.0:
|
||||
.long 0x3e428f5c
|
||||
.type .L_2il0floatpacket.0,@object
|
||||
.size .L_2il0floatpacket.0,4
|
||||
.align 4
|
||||
.L_2il0floatpacket.1:
|
||||
.long 0x433dcf5c
|
||||
.type .L_2il0floatpacket.1,@object
|
||||
.size .L_2il0floatpacket.1,4
|
||||
.data
|
||||
.section .note.GNU-stack, ""
|
||||
// -- Begin DWARF2 SEGMENT .eh_frame
|
||||
.section .eh_frame,"a",@progbits
|
||||
.eh_frame_seg:
|
||||
.align 8
|
||||
# End
|
||||
Reference in New Issue
Block a user