Compare commits

...

85 Commits

Author SHA1 Message Date
Julian Hammer
4ff8fdc4ab version bump 2020-11-11 15:14:27 +01:00
JanLJL
c204096d74 fixed typo 2020-11-11 14:11:00 +01:00
JanLJL
dea217c12c fixed test after changing TP value of instruction 2020-11-11 14:04:07 +01:00
JanLJL
92c162daa2 new instructions 2020-11-11 13:54:23 +01:00
JanLJL
87ea8f0f0a new instructions 2020-11-11 12:27:49 +01:00
Julian Hammer
cb04efc384 fixed typo 2020-11-10 13:33:24 +01:00
JanLJL
14c0ea6180 bugfixes 2020-11-09 23:29:42 +01:00
Julian Hammer
314ff4cf9d improved performance of arch_semantics and reg dependency matching 2020-11-09 19:27:47 +01:00
Julian Hammer
f64253b2b9 added dict for instruction lookup 2020-11-09 17:00:46 +01:00
Julian Hammer
979d08358e singelton for isa parsers 2020-11-09 12:36:14 +01:00
Julian Hammer
a2dd6f752d added comment 2020-11-09 12:35:13 +01:00
Julian Hammer
2fb36406a7 performance improvement of throughput summation 2020-11-09 12:01:00 +01:00
Julian Hammer
94086033a8 added __main__.py 2020-11-09 08:27:31 +01:00
JanLJL
75edfc808a version bump 2020-11-06 20:40:13 +01:00
JanLJL
c8c077a834 enhanced length warning 2020-11-06 15:49:13 +01:00
JanLJL
26ee005adc added missing test file 2020-11-06 15:07:57 +01:00
JanLJL
207c53aaad minor bugfix in HW model and added user warnings for more insight 2020-11-06 15:06:36 +01:00
JanLJL
fafd7bc526 Merge branch 'master' of https://github.com/RRZE-HPC/OSACA 2020-11-06 12:57:46 +01:00
JanLJL
b986d7eba0 added --lines option 2020-11-06 12:57:41 +01:00
Julian Hammer
6b0adb5d68 improved cache handing (always hashing original file) 2020-11-06 12:27:34 +01:00
JanLJL
f9f382a948 bugfixes 2020-11-06 12:03:54 +01:00
Julian Hammer
c6b58c63ab Merge branch 'master' of github.com:RRZE-HPC/OSACA 2020-11-03 16:28:28 +01:00
Julian Hammer
78530bfdb0 fail-safed _build_cache.py 2020-11-03 16:28:07 +01:00
JanLJL
5aa0899961 added bdist 2020-11-03 16:10:46 +01:00
JanLJL
7f0abd7d10 version bump 2020-11-02 15:48:19 +01:00
JanLJL
9ba9bab107 try different ISA as fallback when parsing without --arch flag, use SKX as x86 default and enhanced ISA detection heuristic 2020-11-02 15:33:50 +01:00
Julian Hammer
983e66938c version bump 2020-10-29 13:15:23 +01:00
JanLJL
1c889fa785 Merge branch 'master' of https://github.com/RRZE-HPC/OSACA 2020-10-29 13:00:09 +01:00
JanLJL
022598d94f autodetect ISA and default uarch for ISA 2020-10-29 13:00:02 +01:00
Julian
1f5c9d1c61 using travis-ci.com badge 2020-10-29 12:45:39 +01:00
JanLJL
30e0ad038d ignore pickles in data/ and support py3.9 2020-10-29 11:06:20 +01:00
Julian Hammer
decec86e56 fixed py3.5 compatability 2020-10-29 10:59:00 +01:00
JanLJL
9af689b28c fixed bug in tests and removed unused imports 2020-10-28 19:29:48 +01:00
Julian Hammer
3aea3f2b49 Merge branch 'master' of github.com:RRZE-HPC/OSACA 2020-10-28 17:16:43 +01:00
Julian Hammer
a6cb09cf1f added cache files to package and building during setup 2020-10-28 17:16:03 +01:00
Julian Hammer
9d2ea8603f new caching structure with support for distribution 2020-10-28 16:29:55 +01:00
JanLJL
a7918db145 enhanced hanlding for immediates with shifting 2020-10-21 12:14:21 +02:00
Julian Hammer
b5b1a1f2b2 version bump 2020-10-20 14:36:43 +02:00
Julian
dd59af16b2 Merge pull request #51 from RRZE-HPC/A64FX
A64FX support and several Arm bugfixes and enhancements including better TP scheduling
2020-10-16 10:44:47 +02:00
JanLJL
d9325724e2 removed duplicate cmp entry 2020-10-16 10:11:51 +02:00
JanLJL
7e7269c2bc refactored operand checking in post-processing 2020-10-16 10:05:08 +02:00
JanLJL
c64a24ae1b no \t replacement before any other point than user output 2020-10-16 09:44:18 +02:00
JanLJL
e8b78e4cc6 Merge branch 'master' into A64FX 2020-10-15 22:44:12 +02:00
JanLJL
cd5a706f56 adjusted tests for AArch64 2020-10-15 17:56:08 +02:00
Jan
13426358d0 Merge pull request #50 from RRZE-HPC/fix/increment_handling
Fixing Increment Handling
2020-10-15 17:00:11 +02:00
Julian Hammer
c80088b628 Merge branch 'master' into fix/increment_handling 2020-10-15 16:36:29 +02:00
Julian Hammer
748474cd81 added more cmp versions 2020-10-15 16:23:14 +02:00
Julian Hammer
2fec0bf810 Merge branch 'master' into fix/increment_handling 2020-10-15 13:55:34 +02:00
Julian Hammer
711a41d18e extended and cleaned up marker tests 2020-10-15 13:54:18 +02:00
Julian Hammer
cf4a9cddcb Merge branch 'master' into fix/increment_handling 2020-10-15 13:17:02 +02:00
Julian Hammer
5a5a1e74f5 added CMP to aarch64 to exclude first op from destinations 2020-10-15 13:15:54 +02:00
Julian Hammer
4865e7ea72 fixed ignoring of last line without end marker 2020-10-15 11:59:51 +02:00
Julian Hammer
d03398ddf9 treating post- and pre-incremeted memory references no longer as src_dst
the incremented register is now considered src_dst instead
2020-10-13 19:25:29 +02:00
Julian Hammer
edb8df3205 considering split AVX loads on SNB and IVB 2020-10-13 11:25:13 +02:00
Julian Hammer
489050723c removed a nother set of no-maker tests 2020-10-13 09:03:13 +02:00
Julian Hammer
0cc0d35ce9 removed maker missing tests 2020-10-12 19:34:04 +02:00
Julian Hammer
7f65bdb022 version bump 2020-10-12 15:39:49 +02:00
Julian Hammer
04360cc897 fixed label identifiers by splitting 2020-10-12 15:39:32 +02:00
Julian Hammer
5e7a12f9bb paranthesis now suppored in identifier strings 2020-10-12 15:05:52 +02:00
Julian Hammer
1def12ee79 if not markes were found, use whole code 2020-10-12 15:04:55 +02:00
Julian Hammer
7269156854 added --out argument 2020-10-12 15:04:18 +02:00
Julian Hammer
d6529ced73 fixed push and added pop 2020-10-12 15:03:03 +02:00
Julian Hammer
eac728dc9f added tx2 support for ldp d1, d2, [x3] 2020-10-07 13:57:57 +02:00
JanLJL
451ba62959 added vector mov 2020-09-23 10:07:43 +02:00
JanLJL
57cf1bfe6f Merge branch 'master' of github.com:RRZE-HPC/osaca 2020-09-17 22:28:56 +02:00
JanLJL
44b921aa73 added BS4 dependency 2020-09-17 22:27:37 +02:00
JanLJL
accb52ce53 Merge branch 'master' of github.com:RRZE-HPC/osaca 2020-09-17 22:15:20 +02:00
JanLJL
9e78f85475 added instructions 2020-09-17 22:14:14 +02:00
JanLJL
64da89ec3d enhancecd ARM identifier to support immediate offsets 2020-09-17 22:12:12 +02:00
JanLJL
adeae88665 instr update 2020-09-17 21:21:15 +02:00
JanLJL
1698ed1776 gather enhancement 2020-09-03 13:48:00 +02:00
JanLJL
2ef6051e64 added gather load instruction 2020-09-03 09:30:19 +02:00
Julian Hammer
bd61b94669 ignoring b.none branched in basic block detection 2020-08-03 19:23:33 +02:00
JanLJL
addcdeda85 added sve instructions 2020-08-03 08:55:37 +02:00
JanLJL
23d36a651b enhancements for SVE support 2020-08-03 08:54:59 +02:00
JanLJL
b052ab4151 bugfix in OoO scheduling 2020-07-28 14:52:30 +02:00
JanLJL
673da99fba minor enhancements for scheduling 2020-07-23 15:55:56 +02:00
JanLJL
6c72281d65 prepared for aarch64 8.2 support 2020-07-23 15:54:54 +02:00
JanLJL
5520362e65 adjustments and bugfixes 2020-07-13 18:53:19 +02:00
JanLJL
93060eee43 Merge branch 'master' into A64FX 2020-07-13 14:41:49 +02:00
JanLJL
0e77b7bc9a enhanced TP scheduling 2020-07-06 18:49:46 +02:00
JanLJL
ce8c3ff9ab bugfixes for A64FX 2020-07-06 18:48:54 +02:00
JanLJL
6294e2e9da initial commit for trying to support a64fx 2020-06-26 05:20:40 +02:00
JanLJL
5258d65c8e few more instructions 2020-06-24 17:41:30 +02:00
JanLJL
379fe80169 added initial support for Intel Ice Lake (ICL) 2020-06-22 22:15:14 +02:00
39 changed files with 72281 additions and 53390 deletions

.gitignore (vendored): 2 changed lines

@@ -1,5 +1,5 @@
# OSACA specific files and folders
osaca/taxCalc/
*.*.pickle
# Byte-compiled / optimized / DLL files
__pycache__/

.travis.yml

@@ -3,9 +3,9 @@ language: python
python:
- "3.5"
- "3.6"
# Python 3.7 not working yet
- "3.7"
- "3.8"
- "3.9"
before_install:
# - pip install tox-travis
- pip install codecov
@@ -26,7 +26,7 @@ deploy:
username: "__token__"
password:
secure: "fRRCETOwDkJ4pFacYZghPfCQ9mSsV4PlD3sTDp8rDHoCnebPjvFYc1tIdv+Wds0ae162KNUaj9GbxjK0MTGiRcy4pD08n7ufv8snmBQ2rtOLkj7RCRg1hw30WcMHjzqScFJgQcBrpjdPmR5AlesUufh6OadGvF1NspmVRWKr8ir3KQhmNV+itAliYoqaSTRTg1zC/znm+49l5gkzlLxd+mPj5/dtcc8vZ/i2M2+nNTTjDxq71q4Ddqv+bgZV1y7OZY2YuvjEDPflUbwc3fjOxpj891uMDHodsGmEHBu8WsLpF2tAO0C/x63S0jXamkV+/4cAQqQAwWr0Lby9/BjCfUwyUMOEgZ0S+z9WoFpBpQTQEfkD2JH/UFrv4CMnLFqgDkVMcx0vc/rT4Od8eJ5wOSG5+VdniJNOLpodFOXuKc09eJMk2lE9vk9OBrcsZ09UOTPTUCMZSIP4cBDxaIkx+RHQEy63TQdJZcElRBEWGEgj2e9hbiktvIoOvbFGQDscpz7ShBDklXIpu9hnxcKHtNDEjyywTUJmx7lTMILL05DPUnpUmnMb1Gyx5lbHzhSExc9re0cxEA354UUQKBS5HwHQcEBw9stMfsaForiBAUOocUKdGqlGP9cOXFoxdC9M+ff5FNstgbjPYSowb/JbATMlmCWKgH/bXXcTGCO10sk="
distributions: sdist
distributions: "sdist bdist_wheel"
skip_existing: true
cleanup: false
on:

MANIFEST.in

@@ -2,6 +2,8 @@ include README.rst
include LICENSE
include tox.ini
recursive-include osaca/data/ *.yml
recursive-include osaca/data/ *.pickle
include osaca/data/_build_cache.py
include examples/*
recursive-include tests *.py *.out
recursive-include tests/testfiles/ *

README.rst

@@ -10,8 +10,8 @@ Open Source Architecture Code Analyzer
For an innermost loop kernel in assembly, this tool allows automatic instruction fetching of assembly code and automatic runtime prediction including throughput analysis and detection for critical path and loop-carried dependencies.
.. image:: https://travis-ci.org/RRZE-HPC/OSACA.svg?branch=master
:target: https://travis-ci.org/RRZE-HPC/OSACA
.. image:: https://travis-ci.com/RRZE-HPC/OSACA.svg?branch=master
:target: https://travis-ci.com/github/RRZE-HPC/OSACA
:alt: Build Status
.. image:: https://codecov.io/github/RRZE-HPC/OSACA/coverage.svg?branch=master
@@ -57,8 +57,12 @@ Additional requirements are:
- `Python3 <https://www.python.org/>`__
- `Graphviz <https://www.graphviz.org/>`__ for dependency graph creation (minimal dependency is `libgraphviz-dev` on Ubuntu)
Optional requirements are:
- `Kerncraft <https://github.com/RRZE-HPC/kerncraft>`__ >=v0.8.4 for marker insertion
- `ibench <https://github.com/RRZE-HPC/ibench>`__ or `asmbench <https://github.com/RRZE-HPC/asmbench/>`__ for throughput/latency measurements
- `BeautifulSoup4 <https://www.crummy.com/software/BeautifulSoup/bs4/doc/>`__ for scraping instruction form information for the x86 ISA (experimental)
Design
======
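To make the description above concrete, here is a minimal sketch of the analysis pipeline that the osaca.py changes further down wire together; the file name 'kernel.s' and the choice of SKX are illustrative assumptions, not part of this changeset.

# Minimal sketch, assuming 'kernel.s' exists and carries OSACA/IACA markers.
from osaca.parser import get_parser
from osaca.semantics import ArchSemantics, MachineModel, reduce_to_section

arch = 'SKX'                                      # assumed micro-architecture
isa = MachineModel.get_isa_for_arch(arch)         # 'x86'
parser = get_parser(isa)
with open('kernel.s') as f:
    parsed_code = parser.parse_file(f.read())
kernel = reduce_to_section(parsed_code, isa)      # reduce to the marked section
semantics = ArchSemantics(MachineModel(arch=arch))
semantics.add_semantics(kernel)                   # attach throughput/latency/port data
print(ArchSemantics.get_throughput_sum(kernel))   # per-port cycle sums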

osaca/__init__.py

@@ -1,6 +1,6 @@
"""Open Source Architecture Code Analyzer"""
name = 'osaca'
__version__ = '0.3.6'
__version__ = '0.3.12'
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version___

osaca/__main__.py (new file): 4 added lines

@@ -0,0 +1,4 @@
#!/usr/bin/env python3
from .osaca import main
main()

osaca/api.py

@@ -5,7 +5,7 @@ import sys
from io import StringIO
from osaca.frontend import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel,
ArchSemantics, reduce_to_section)
@@ -29,7 +29,7 @@ class KerncraftAPI(object):
self.semantics = ArchSemantics(self.machine_model)
isa = self.machine_model.get_ISA().lower()
if isa == 'aarch64':
self.parser = ParserAArch64v81()
self.parser = ParserAArch64()
elif isa == 'x86':
self.parser = ParserX86ATT()

osaca/data/_build_cache.py (new executable file): 31 added lines

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
from glob import glob
import os.path
import sys
sys.path[0:0] = ['../..']
failed = False
try:
from osaca.semantics.hw_model import MachineModel
except ModuleNotFoundError:
print("Unable to import MachineModel, probably some dependency is not yet installed. SKIPPING. "
"First run of OSACA may take a while to build caches, subsequent runs will be as fast as "
"ever.")
sys.exit()
print('Building cache: ', end='')
sys.stdout.flush()
# Iterating architectures
for f in glob(os.path.join(os.path.dirname(__file__), '*.yml')):
MachineModel(path_to_yaml=f)
print('.', end='')
sys.stdout.flush()
# Iterating ISAs
for f in glob(os.path.join(os.path.dirname(__file__), 'isa/*.yml')):
MachineModel(path_to_yaml=f)
print('+', end='')
sys.stdout.flush()
print()
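Each MachineModel(path_to_yaml=f) call above triggers the new pickle caching implemented in osaca/semantics/hw_model.py further down; a small sketch of the companion-file naming it uses, with a hypothetical model path:

# Companion cache path as built in hw_model.py (the .yml path is a hypothetical example).
import hashlib
from pathlib import Path

p = Path('osaca/data/tx2.yml')
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
companion_cachefile = p.with_name('.' + p.stem + '_' + hexhash).with_suffix('.pickle')
print(companion_cachefile)                        # osaca/data/.tx2_<sha256>.pickle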

osaca/data/a64fx.yml (new file): 1339 added lines. File diff suppressed because it is too large.

osaca/data/icl.yml (new file): 36318 added lines. File diff suppressed because it is too large.

osaca/data/isa/aarch64.yml

@@ -1,4 +1,4 @@
osaca_version: 0.3.4
osaca_version: 0.3.7
isa: "AArch64"
# Contains all operand-irregular instruction forms OSACA supports for AArch64.
# Operand-regular for a AArch64 instruction form with N operands in the shape of
@@ -37,48 +37,10 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
pre-indexed: "*"
post-indexed: "*"
source: true
destination: false
- name: ldp
operands:
- class: "register"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
source: true
destination: true
- name: ldp
operands:
- class: "register"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: [ldr, ldur]
operands:
- class: "register"
@@ -90,25 +52,10 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
pre-indexed: "*"
post-indexed: "*"
source: true
destination: true
- name: [ldr, ldur]
operands:
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
destination: false
- name: stp
operands:
- class: "register"
@@ -124,46 +71,8 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
source: false
destination: true
- name: stp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: false
destination: true
- name: stp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: [str, stur]
@@ -181,3 +90,73 @@ instruction_forms:
post-indexed: "*"
source: false
destination: true
- name: cmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- name: cmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "immediate"
imd: "int"
source: true
destination: false
- name: cmn
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- name: cmn
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "immediate"
imd: "int"
source: true
destination: false
- name: fcmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- name: fcmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "immediate"
imd: "double"
source: true
destination: false
- name: fcmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "immediate"
imd: "float"
source: true
destination: false

osaca/data/isa/x86.yml

@@ -3167,7 +3167,7 @@ instruction_forms:
destination: false
hidden_operands:
- class: "memory"
base: "gpr"
base: {name: 'rsp'}
offset: ~
index: ~
scale: 1
@@ -3177,11 +3177,29 @@ instruction_forms:
name: "rsp"
source: true
destination: true
- name: pop
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "memory"
base: {name: 'rsp'}
offset: ~
index: ~
scale: 1
source: true
destination: false
- class: "register"
name: "rsp"
source: true
destination: true
- name: pushfq
operands: []
hidden_operands:
- class: "memory"
base: "gpr"
base: {name: 'rsp'}
offset: ~
index: ~
scale: 1
@@ -3971,4 +3989,3 @@ instruction_forms:
name: "gpr"
source: true
destination: true

File diff suppressed because it is too large.

View File

@@ -219,9 +219,15 @@ def extract_model(tree, arch, skip_mem=True):
port_23 = True
if '4' in pp[1]:
port_4 = True
# Add (1, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
# Add (X, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
# X = 2 on SNB and IVB IFF used in combination with ymm register, otherwise X = 1
if arch.upper() in ['SNB', 'IVB'] and \
any([p['class'] == 'register' and p['name'] == 'ymm' for p in parameters]):
data_port_throughput = 2
else:
data_port_throughput = 1
if port_23 and not port_4:
port_pressure.append((1, ['2D', '3D']))
port_pressure.append((data_port_throughput, ['2D', '3D']))
# Add missing ports:
for ports in [pp[1] for pp in port_pressure]:
@@ -275,7 +281,7 @@ def main():
if model is not None:
print(
rhs_comment(
model.dump(), basename + " " + args.xml.split('/')[-1] + " " + args.arch
model.dump(), "uops.info import"
)
)
else:

File diff suppressed because it is too large.

View File

@@ -80,24 +80,114 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mul
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 1.0
latency: 4.0 # 1*p1
port_pressure: [[1, '1']]
- name: mul
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 1.0
latency: 4.0 # 1*p1
port_pressure: [[1, '1']]
- name: b.ne
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: b.lt
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.hs
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.eq
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.gt
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: bne
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: cmp
operands:
- class: register
@@ -107,6 +197,15 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: cmp
operands:
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: cmp
operands:
- class: register
@@ -126,6 +225,17 @@ instruction_forms:
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: dup
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: fadd
operands:
- class: register
@@ -323,6 +433,28 @@ instruction_forms:
throughput: 0.5
latency: 6.0 # 1*p01
port_pressure: [[1, '01']]
- name: lsl
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: lsl
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: ldp
operands:
- class: register
@@ -403,6 +535,22 @@ instruction_forms:
throughput: 1.0
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
- class: register
@@ -505,6 +653,15 @@ instruction_forms:
throughput: 0.5
latency: 4.0 # 1*p34
port_pressure: [[1.0, '34']]
- name: ldr
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
@@ -532,15 +689,42 @@ instruction_forms:
throughput: 0.0
latency: 0.0
port_pressure: []
- name: mov
operands:
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.5
latency: 1.0 # 1*p01
port_pressure: [[1, '01']]
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
@@ -568,6 +752,43 @@ instruction_forms:
throughput: ~
latency: ~
port_pressure: []
- name: ret
operands: []
throughput: 0.5
latency: ~ # 1*p34
port_pressure: [[1, '34']]
- name: stp
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p34+1*p5
port_pressure: [[2, '34'], [1, '5']]
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p34+1*p5
port_pressure: [[2, '34'], [1, '5']]
- name: stp
operands:
- class: register
@@ -644,6 +865,20 @@ instruction_forms:
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
- class: register
@@ -728,6 +963,39 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5'], [1, '012']]
- name: subs
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: subs
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
@@ -739,3 +1007,25 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]

osaca/frontend.py

@@ -76,7 +76,7 @@ class Frontend(object):
self._get_flag_symbols(instruction_form['flags'])
if instruction_form['instruction'] is not None
else ' ',
instruction_form['line'].strip(),
instruction_form['line'].strip().replace('\t', ' '),
)
line = line if show_lineno else col_sep + col_sep.join(line.split(col_sep)[1:])
if show_cmnts is False and self._is_comment(instruction_form):
@@ -138,13 +138,13 @@ class Frontend(object):
separator,
sum([instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies']]),
separator,
dep_dict[dep]['root']['line'],
dep_dict[dep]['root']['line'].strip(),
separator,
[node['line_number'] for node in dep_dict[dep]['dependencies']],
)
return s
def full_analysis(self, kernel, kernel_dg: KernelDG, ignore_unknown=False, verbose=False):
def full_analysis(self, kernel, kernel_dg: KernelDG, ignore_unknown=False, arch_warning=False, length_warning=False, verbose=False):
"""
Build the full analysis report including header, the symbol map, the combined TP/CP/LCD
view and the list based LCD view.
@@ -156,11 +156,16 @@ class Frontend(object):
:param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
`False`
:type ignore_unknown: boolean, optional
:param print_arch_warning: flag for additional user warning to specify micro-arch
:type print_arch_warning: boolean, optional
:param print_length_warning: flag for additional user warning to specify kernel length with --lines
:type print_length_warning: boolean, optional
:param verbose: flag for verbosity level, defaults to False
:type verbose: boolean, optional
"""
return (
self._header_report()
+ self._user_warnings(arch_warning, length_warning)
+ self._symbol_map()
+ self.combined_view(
kernel,
@@ -246,7 +251,7 @@ class Frontend(object):
self._get_flag_symbols(instruction_form['flags'])
if instruction_form['instruction'] is not None
else ' ',
instruction_form['line'].strip(),
instruction_form['line'].strip().replace('\t', ' '),
)
s += '\n'
# check for unknown instructions and throw warning if called without --ignore-unknown
@@ -285,6 +290,27 @@ class Frontend(object):
).format(amount, '-' * len(str(amount)))
return s
def _user_warnings(self, arch_warning, length_warning):
"""Returns warning texts for giving the user more insight in what he is doing."""
arch_text = (
'WARNING: No micro-architecture was specified and a default uarch was used.\n'
' Specify the uarch with --arch. See --help for more information.\n'
)
length_text = (
'WARNING: You are analyzing a large amount of instruction forms. Analysis '
'across loops/block boundaries often do not make much sense.\n'
' Specify the kernel length with --length. See --help for more '
'information.\n'
' If this is intentional, you can safely ignore this message.\n'
)
warnings = ''
warnings += arch_text if arch_warning else ''
warnings += length_text if length_warning else ''
warnings += '\n'
return warnings
def _get_separator_list(self, separator, separator_2=' '):
"""Creates column view for seperators in the TP/combined view."""
separator_list = []

osaca/osaca.py

@@ -5,19 +5,33 @@ import io
import os
import re
import sys
import traceback
from osaca.db_interface import import_benchmark_output, sanity_check
from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64v81, ParserX86ATT
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG,
MachineModel, reduce_to_section)
MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
)
LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/')
DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2', 'N1']
SUPPORTED_ARCHS = [
'SNB',
'IVB',
'HSW',
'BDW',
'SKX',
'CSX',
'ICL',
'ZEN1',
'ZEN2',
'TX2',
'N1',
'A64FX',
]
DEFAULT_ARCHS = {
'aarch64': 'A64FX',
'x86': 'SKX',
}
# Stolen from pip
@@ -71,7 +85,8 @@ def create_parser(parser=None):
parser.add_argument(
'--arch',
type=str,
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2, N1).',
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, N1, '
'A64FX). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.',
)
parser.add_argument(
'--fixed',
@@ -79,6 +94,13 @@ def create_parser(parser=None):
help='Run the throughput analysis with fixed probabilities for all suitable ports per '
'instruction. Otherwise, OSACA will print the optimal port utilization for the kernel.',
)
parser.add_argument(
'--lines',
type=str,
help='Define lines that should be included in the analysis. This option overwrites any'
' range defined by markers in the assembly. Add either single lines or ranges defined by'
' "-" or ":", each entry separated by commas, e.g.: --lines 1,2,8-18,20:24',
)
parser.add_argument(
'--db-check',
dest='check_db',
@@ -128,6 +150,12 @@ def create_parser(parser=None):
parser.add_argument(
'--verbose', '-v', action='count', default=0, help='Increases verbosity level.'
)
parser.add_argument(
'--out', '-o',
default=sys.stdout,
type=argparse.FileType('w'),
help='Write analysis to this file (default to stdout).'
)
parser.add_argument(
'file', type=argparse.FileType('r'), help='Path to object (ASM or instruction file).'
)
@@ -144,7 +172,12 @@ def check_arguments(args, parser):
"""
supported_import_files = ['ibench', 'asmbench']
if 'arch' in args and (args.arch is None or args.arch.upper() not in SUPPORTED_ARCHS):
if args.arch is None and (args.check_db or 'import_data' in args):
parser.error(
'DB check and data import cannot work with a default microarchitecture. '
'Please see --help for all valid architecture codes.'
)
elif args.arch is not None and args.arch.upper() not in SUPPORTED_ARCHS:
parser.error(
'Microarchitecture not supported. Please see --help for all valid architecture codes.'
)
@@ -221,19 +254,41 @@ def inspect(args, output_file=sys.stdout):
:param output_file: Define the stream for output, defaults to :class:`sys.stdout`
:type output_file: stream, optional
"""
arch = args.arch
# Read file
code = args.file.read()
# Detect ISA if necessary
arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
print_arch_warning = False if args.arch else True
isa = MachineModel.get_isa_for_arch(arch)
verbose = args.verbose
ignore_unknown = args.ignore_unknown
# Read file
code = args.file.read()
# Parse file
parser = get_asm_parser(arch)
parsed_code = parser.parse_file(code)
try:
parsed_code = parser.parse_file(code)
except:
# probably the wrong parser based on heuristic
if args.arch is None:
# change ISA and try again
arch = DEFAULT_ARCHS['x86'] if BaseParser.detect_ISA(code) == 'aarch64' else DEFAULT_ARCHS['aarch64']
isa = MachineModel.get_isa_for_arch(arch)
parser = get_asm_parser(arch)
parsed_code = parser.parse_file(code)
else:
traceback.print_exc(file=sys.stderr)
sys.exit(1)
# Reduce to marked kernel and add semantics
kernel = reduce_to_section(parsed_code, isa)
# Reduce to marked kernel or chosen section and add semantics
if args.lines:
line_range = get_line_range(args.lines)
kernel = [line for line in parsed_code if line['line_number'] in line_range]
print_length_warning = False
else:
kernel = reduce_to_section(parsed_code, isa)
# Print warning if kernel has no markers and is larger than threshold (100)
print_length_warning = True if len(kernel) == len(parsed_code) and len(kernel) > 100 else False
machine_model = MachineModel(arch=arch)
semantics = ArchSemantics(machine_model)
semantics.add_semantics(kernel)
@@ -249,7 +304,12 @@ def inspect(args, output_file=sys.stdout):
frontend = Frontend(args.file.name, arch=arch)
print(
frontend.full_analysis(
kernel, kernel_graph, ignore_unknown=ignore_unknown, verbose=verbose
kernel,
kernel_graph,
ignore_unknown=ignore_unknown,
arch_warning=print_arch_warning,
length_warning=print_length_warning,
verbose=verbose
),
file=output_file,
)
@@ -292,7 +352,7 @@ def get_asm_parser(arch) -> BaseParser:
if isa == 'x86':
return ParserX86ATT()
elif isa == 'aarch64':
return ParserAArch64v81()
return ParserAArch64()
def get_unmatched_instruction_ratio(kernel):
@@ -306,13 +366,26 @@ def get_unmatched_instruction_ratio(kernel):
unmatched_counter += 1
return unmatched_counter / len(kernel)
def get_line_range(line_str):
line_str = line_str.replace(':', '-')
lines = line_str.split(',')
lines_int = []
for l in lines:
if '-' in l:
start = int(l.split('-')[0])
end = int(l.split('-')[1])
rnge = list(range(start, end+1))
lines_int += rnge
else:
lines_int.append(int(l))
return lines_int
def main():
"""Initialize and run command line interface."""
parser = create_parser()
args = parser.parse_args()
check_arguments(args, parser)
run(args)
run(args, output_file=args.out)
if __name__ == '__main__':
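A worked example for the new --lines handling; the numbers are arbitrary and only show how get_line_range() above expands ranges before inspect() filters parsed lines by their 'line_number':

# Derived from get_line_range() above; ':' and '-' ranges are inclusive.
from osaca.osaca import get_line_range

assert get_line_range('1,2,8-10,20:22') == [1, 2, 8, 9, 10, 20, 21, 22]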

osaca/parser/__init__.py

@@ -6,14 +6,14 @@ Only the parser below will be exported, so please add new parsers to __all__.
from .attr_dict import AttrDict
from .base_parser import BaseParser
from .parser_x86att import ParserX86ATT
from .parser_AArch64v81 import ParserAArch64v81
from .parser_AArch64 import ParserAArch64
__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64v81', 'get_parser']
__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64', 'get_parser']
def get_parser(isa):
if isa.lower() == 'x86':
return ParserX86ATT()
elif isa.lower() == 'aarch64':
return ParserAArch64v81()
return ParserAArch64()
else:
raise ValueError("Unknown ISA {!r}.".format(isa))

osaca/parser/base_parser.py

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""Parser superclass of specific parsers."""
import operator
import re
class BaseParser(object):
# Identifiers for operand types
@@ -8,14 +9,35 @@ class BaseParser(object):
DIRECTIVE_ID = 'directive'
IMMEDIATE_ID = 'immediate'
LABEL_ID = 'label'
IDENTIFIER_ID = 'identifier'
MEMORY_ID = 'memory'
REGISTER_ID = 'register'
SEGMENT_EXT_ID = 'segment_extension'
INSTRUCTION_ID = 'instruction'
OPERANDS_ID = 'operands'
_parser_constructed = False
def __init__(self):
self.construct_parser()
if not self._parser_constructed:
self.construct_parser()
self._parser_constructed = True
@staticmethod
def detect_ISA(file_content):
"""Detect the ISA of the assembly based on the used registers and return the ISA code."""
# Check for the amount of registers in the code to determine the ISA
# 1) Check for xmm, ymm, zmm, rax, rbx, rcx, and rdx registers in x86
heuristics_x86ATT = [r'%[xyz]mm[0-9]', r'%[er][abcd]x[0-9]']
# 2) check for v and z vector registers and x/w general-purpose registers
heuristics_aarch64 = [r'[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]', r'[wx][0-9]']
matches = {'x86': 0, 'aarch64': 0}
for h in heuristics_x86ATT:
matches['x86'] += len(re.findall(h, file_content))
for h in heuristics_aarch64:
matches['aarch64'] += len(re.findall(h, file_content))
return max(matches.items(), key=operator.itemgetter(1))[0]
def parse_file(self, file_content, start_line=0):
"""

osaca/parser/parser_AArch64.py

@@ -6,7 +6,15 @@ import pyparsing as pp
from osaca.parser import AttrDict, BaseParser
class ParserAArch64v81(BaseParser):
class ParserAArch64(BaseParser):
_instance = None
# Singelton pattern, as this is created very many times
def __new__(cls):
if cls._instance is None:
cls._instance = super(ParserAArch64, cls).__new__(cls)
return cls._instance
def __init__(self):
super().__init__()
self.isa = 'aarch64'
@@ -19,22 +27,23 @@ class ParserAArch64v81(BaseParser):
pp.ZeroOrMore(pp.Word(pp.printables))
).setResultsName(self.COMMENT_ID)
# Define ARM assembly identifier
decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
relocation = pp.Combine(pp.Literal(':') + pp.Word(pp.alphanums + '_') + pp.Literal(':'))
first = pp.Word(pp.alphas + '_.', exact=1)
rest = pp.Word(pp.alphanums + '_.')
identifier = pp.Group(
pp.Optional(relocation).setResultsName('relocation')
+ pp.Combine(first + pp.Optional(rest)).setResultsName('name')
).setResultsName('identifier')
+ pp.Optional(pp.Suppress(pp.Literal('+')) + (hex_number | decimal_number).setResultsName('offset'))
).setResultsName(self.IDENTIFIER_ID)
# Label
self.label = pp.Group(
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
).setResultsName(self.LABEL_ID)
# Directive
decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
directive_option = pp.Combine(
pp.Word(pp.alphas + '#@.%', exact=1)
+ pp.Optional(pp.Word(pp.printables + ' ', excludeChars=','))
@@ -46,7 +55,7 @@ class ParserAArch64v81(BaseParser):
self.directive = pp.Group(
pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name')
+ commaSeparatedList.setResultsName('parameters')
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName('parameters')
+ pp.Optional(self.comment)
).setResultsName(self.DIRECTIVE_ID)
# LLVM-MCA markers
@@ -91,31 +100,49 @@ class ParserAArch64v81(BaseParser):
^ pp.CaselessLiteral('ror')
^ pp.CaselessLiteral('sxtw')
^ pp.CaselessLiteral('uxtw')
^ pp.CaselessLiteral('mul vl')
)
arith_immediate = pp.Group(
immediate.setResultsName('base_immediate')
+ pp.Suppress(pp.Literal(','))
+ shift_op.setResultsName('shift_op')
+ immediate.setResultsName('shift')
+ pp.Optional(immediate).setResultsName('shift')
).setResultsName(self.IMMEDIATE_ID)
# Register:
# scalar: [XWBHSDQ][0-9]{1,2} | vector: V[0-9]{1,2}\.[12468]{1,2}[BHSD]()?
# define SP and ZR register aliases as regex, due to pyparsing does not support
# scalar: [XWBHSDQ][0-9]{1,2} | vector: [VZ][0-9]{1,2}(\.[12468]{1,2}[BHSD])?
# | predicate: P[0-9]{1,2}(/[ZM])?
# ignore vector len control ZCR_EL[123] for now
# define SP, ZR register aliases as regex, due to pyparsing does not support
# proper lookahead
alias_r31_sp = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(sp|SP))')
alias_r31_zr = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(zr|ZR))')
scalar = pp.Word(pp.alphas, exact=1).setResultsName('prefix') + pp.Word(
scalar = pp.Word('xwbhsdqXWBHSDQ', exact=1).setResultsName('prefix') + pp.Word(
pp.nums
).setResultsName('name')
index = pp.Literal('[') + pp.Word(pp.nums).setResultsName('index') + pp.Literal(']')
vector = (
pp.CaselessLiteral('v').setResultsName('prefix')
pp.oneOf('v z', caseless=True).setResultsName('prefix')
+ pp.Word(pp.nums).setResultsName('name')
+ pp.Literal('.')
+ pp.Optional(pp.Word('12468')).setResultsName('lanes')
+ pp.Word(pp.alphas, exact=1).setResultsName('shape')
+ pp.Optional(index)
)
predicate = (
pp.CaselessLiteral('p').setResultsName('prefix')
+ pp.Word(pp.nums).setResultsName('name')
+ pp.Optional(
(
pp.Suppress(pp.Literal('/'))
+ pp.oneOf('z m', caseless=True).setResultsName('predication')
)
| (
pp.Literal('.')
+ pp.Optional(pp.Word('12468')).setResultsName('lanes')
+ pp.Word(pp.alphas, exact=1).setResultsName('shape')
)
)
)
self.list_element = vector ^ scalar
register_list = (
pp.Literal('{')
@@ -129,7 +156,8 @@ class ParserAArch64v81(BaseParser):
+ pp.Optional(index)
)
register = pp.Group(
(alias_r31_sp | alias_r31_zr | vector | scalar | register_list)
(alias_r31_sp | alias_r31_zr | vector | scalar | predicate | register_list)
#(alias_r31_sp | alias_r31_zr | vector | scalar | predicate | register_list)
+ pp.Optional(
pp.Suppress(pp.Literal(','))
+ shift_op.setResultsName('shift_op')
@@ -144,7 +172,7 @@ class ParserAArch64v81(BaseParser):
pp.Literal('[')
+ pp.Optional(register.setResultsName('base'))
+ pp.Optional(pp.Suppress(pp.Literal(',')))
+ pp.Optional(register_index ^ immediate.setResultsName('offset'))
+ pp.Optional(register_index ^ (immediate ^ arith_immediate).setResultsName('offset'))
+ pp.Literal(']')
+ pp.Optional(
pp.Literal('!').setResultsName('pre_indexed')
@@ -177,6 +205,11 @@ class ParserAArch64v81(BaseParser):
+ pp.Optional(self.comment)
)
# for testing
self.predicate = predicate
self.vector = vector
self.register = register
def parse_line(self, line, line_number=None):
"""
Parse line and return instruction form.
@@ -193,7 +226,7 @@ class ParserAArch64v81(BaseParser):
self.DIRECTIVE_ID: None,
self.COMMENT_ID: None,
self.LABEL_ID: None,
'line': line.strip(),
'line': line,
'line_number': line_number,
}
)
@@ -317,14 +350,18 @@ class ParserAArch64v81(BaseParser):
return self.process_immediate(operand[self.IMMEDIATE_ID])
if self.LABEL_ID in operand:
return self.process_label(operand[self.LABEL_ID])
if self.IDENTIFIER_ID in operand:
return self.process_identifier(operand[self.IDENTIFIER_ID])
return operand
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
# Remove unnecessarily created dictionary entries during parsing
offset = None if 'offset' not in memory_address else memory_address['offset']
base = None if 'base' not in memory_address else memory_address['base']
index = None if 'index' not in memory_address else memory_address['index']
offset = memory_address.get('offset', None)
if isinstance(offset, list) and len(offset) == 1:
offset = offset[0]
base = memory_address.get('base', None)
index = memory_address.get('index', None)
scale = 1
if base is not None and 'name' in base and base['name'] == 'sp':
base['prefix'] = 'x'
@@ -351,18 +388,20 @@ class ParserAArch64v81(BaseParser):
def process_register_list(self, register_list):
"""Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})"""
# Remove unnecessarily created dictionary entries during parsing
vlist = []
rlist = []
dict_name = ''
if 'list' in register_list:
dict_name = 'list'
if 'range' in register_list:
dict_name = 'range'
for v in register_list[dict_name]:
vlist.append(
AttrDict.convert_dict(self.list_element.parseString(v, parseAll=True).asDict())
for r in register_list[dict_name]:
rlist.append(
AttrDict.convert_dict(self.list_element.parseString(r, parseAll=True).asDict())
)
index = None if 'index' not in register_list else register_list['index']
new_dict = AttrDict({dict_name: vlist, 'index': index})
index = register_list.get('index', None)
new_dict = AttrDict({dict_name: rlist, 'index': index})
if len(new_dict[dict_name]) == 1:
return AttrDict({self.REGISTER_ID: new_dict[dict_name][0]})
return AttrDict({self.REGISTER_ID: new_dict})
def process_immediate(self, immediate):
@@ -375,7 +414,9 @@ class ParserAArch64v81(BaseParser):
# normal integer value, nothing to do
return AttrDict({self.IMMEDIATE_ID: immediate})
if 'base_immediate' in immediate:
# arithmetic immediate, nothing to do
# arithmetic immediate, add calculated value as value
immediate['shift'] = immediate['shift'][0]
immediate['value'] = int(immediate['base_immediate']['value']) << int(immediate['shift']['value'])
return AttrDict({self.IMMEDIATE_ID: immediate})
if 'float' in immediate:
dict_name = 'float'
@@ -396,6 +437,13 @@ class ParserAArch64v81(BaseParser):
label['name'] = label['name']['name']
return AttrDict({self.LABEL_ID: label})
def process_identifier(self, identifier):
"""Post-process identifier operand"""
# remove value if it consists of symbol+offset
if 'value' in identifier:
del identifier['value']
return AttrDict({self.IDENTIFIER_ID: identifier})
def get_full_reg_name(self, register):
"""Return one register name string including all attributes"""
if 'lanes' in register:
@@ -440,7 +488,7 @@ class ParserAArch64v81(BaseParser):
def is_vector_register(self, register):
"""Check if register is a vector register"""
if register['prefix'] in 'bhsdqv':
if register['prefix'] in 'bhsdqvz':
return True
return False
@@ -455,7 +503,7 @@ class ParserAArch64v81(BaseParser):
def is_reg_dependend_of(self, reg_a, reg_b):
"""Check if ``reg_a`` is dependent on ``reg_b``"""
prefixes_gpr = 'wx'
prefixes_vec = 'bhsdqv'
prefixes_vec = 'bhsdqvz'
if reg_a['name'] == reg_b['name']:
if reg_a['prefix'].lower() in prefixes_gpr and reg_b['prefix'].lower() in prefixes_gpr:
return True
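The __new__ override added to this parser (and to ParserX86ATT below), together with the _parser_constructed flag in BaseParser, means the costly pyparsing grammar is built only once per parser class; a quick check of the intended behaviour:

# Both parser classes are singletons now; repeated construction returns the same object.
from osaca.parser import ParserAArch64, ParserX86ATT, get_parser

assert ParserAArch64() is ParserAArch64()
assert ParserX86ATT() is ParserX86ATT()
assert get_parser('aarch64') is ParserAArch64()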

osaca/parser/parser_x86att.py

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import string
import re
import pyparsing as pp
@@ -8,6 +9,14 @@ from osaca.parser import AttrDict, BaseParser
class ParserX86ATT(BaseParser):
_instance = None
# Singelton pattern, as this is created very many times
def __new__(cls):
if cls._instance is None:
cls._instance = super(ParserX86ATT, cls).__new__(cls)
return cls._instance
def __init__(self):
super().__init__()
self.isa = 'x86'
@@ -33,12 +42,18 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(relocation).setResultsName('relocation')
).setResultsName('identifier')
# Label
rest = pp.Word(pp.alphanums + '$_.+-()')
label_identifier = pp.Group(
pp.Optional(id_offset).setResultsName('offset')
+ pp.Combine(first + pp.Optional(rest)).setResultsName('name')
+ pp.Optional(relocation).setResultsName('relocation')
).setResultsName('identifier')
numeric_identifier = pp.Group(
pp.Word(pp.nums).setResultsName('name')
+ pp.Optional(pp.oneOf('b f', caseless=True).setResultsName('suffix'))
).setResultsName('identifier')
self.label = pp.Group(
(identifier | numeric_identifier).setResultsName('name')
(label_identifier | numeric_identifier).setResultsName('name')
+ pp.Literal(':')
+ pp.Optional(self.comment)
).setResultsName(self.LABEL_ID)
@@ -178,7 +193,7 @@ class ParserX86ATT(BaseParser):
self.DIRECTIVE_ID: None,
self.COMMENT_ID: None,
self.LABEL_ID: None,
'line': line.strip(),
'line': line,
'line_number': line_number,
}
)
@@ -297,9 +312,9 @@ class ParserX86ATT(BaseParser):
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
# Remove unecessarily created dictionary entries during memory address parsing
offset = None if 'offset' not in memory_address else memory_address['offset']
base = None if 'base' not in memory_address else memory_address['base']
index = None if 'index' not in memory_address else memory_address['index']
offset = memory_address.get('offset', None)
base = memory_address.get('base', None)
index = memory_address.get('index', None)
scale = 1 if 'scale' not in memory_address else int(memory_address['scale'])
if isinstance(offset, str) and base is None and index is None:
offset = {'value': offset}
@@ -348,45 +363,44 @@ class ParserX86ATT(BaseParser):
def is_reg_dependend_of(self, reg_a, reg_b):
"""Check if ``reg_a`` is dependent on ``reg_b``"""
# Normalize name
reg_a_name = reg_a['name'].upper()
reg_b_name = reg_b['name'].upper()
# Check if they are the same registers
if reg_a.name == reg_b.name:
if reg_a_name == reg_b_name:
return True
# Check vector registers first
if self.is_vector_register(reg_a):
if self.is_vector_register(reg_b):
if reg_a.name[1:] == reg_b.name[1:]:
if reg_a_name[1:] == reg_b_name[1:]:
# Registers in the same vector space
return True
return False
# Check basic GPRs
a_dep = ['RAX', 'EAX', 'AX', 'AH', 'AL']
b_dep = ['RBX', 'EBX', 'BX', 'BH', 'BL']
c_dep = ['RCX', 'ECX', 'CX', 'CH', 'CL']
d_dep = ['RDX', 'EDX', 'DX', 'DH', 'DL']
sp_dep = ['RSP', 'ESP', 'SP', 'SPL']
src_dep = ['RSI', 'ESI', 'SI', 'SIL']
dst_dep = ['RDI', 'EDI', 'DI', 'DIL']
basic_gprs = [a_dep, b_dep, c_dep, d_dep, sp_dep, src_dep, dst_dep]
gpr_groups = {
'A': ['RAX', 'EAX', 'AX', 'AH', 'AL'],
'B': ['RBX', 'EBX', 'BX', 'BH', 'BL'],
'C': ['RCX', 'ECX', 'CX', 'CH', 'CL'],
'D': ['RDX', 'EDX', 'DX', 'DH', 'DL'],
'SP': ['RSP', 'ESP', 'SP', 'SPL'],
'SRC': ['RSI', 'ESI', 'SI', 'SIL'],
'DST': ['RDI', 'EDI', 'DI', 'DIL']
}
if self.is_basic_gpr(reg_a):
if self.is_basic_gpr(reg_b):
for dep_group in basic_gprs:
if reg_a['name'].upper() in dep_group:
if reg_b['name'].upper() in dep_group:
for dep_group in gpr_groups.values():
if reg_a_name in dep_group:
if reg_b_name in dep_group:
return True
return False
# Check other GPRs
gpr_parser = (
pp.CaselessLiteral('R')
+ pp.Word(pp.nums).setResultsName('id')
+ pp.Optional(pp.Word('dwbDWB', exact=1))
)
try:
id_a = gpr_parser.parseString(reg_a['name'], parseAll=True).asDict()['id']
id_b = gpr_parser.parseString(reg_b['name'], parseAll=True).asDict()['id']
if id_a == id_b:
return True
except pp.ParseException:
return False
ma = re.match(r'R([0-9]+)[DWB]?', reg_a_name)
mb = re.match(r'R([0-9]+)[DWB]?', reg_b_name)
if ma and mb and ma.group(1) == mb.group(1):
return True
# No dependencies
return False
@@ -400,19 +414,11 @@ class ParserX86ATT(BaseParser):
"""Check if register is a general purpose register"""
if register is None:
return False
gpr_parser = (
pp.CaselessLiteral('R')
+ pp.Word(pp.nums).setResultsName('id')
+ pp.Optional(pp.Word('dwbDWB', exact=1))
)
if self.is_basic_gpr(register):
return True
else:
try:
gpr_parser.parseString(register['name'], parseAll=True)
return True
except pp.ParseException:
return False
return re.match(r'R([0-9]+)[DWB]?', register['name'], re.IGNORECASE)
def is_vector_register(self, register):
"""Check if register is a vector register"""

osaca/semantics/arch_semantics.py

@@ -53,9 +53,18 @@ class ArchSemantics(ISASemantics):
)
if len(set(port_sums)) > 1:
# balance ports
for _ in range(cycles * 100):
instr_ports[port_sums.index(max(port_sums))] -= INC
instr_ports[port_sums.index(min(port_sums))] += INC
# init list for keeping track of the current change
differences = [cycles / len(ports) for p in ports]
for _ in range(int(cycles * (1 / INC))):
if len(instr_ports) == 1:
# no balancing possible anymore
break
max_port_idx = port_sums.index(max(port_sums))
min_port_idx = port_sums.index(min(port_sums))
instr_ports[max_port_idx] -= INC
instr_ports[min_port_idx] += INC
differences[max_port_idx] -= INC
differences[min_port_idx] += INC
# instr_ports = [round(p, 2) for p in instr_ports]
self._itemsetter(*indices)(instruction_form['port_pressure'], *instr_ports)
# check if min port is zero
@@ -63,7 +72,12 @@ class ArchSemantics(ISASemantics):
# if port_pressure is not exactly 0.00, add the residual to
# the former port
if min(instr_ports) != 0.0:
instr_ports[port_sums.index(min(port_sums))] += min(instr_ports)
min_port_idx = port_sums.index(min(port_sums))
instr_ports[min_port_idx] += min(instr_ports)
differences[min_port_idx] += min(instr_ports)
# we don't need to decrease difference for other port, just
# delete it
del differences[instr_ports.index(min(instr_ports))]
self._itemsetter(*indices)(
instruction_form['port_pressure'], *instr_ports
)
@@ -80,6 +94,17 @@ class ArchSemantics(ISASemantics):
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form['port_pressure'])
)
# never remove more than the fixed utilization per uop and port, i.e.,
# cycles/len(ports)
if round(min(differences), 2) <= 0:
# don't worry if port_pressure isn't exactly 0 and just
# remove from further balancing by deleting index since
# pressure is not 0
del indices[differences.index(min(differences))]
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form['port_pressure'])
)
del differences[differences.index(min(differences))]
port_sums = self._to_list(
itemgetter(*indices)(self.get_throughput_sum(kernel))
)
@@ -373,9 +398,7 @@ class ArchSemantics(ISASemantics):
def g(obj, value):
obj[item] = value
else:
def g(obj, *values):
for item, value in zip(items, values):
obj[item] = value
@@ -391,9 +414,11 @@ class ArchSemantics(ISASemantics):
@staticmethod
def get_throughput_sum(kernel):
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
tp_sum = reduce(
(lambda x, y: [sum(z) for z in zip(x, y)]),
[instr['port_pressure'] for instr in kernel],
)
tp_sum = [round(x, 2) for x in tp_sum]
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
# typically comment, label and non-instruction lines
port_pressures = [instr['port_pressure'] for instr in kernel if instr['throughput'] != 0.0]
# Essentially summing up each columns of port_pressures, where each column is one port
# and each row is one line of the kernel
# round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput
tp_sum = [round(sum(col), 2) for col in zip(*port_pressures)]
return tp_sum
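With this rewrite, lines whose throughput is 0.0 (comments, labels, directives) no longer contribute to the sum; a made-up three-line kernel illustrates the column-wise summation:

# Made-up port_pressure rows: one column per port, one row per kernel line.
kernel = [
    {'throughput': 0.5, 'port_pressure': [0.5, 0.5, 0.0, 1.0]},
    {'throughput': 0.0, 'port_pressure': []},                   # label/comment line
    {'throughput': 1.0, 'port_pressure': [0.0, 1.0, 1.0, 0.0]},
]
port_pressures = [i['port_pressure'] for i in kernel if i['throughput'] != 0.0]
tp_sum = [round(sum(col), 2) for col in zip(*port_pressures)]
assert tp_sum == [0.5, 1.5, 1.0, 1.0]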

osaca/semantics/hw_model.py

@@ -1,12 +1,14 @@
#!/usr/bin/env python3
import base64
import os
import pickle
import re
import string
from copy import deepcopy
from itertools import product
import hashlib
from pathlib import Path
from collections import defaultdict
import ruamel.yaml
from ruamel.yaml.compat import StringIO
@@ -17,6 +19,7 @@ from osaca.parser import ParserX86ATT
class MachineModel(object):
WILDCARD = '*'
INTERNAL_VERSION = 1 # increase whenever self._data format changes to invalidate cache!
def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
if not arch and not path_to_yaml:
@@ -39,7 +42,7 @@ class MachineModel(object):
'load_throughput_default': [],
'ports': [],
'port_model_scheme': None,
'instruction_forms': [],
'instruction_forms': []
}
else:
if arch and path_to_yaml:
@@ -49,7 +52,7 @@ class MachineModel(object):
yaml = self._create_yaml_object()
if arch:
self._arch = arch.lower()
self._path = utils.find_file(self._arch + '.yml')
self._path = utils.find_datafile(self._arch + '.yml')
# check if file is cached
cached = self._get_cached(self._path) if not lazy else False
if cached:
@@ -59,8 +62,6 @@ class MachineModel(object):
with open(self._path, 'r') as f:
if not lazy:
self._data = yaml.load(f)
# cache file for next call
self._write_in_cache(self._path, self._data)
else:
file_content = ''
line = f.readline()
@@ -69,21 +70,26 @@ class MachineModel(object):
line = f.readline()
self._data = yaml.load(file_content)
self._data['instruction_forms'] = []
# separate multi-alias instruction forms
for entry in [
x for x in self._data['instruction_forms'] if isinstance(x['name'], list)
]:
for name in entry['name']:
new_entry = {'name': name}
for k in [x for x in entry.keys() if x != 'name']:
new_entry[k] = entry[k]
self._data['instruction_forms'].append(new_entry)
# remove old entry
self._data['instruction_forms'].remove(entry)
# For use with dict instead of list as DB
# self._data['instruction_dict'] = (
# self._convert_to_dict(self._data['instruction_forms'])
# )
# separate multi-alias instruction forms
for entry in [x for x in self._data['instruction_forms']
if isinstance(x['name'], list)]:
for name in entry['name']:
new_entry = {'name': name}
for k in [x for x in entry.keys() if x != 'name']:
new_entry[k] = entry[k]
self._data['instruction_forms'].append(new_entry)
# remove old entry
self._data['instruction_forms'].remove(entry)
# Normalize instruction_form names (to UPPERCASE) and build dict for faster access:
self._data['instruction_forms_dict'] = defaultdict(list)
for iform in self._data['instruction_forms']:
iform['name'] = iform['name'].upper()
self._data['instruction_forms_dict'][iform['name']].append(iform)
self._data['internal_version'] = self.INTERNAL_VERSION
if not lazy:
# cache internal representation for future use
self._write_in_cache(self._path)
def __getitem__(self, key):
"""Return configuration entry."""
@@ -98,36 +104,21 @@ class MachineModel(object):
def get_instruction(self, name, operands):
"""Find and return instruction data from name and operands."""
# For use with dict instead of list as DB
# return self.get_instruction_from_dict(name, operands)
if name is None:
return None
name_matched_iforms = self._data['instruction_forms_dict'].get(name.upper(), [])
try:
return next(
instruction_form
for instruction_form in self._data['instruction_forms']
if instruction_form['name'].upper() == name.upper()
and self._match_operands(
for instruction_form in name_matched_iforms if self._match_operands(
instruction_form['operands'] if 'operands' in instruction_form else [],
operands,
)
)
operands))
except StopIteration:
return None
except TypeError as e:
print('\nname: {}\noperands: {}'.format(name, operands))
raise TypeError from e
def get_instruction_from_dict(self, name, operands):
"""Find and return instruction data from name and operands stored in dictionary."""
if name is None:
return None
try:
# Check if key is in dict
instruction_form = self._data['instruction_dict'][self._get_key(name, operands)]
return instruction_form
except KeyError:
return None
def average_port_pressure(self, port_pressure):
"""Construct average port pressure list from instruction data."""
port_list = self._data['ports']
@@ -234,12 +225,13 @@ class MachineModel(object):
for y in list(filter(lambda x: True if x != 'class' else False, op))
]
operands.append('{}({})'.format(op['class'], ','.join(op_attrs)))
return '{} {}'.format(instruction_form['name'], ','.join(operands))
return '{} {}'.format(instruction_form['name'].lower(), ','.join(operands))
@staticmethod
def get_isa_for_arch(arch):
"""Return ISA for given micro-arch ``arch``."""
arch_dict = {
'a64fx': 'aarch64',
'tx2': 'aarch64',
'n1': 'aarch64',
'zen1': 'x86',
@@ -293,7 +285,8 @@ class MachineModel(object):
{
k: v
for k, v in self._data.items()
if k not in ['instruction_forms', 'load_throughput']
if k not in ['instruction_forms', 'instruction_forms_dict', 'load_throughput',
'internal_version']
},
stream,
)
@@ -313,37 +306,54 @@ class MachineModel(object):
:type filepath: str
:returns: cached DB if existing, `False` otherwise
"""
hashname = self._get_hashname(filepath)
cachepath = utils.exists_cached_file(hashname + '.pickle')
if cachepath:
# Check if modification date of DB is older than cached version
if os.path.getmtime(filepath) < os.path.getmtime(cachepath):
# load cached version
with open(cachepath, 'rb') as f:
cached_db = pickle.load(f)
return cached_db
else:
# DB newer than cached version --> delete cached file and return False
os.remove(cachepath)
p = Path(filepath)
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
# 1. companion cachefile: same location, with '.<name>_<sha256hash>.pickle'
companion_cachefile = p.with_name('.' + p.stem + '_' + hexhash).with_suffix('.pickle')
if companion_cachefile.exists():
# companion file (must be up-to-date, due to equal hash)
with companion_cachefile.open('rb') as f:
data = pickle.load(f)
if data.get('internal_version') == self.INTERNAL_VERSION:
return data
# 2. home cachefile: ~/.osaca/cache/<name>_<sha256hash>.pickle
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + '_' + hexhash)).with_suffix('.pickle')
if home_cachefile.exists():
# home file (must be up-to-date, due to equal hash)
with home_cachefile.open('rb') as f:
data = pickle.load(f)
if data.get('internal_version') == self.INTERNAL_VERSION:
return data
return False
def _write_in_cache(self, filepath, data):
def _write_in_cache(self, filepath):
"""
Write machine model to cache
:param filepath: path to store DB
:type filepath: str
:param data: :class:`MachineModel` to store
:type data: :class:`dict`
"""
hashname = self._get_hashname(filepath)
filepath = os.path.join(utils.CACHE_DIR, hashname + '.pickle')
with open(filepath, 'wb') as f:
pickle.dump(data, f)
p = Path(filepath)
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
# 1. companion cachefile: same location, with '.<name>_<sha256hash>.pickle'
companion_cachefile = p.with_name('.' + p.stem + '_' + hexhash).with_suffix('.pickle')
if os.access(str(companion_cachefile.parent), os.W_OK):
with companion_cachefile.open('wb') as f:
pickle.dump(self._data, f)
return
def _get_hashname(self, name):
"""Returns unique hashname for machine model"""
return base64.b64encode(name.encode()).decode()
# 2. home cachefile: ~/.osaca/cache/<name>_<sha256hash>.pickle
cache_dir = Path(utils.CACHE_DIR)
try:
os.makedirs(cache_dir, exist_ok=True)
except OSError:
return
home_cachefile = (cache_dir / (p.stem + '_' + hexhash)).with_suffix('.pickle')
if os.access(str(home_cachefile.parent), os.W_OK):
with home_cachefile.open('wb') as f:
pickle.dump(self._data, f)
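Note: both cache locations are derived from a SHA-256 digest of the YAML file's contents, so a pickle can never be served for a modified model file; on read, the stored internal_version is additionally checked before a hit is trusted. A minimal sketch of the naming scheme used above (a standalone helper for illustration, not part of the module):
import hashlib
from pathlib import Path

def cache_candidates(yaml_path, cache_dir='~/.osaca/cache'):
    """Return the companion and home cache paths for a machine-model file."""
    p = Path(yaml_path)
    digest = hashlib.sha256(p.read_bytes()).hexdigest()
    # hidden companion file next to the YAML: .<name>_<sha256hash>.pickle
    companion = p.with_name('.' + p.stem + '_' + digest).with_suffix('.pickle')
    # fallback in the user's home: ~/.osaca/cache/<name>_<sha256hash>.pickle
    home = (Path(cache_dir).expanduser() / (p.stem + '_' + digest)).with_suffix('.pickle')
    return companion, home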
def _get_key(self, name, operands):
"""Get unique instruction form key for dict DB."""
@@ -353,18 +363,6 @@ class MachineModel(object):
key_string += '_'.join([self._get_operand_hash(op) for op in operands])
return key_string
def _convert_to_dict(self, instruction_forms):
"""Convert list DB to dict DB"""
instruction_dict = {}
for instruction_form in instruction_forms:
instruction_dict[
self._get_key(
instruction_form['name'],
instruction_form['operands'] if 'operands' in instruction_form else None,
)
] = instruction_form
return instruction_dict
def _get_operand_hash(self, operand):
"""Get unique key for operand for dict DB"""
operand_string = ''
@@ -493,6 +491,7 @@ class MachineModel(object):
if 'class' in operand:
# compare two DB entries
return self._compare_db_entries(i_operand, operand)
# TODO support class wildcards
# register
if 'register' in operand:
if i_operand['class'] != 'register':
@@ -504,12 +503,14 @@ class MachineModel(object):
return False
return self._is_AArch64_mem_type(i_operand, operand['memory'])
# immediate
# TODO support wildcards
if 'value' in operand or ('immediate' in operand and 'value' in operand['immediate']):
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'int'
if 'float' in operand or ('immediate' in operand and 'float' in operand['immediate']):
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'float'
if 'double' in operand or ('immediate' in operand and 'double' in operand['immediate']):
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'double'
# identifier
if 'identifier' in operand or (
'immediate' in operand and 'identifier' in operand['immediate']
):
@@ -580,7 +581,11 @@ class MachineModel(object):
def _is_x86_reg_type(self, i_reg, reg, consider_masking=False):
"""Check if register type match."""
i_reg_name = i_reg if not consider_masking else i_reg['name']
i_reg_name = i_reg['name'] if i_reg and 'name' in i_reg else i_reg
if reg is None:
if i_reg is None:
return True
return False
# check for wildcards
if i_reg_name == self.WILDCARD or reg['name'] == self.WILDCARD:
return True

View File

@@ -2,7 +2,7 @@
from itertools import chain
from osaca import utils
from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from .hw_model import MachineModel
@@ -26,12 +26,12 @@ class ISASemantics(object):
def __init__(self, isa, path_to_yaml=None):
self._isa = isa.lower()
path = utils.find_file('isa/' + self._isa + '.yml') if not path_to_yaml else path_to_yaml
path = path_to_yaml or utils.find_datafile('isa/' + self._isa + '.yml')
self._isa_model = MachineModel(path_to_yaml=path)
if self._isa == 'x86':
self._parser = ParserX86ATT()
elif self._isa == 'aarch64':
self._parser = ParserAArch64v81()
self._parser = ParserAArch64()
def process(self, instruction_forms):
"""Process a list of instruction forms."""
@@ -52,7 +52,6 @@ class ISASemantics(object):
return
# check if instruction form is in ISA yaml, otherwise apply standard operand assignment
# (one dest, others source)
# import pdb; pdb.set_trace()
isa_data = self._isa_model.get_instruction(
instruction_form['instruction'], instruction_form['operands']
)
@@ -103,14 +102,14 @@ class ISASemantics(object):
if ('post_indexed' in operand['memory'] and operand['memory']['post_indexed']) or (
'pre_indexed' in operand['memory'] and operand['memory']['pre_indexed']
):
op_dict['source'].remove(operand)
op_dict['src_dst'].append(operand)
op_dict['src_dst'].append(AttrDict.convert_dict(
{'register': operand['memory']['base']}))
for operand in [op for op in op_dict['destination'] if 'memory' in op]:
if ('post_indexed' in operand['memory'] and operand['memory']['post_indexed']) or (
'pre_indexed' in operand['memory'] and operand['memory']['pre_indexed']
):
op_dict['destination'].remove(operand)
op_dict['src_dst'].append(operand)
op_dict['src_dst'].append(AttrDict.convert_dict(
{'register': operand['memory']['base']}))
# store operand list in dict and reassign operand key/value pair
instruction_form['semantic_operands'] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags
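Note: the two loops above handle AArch64 addressing modes with writeback. For a pre- or post-indexed access such as ldp q2, q3, [x11], #64, the base register is read and updated, so it now appears as an explicit source/destination operand in addition to the memory access itself. A rough sketch of the resulting bookkeeping (illustrative structures, not the exact parser output):
# post-indexed load: ldp q2, q3, [x11], #64
op_dict = {'source': [], 'destination': [], 'src_dst': []}

base_register = {'prefix': 'x', 'name': '11'}
# x11 is both read (address) and written (incremented by 64), hence src_dst
op_dict['src_dst'].append({'register': base_register})

print(op_dict['src_dst'])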

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
from collections import OrderedDict
from osaca.parser import ParserAArch64v81, ParserX86ATT, get_parser
from osaca.parser import ParserAArch64, ParserX86ATT, get_parser
COMMENT_MARKER = {'start': 'OSACA-BEGIN', 'end': 'OSACA-END'}
@@ -22,9 +22,9 @@ def reduce_to_section(kernel, isa):
else:
raise ValueError('ISA not supported.')
if start == -1:
raise LookupError('Could not find START MARKER. Make sure it is inserted!')
start = 0
if end == -1:
raise LookupError('Could not find END MARKER. Make sure it is inserted!')
end = len(kernel)
return kernel[start:end]
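Note: with this change reduce_to_section no longer raises a LookupError when a marker is missing; a missing start marker falls back to the first line and a missing end marker to the end of the kernel. A tiny illustration of that fallback (the helper name is made up for this sketch; the marker-finding functions signal "not found" with -1):
def clamp_marker_bounds(start, end, num_lines):
    """Fallback applied when IACA/OSACA markers are missing (-1 = not found)."""
    if start == -1:
        start = 0            # analyze from the beginning of the file
    if end == -1:
        end = num_lines      # analyze until the end of the file
    return start, end

print(clamp_marker_bounds(-1, 42, 100))  # -> (0, 42)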
@@ -38,7 +38,7 @@ def find_marked_kernel_AArch64(lines):
nop_bytes = ['213', '3', '32', '31']
return find_marked_section(
lines,
ParserAArch64v81(),
ParserAArch64(),
['mov'],
'x1',
[111, 222],
@@ -277,6 +277,11 @@ def find_basic_loop_bodies(lines):
current_block.append(line)
# Find end of block by searching for references to valid jump labels
if line['instruction'] and line['operands']:
# Ignore `b.none` instructions (relevant for ARM SVE code)
# This branch instruction is often present _within_ inner loop blocks, but usually
# does not terminate the block
if line['instruction'] == 'b.none':
continue
for operand in [o for o in line['operands'] if 'identifier' in o]:
if operand['identifier']['name'] in valid_jump_labels:
if operand['identifier']['name'] == label:

View File

@@ -1,28 +1,14 @@
#!/usr/bin/env python3
import os.path
DATA_DIRS = [os.path.expanduser('~/.osaca/data'), os.path.join(os.path.dirname(__file__), 'data')]
CACHE_DIR = os.path.expanduser('~/.osaca/cache')
def find_file(name):
def find_datafile(name):
"""Check for existence of name in user or package data folders and return path."""
search_paths = [os.path.expanduser('~/.osaca/data'),
os.path.join(os.path.dirname(__file__), 'data')]
for dir in search_paths:
for dir in DATA_DIRS:
path = os.path.join(dir, name)
if os.path.exists(path):
return path
raise FileNotFoundError("Could not find {!r} in {!r}.".format(name, search_paths))
def exists_cached_file(name):
"""Check for existence of file in cache dir. Returns path if it exists and False otherwise."""
if not os.path.exists(CACHE_DIR):
os.makedirs(CACHE_DIR)
return False
search_paths = [CACHE_DIR]
for dir in search_paths:
path = os.path.join(dir, name)
if os.path.exists(path):
return path
return False
raise FileNotFoundError("Could not find {!r} in {!r}.".format(name, DATA_DIRS))
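Note: find_file is now find_datafile and the search order lives in the module-level DATA_DIRS (user directory first, packaged data second); the cache lookup moved into MachineModel itself. Typical use, assuming the packaged isa/x86.yml is present:
from osaca import utils

# Resolves to ~/.osaca/data/isa/x86.yml if the user ships a local copy,
# otherwise to the file bundled with the osaca package.
path = utils.find_datafile('isa/x86.yml')
print(path)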

View File

@@ -2,11 +2,14 @@
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
from setuptools.command.install import install as _install
from setuptools.command.sdist import sdist as _sdist
# To use a consistent encoding
from codecs import open
import os
import io
import re
import sys
here = os.path.abspath(os.path.dirname(__file__))
@@ -27,6 +30,27 @@ def find_version(*file_paths):
raise RuntimeError("Unable to find version string.")
def _run_build_cache(dir):
from subprocess import check_call
# This is run inside the install staging directory (which has no .pyc files)
# We don't want to generate any.
# https://github.com/eliben/pycparser/pull/135
check_call([sys.executable, '-B', '_build_cache.py'],
cwd=os.path.join(dir, 'osaca', 'data'))
class install(_install):
def run(self):
_install.run(self)
self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache")
class sdist(_sdist):
def make_release_tree(self, basedir, files):
_sdist.make_release_tree(self, basedir, files)
self.execute(_run_build_cache, (basedir,), msg="Build ISA and architecture cache")
# Get the long description from the README file
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()
@@ -59,7 +83,7 @@ setup(
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
'Development Status :: 3 - Alpha',
'Development Status :: 4 - Beta',
# Indicate who your project is intended for
'Intended Audience :: Developers',
@@ -76,6 +100,9 @@ setup(
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
],
# What does your project relate to?
@@ -124,4 +151,7 @@ setup(
'osaca=osaca.osaca:main',
],
},
# Overwriting install and sdist to enforce cache distribution with package
cmdclass={'install': install, 'sdist': sdist},
)

View File

@@ -8,7 +8,7 @@ suite = unittest.TestLoader().loadTestsFromNames(
[
'test_base_parser',
'test_parser_x86att',
'test_parser_AArch64v81',
'test_parser_AArch64',
'test_marker_utils',
'test_semantics',
'test_frontend',

View File

@@ -18,6 +18,12 @@ class TestBaseParser(unittest.TestCase):
pass
with open(self._find_file('triad_x86_iaca.s')) as f:
self.triad_code = f.read()
with open(self._find_file('triad_arm_iaca.s')) as f:
self.triad_code_arm = f.read()
with open(self._find_file('kernel_x86.s')) as f:
self.x86_code = f.read()
with open(self._find_file('kernel_aarch64.s')) as f:
self.aarch64_code = f.read()
##################
# Test
@@ -59,6 +65,12 @@ class TestBaseParser(unittest.TestCase):
with self.assertRaises(NotImplementedError):
self.parser.normalize_imd(imd_hex_1)
def test_detect_ISA(self):
self.assertEqual(BaseParser.detect_ISA(self.triad_code), 'x86')
self.assertEqual(BaseParser.detect_ISA(self.triad_code_arm), 'aarch64')
self.assertEqual(BaseParser.detect_ISA(self.x86_code), 'x86')
self.assertEqual(BaseParser.detect_ISA(self.aarch64_code), 'aarch64')
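Note: detect_ISA is what lets OSACA run without an explicit --arch flag; it guesses the ISA from the assembly text itself. A minimal usage sketch (the import path is assumed here; the test above relies on its own imports):
from osaca.parser import BaseParser  # import path assumed for this sketch

code = 'movl $111, %ebx\nvmovsd %xmm0, (%r15,%rcx,8)\n'
print(BaseParser.detect_ISA(code))   # expected: 'x86'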
##################
# Helper functions
##################

View File

@@ -11,7 +11,7 @@ from shutil import copyfile
from unittest.mock import patch
import osaca.osaca as osaca
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.semantics import MachineModel
@@ -71,7 +71,7 @@ class TestCLI(unittest.TestCase):
def test_get_parser(self):
self.assertTrue(isinstance(osaca.get_asm_parser('csx'), ParserX86ATT))
self.assertTrue(isinstance(osaca.get_asm_parser('tx2'), ParserAArch64v81))
self.assertTrue(isinstance(osaca.get_asm_parser('tx2'), ParserAArch64))
with self.assertRaises(ValueError):
osaca.get_asm_parser('UNKNOWN')
@@ -153,6 +153,64 @@ class TestCLI(unittest.TestCase):
output = StringIO()
osaca.run(args, output_file=output)
def test_without_arch(self):
# Run test kernels without --arch flag
parser = osaca.create_parser()
# x86
kernel_x86 = 'kernel_x86.s'
args = parser.parse_args([self._find_test_file(kernel_x86)])
output = StringIO()
osaca.run(args, output_file=output)
# AArch64
kernel_aarch64 = 'kernel_aarch64.s'
args = parser.parse_args([self._find_test_file(kernel_aarch64)])
osaca.run(args, output_file=output)
def test_user_warnings(self):
parser = osaca.create_parser()
kernel = 'triad_x86_unmarked.s'
args = parser.parse_args(
['--arch', 'csx', '--ignore-unknown', self._find_test_file(kernel)]
)
output = StringIO()
osaca.run(args, output_file=output)
# WARNING for length
self.assertTrue(output.getvalue().count('WARNING') == 1)
args = parser.parse_args(
['--lines', '100-199', '--ignore-unknown', self._find_test_file(kernel)]
)
output = StringIO()
osaca.run(args, output_file=output)
# WARNING for arch
self.assertTrue(output.getvalue().count('WARNING') == 1)
def test_lines_arg(self):
# Run tests with --lines option
parser = osaca.create_parser()
kernel_x86 = 'triad_x86_iaca.s'
args_base = parser.parse_args(
['--arch', 'csx', self._find_test_file(kernel_x86)]
)
output_base = StringIO()
osaca.run(args_base, output_file=output_base)
output_base = output_base.getvalue().split('\n')[8:]
args = []
args.append(parser.parse_args(
['--lines', '146-154', '--arch', 'csx', self._find_test_file(kernel_x86)]
))
args.append(parser.parse_args(
['--lines', '146:154', '--arch', 'csx', self._find_test_file(kernel_x86)]
))
args.append(parser.parse_args(
['--lines', '146,147:148,149-154', '--arch', 'csx', self._find_test_file(kernel_x86)]
))
for a in args:
with self.subTest(params=a):
output = StringIO()
osaca.run(a, output_file=output)
self.assertEqual(output.getvalue().split('\n')[8:], output_base)
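Note: the three argument variants exercised above ('146-154', '146:154', '146,147:148,149-154') are expected to select the same source lines. A hypothetical helper, not OSACA's actual implementation, that expands such a --lines specification:
def expand_line_spec(spec):
    """Expand e.g. '146,147:148,149-154' into a sorted list of line numbers."""
    lines = set()
    for part in spec.split(','):
        sep = '-' if '-' in part else (':' if ':' in part else None)
        if sep is None:
            lines.add(int(part))
        else:
            lo, hi = part.split(sep)
            lines.update(range(int(lo), int(hi) + 1))
    return sorted(lines)

print(expand_line_spec('146,147:148,149-154'))  # -> [146, 147, ..., 154]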
##################
# Helper functions
##################

View File

@@ -0,0 +1,345 @@
.file "triad.c"
.section .rodata.str1.8,"aMS",@progbits,1
.align 8
.LC9:
.string "%12.1f | %9.8f | %9.3f | %7.1f | %7.1f | %7d | %4d \n"
.text
.p2align 4,,15
.globl triad
.type triad, @function
triad:
.LFB24:
.cfi_startproc
pushq %r13
.cfi_def_cfa_offset 16
.cfi_offset 13, -16
movslq %edi, %rax
movl $64, %edi
leaq 16(%rsp), %r13
.cfi_def_cfa 13, 0
andq $-32, %rsp
pushq -8(%r13)
pushq %rbp
.cfi_escape 0x10,0x6,0x2,0x76,0
movq %rsp, %rbp
pushq %r15
.cfi_escape 0x10,0xf,0x2,0x76,0x78
leaq 0(,%rax,8), %r15
pushq %r14
movq %r15, %rsi
pushq %r13
.cfi_escape 0xf,0x3,0x76,0x68,0x6
.cfi_escape 0x10,0xe,0x2,0x76,0x70
pushq %r12
pushq %rbx
.cfi_escape 0x10,0xc,0x2,0x76,0x60
.cfi_escape 0x10,0x3,0x2,0x76,0x58
movq %rax, %rbx
subq $72, %rsp
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r14
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r12
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r13
call aligned_alloc
movq %rax, %r15
leal -1(%rbx), %eax
movl %eax, -96(%rbp)
testl %ebx, %ebx
jle .L2
cmpl $2, %eax
jbe .L14
movl %ebx, %esi
vmovapd .LC0(%rip), %ymm0
xorl %eax, %eax
xorl %ecx, %ecx
shrl $2, %esi
.p2align 4,,10
.p2align 3
.L4:
addl $1, %ecx
vmovapd %ymm0, (%r15,%rax)
vmovapd %ymm0, 0(%r13,%rax)
vmovapd %ymm0, (%r12,%rax)
vmovapd %ymm0, (%r14,%rax)
addq $32, %rax
cmpl %ecx, %esi
ja .L4
movl %ebx, %eax
andl $-4, %eax
cmpl %eax, %ebx
je .L26
vzeroupper
.L3:
vmovsd .LC1(%rip), %xmm0
movslq %eax, %rcx
vmovsd %xmm0, (%r15,%rcx,8)
vmovsd %xmm0, 0(%r13,%rcx,8)
vmovsd %xmm0, (%r12,%rcx,8)
vmovsd %xmm0, (%r14,%rcx,8)
leal 1(%rax), %ecx
cmpl %ecx, %ebx
jle .L2
movslq %ecx, %rcx
addl $2, %eax
vmovsd %xmm0, (%r15,%rcx,8)
vmovsd %xmm0, 0(%r13,%rcx,8)
vmovsd %xmm0, (%r12,%rcx,8)
vmovsd %xmm0, (%r14,%rcx,8)
cmpl %eax, %ebx
jle .L2
cltq
vmovsd %xmm0, (%r15,%rax,8)
vmovsd %xmm0, 0(%r13,%rax,8)
vmovsd %xmm0, (%r12,%rax,8)
vmovsd %xmm0, (%r14,%rax,8)
.L2:
movl %ebx, %eax
movl $1, -84(%rbp)
movl %ebx, %r10d
andl $-4, %eax
shrl $2, %r10d
movl %eax, -100(%rbp)
.p2align 4,,10
.p2align 3
.L13:
leaq -56(%rbp), %rsi
leaq -72(%rbp), %rdi
movl %r10d, -88(%rbp)
call timing
movl -88(%rbp), %r10d
xorl %r11d, %r11d
.p2align 4,,10
.p2align 3
.L12:
vmovsd (%r14), %xmm0
vxorpd %xmm7, %xmm7, %xmm7
vucomisd %xmm7, %xmm0
jbe .L6
movq %r14, %rdi
movl %r11d, -92(%rbp)
movl %r10d, -88(%rbp)
vzeroupper
call dummy
movl -92(%rbp), %r11d
movl -88(%rbp), %r10d
.L6:
testl %ebx, %ebx
jle .L8
cmpl $2, -96(%rbp)
jbe .L15
xorl %eax, %eax
xorl %ecx, %ecx
.p2align 4,,10
.p2align 3
.L10:
vmovapd (%r15,%rax), %ymm0
vmovapd (%r12,%rax), %ymm3
addl $1, %ecx
vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0
vmovapd %ymm0, (%r14,%rax)
addq $32, %rax
cmpl %ecx, %r10d
ja .L10
movl -100(%rbp), %eax
cmpl %ebx, %eax
je .L8
.L9:
movslq %eax, %rcx
vmovsd 0(%r13,%rcx,8), %xmm0
vmovsd (%r12,%rcx,8), %xmm5
vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0
vmovsd %xmm0, (%r14,%rcx,8)
leal 1(%rax), %ecx
cmpl %ebx, %ecx
jge .L8
movslq %ecx, %rcx
addl $2, %eax
vmovsd 0(%r13,%rcx,8), %xmm0
vmovsd (%r12,%rcx,8), %xmm6
vfmadd132sd (%r15,%rcx,8), %xmm6, %xmm0
vmovsd %xmm0, (%r14,%rcx,8)
cmpl %eax, %ebx
jle .L8
cltq
vmovsd (%r15,%rax,8), %xmm0
vmovsd (%r12,%rax,8), %xmm4
vfmadd132sd 0(%r13,%rax,8), %xmm4, %xmm0
vmovsd %xmm0, (%r14,%rax,8)
.L8:
addl $1, %r11d
cmpl -84(%rbp), %r11d
jne .L12
leaq -56(%rbp), %rsi
leaq -64(%rbp), %rdi
movl %r11d, -84(%rbp)
movl %r10d, -88(%rbp)
vzeroupper
call timing
vmovsd -64(%rbp), %xmm1
vsubsd -72(%rbp), %xmm1, %xmm1
vmovsd .LC3(%rip), %xmm2
movl -84(%rbp), %r11d
movl -88(%rbp), %r10d
vucomisd %xmm1, %xmm2
leal (%r11,%r11), %eax
movl %eax, -84(%rbp)
ja .L13
movl %eax, %esi
vxorpd %xmm6, %xmm6, %xmm6
vxorpd %xmm0, %xmm0, %xmm0
movl %ebx, %edx
sarl %esi
vcvtsi2sd %ebx, %xmm0, %xmm0
movl $.LC9, %edi
movl $5, %eax
vcvtsi2sd %esi, %xmm6, %xmm6
vmulsd .LC5(%rip), %xmm6, %xmm2
vmovsd .LC4(%rip), %xmm5
vmovsd .LC6(%rip), %xmm7
vmulsd %xmm0, %xmm6, %xmm4
vmulsd %xmm0, %xmm2, %xmm2
vdivsd %xmm1, %xmm4, %xmm4
vdivsd %xmm1, %xmm2, %xmm2
vdivsd %xmm5, %xmm4, %xmm4
vmulsd %xmm7, %xmm2, %xmm3
vaddsd %xmm0, %xmm0, %xmm2
vmulsd .LC8(%rip), %xmm0, %xmm0
vmulsd %xmm6, %xmm2, %xmm2
vmulsd .LC7(%rip), %xmm2, %xmm2
vmulsd %xmm7, %xmm3, %xmm3
vdivsd %xmm5, %xmm0, %xmm0
vdivsd %xmm5, %xmm4, %xmm4
vdivsd %xmm1, %xmm2, %xmm2
call printf
movq %r14, %rdi
call free
movq %r12, %rdi
call free
movq %r13, %rdi
call free
addq $72, %rsp
movq %r15, %rdi
popq %rbx
popq %r12
popq %r13
.cfi_remember_state
.cfi_def_cfa 13, 0
popq %r14
popq %r15
popq %rbp
leaq -16(%r13), %rsp
.cfi_def_cfa 7, 16
popq %r13
.cfi_def_cfa_offset 8
jmp free
.p2align 4,,10
.p2align 3
.L15:
.cfi_restore_state
xorl %eax, %eax
jmp .L9
.L26:
vzeroupper
jmp .L2
.L14:
xorl %eax, %eax
jmp .L3
.cfi_endproc
.LFE24:
.size triad, .-triad
.section .rodata.str1.8
.align 8
.LC10:
.string "TRIAD a[i] = b[i]+c[i]*d[i], 32 byte/it, 2 Flop/it"
.align 8
.LC11:
.string "Size (KByte) | runtime | MFlop/s | MB/s | MLUP/s | repeat | size"
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB25:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movl $.LC10, %edi
movl $20, %ebx
call puts
movl $.LC11, %edi
call puts
.p2align 4,,10
.p2align 3
.L28:
vxorpd %xmm1, %xmm1, %xmm1
movq .LC12(%rip), %rax
vcvtsi2sd %ebx, %xmm1, %xmm1
addl $1, %ebx
vmovq %rax, %xmm0
call pow
vcvttsd2si %xmm0, %edi
call triad
cmpl $36, %ebx
jne .L28
xorl %eax, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE25:
.size main, .-main
.section .rodata.cst32,"aM",@progbits,32
.align 32
.LC0:
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC1:
.long 1907715710
.long 1048610426
.align 8
.LC3:
.long 2576980378
.long 1070176665
.align 8
.LC4:
.long 0
.long 1083129856
.align 8
.LC5:
.long 0
.long 1077936128
.align 8
.LC6:
.long 0
.long 1062207488
.align 8
.LC7:
.long 2696277389
.long 1051772663
.align 8
.LC8:
.long 0
.long 1075838976
.align 8
.LC12:
.long 3435973837
.long 1073007820
.ident "GCC: (GNU) 7.2.0"
.section .note.GNU-stack,"",@progbits

View File

@@ -7,7 +7,7 @@ import os
import unittest
from osaca.frontend import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.semantics import ArchSemantics, KernelDG, MachineModel
@@ -20,7 +20,7 @@ class TestFrontend(unittest.TestCase):
def setUpClass(self):
# set up parser and kernels
self.parser_x86 = ParserX86ATT()
self.parser_AArch64 = ParserAArch64v81()
self.parser_AArch64 = ParserAArch64()
with open(self._find_file('kernel_x86.s')) as f:
code_x86 = f.read()
with open(self._find_file('kernel_aarch64.s')) as f:
@@ -33,7 +33,7 @@ class TestFrontend(unittest.TestCase):
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
)
self.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
arch='tx2'
)
self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')

View File

@@ -9,7 +9,7 @@ import unittest
from collections import OrderedDict
from osaca.api import KerncraftAPI
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
class TestKerncraftAPI(unittest.TestCase):
@@ -17,7 +17,7 @@ class TestKerncraftAPI(unittest.TestCase):
def setUpClass(self):
# set up parser and kernels
self.parser_x86 = ParserX86ATT()
self.parser_AArch64 = ParserAArch64v81()
self.parser_AArch64 = ParserAArch64()
with open(self._find_file('triad_x86_iaca.s')) as f:
self.code_x86 = f.read()
with open(self._find_file('triad_arm_iaca.s')) as f:
@@ -63,7 +63,7 @@ class TestKerncraftAPI(unittest.TestCase):
('0DV', 0.0),
('1', 34.0),
('1DV', 0.0),
('2', 2.0),
('2', 3.0),
('3', 64.0),
('4', 64.0),
('5', 32.0),

View File

@@ -8,13 +8,13 @@ from collections import OrderedDict
from osaca.semantics import reduce_to_section, find_basic_blocks, find_jump_labels, \
find_basic_loop_bodies
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
class TestMarkerUtils(unittest.TestCase):
@classmethod
def setUpClass(self):
self.parser_AArch = ParserAArch64v81()
self.parser_AArch = ParserAArch64()
self.parser_x86 = ParserX86ATT()
with open(self._find_file('triad_arm_iaca.s')) as f:
triad_code_arm = f.read()
@@ -178,120 +178,115 @@ class TestMarkerUtils(unittest.TestCase):
def test_marker_special_cases_AArch(self):
bytes_line = '.byte 213,3,32,31\n'
mov_start = 'mov x1, #111\n'
mov_end = 'mov x1, #222\n'
prologue = 'dup v0.2d, x14\n' + ' neg x9, x9\n' + ' .p2align 6\n'
start_marker = 'mov x1, #111\n' + bytes_line
end_marker = 'mov x1, #222\n' + bytes_line
prologue = (
'dup v0.2d, x14\n'
'neg x9, x9\n'
'.p2align 6\n')
kernel = (
'.LBB0_28:\n'
+ 'fmul v7.2d, v7.2d, v19.2d\n'
+ 'stp q0, q1, [x10, #-32]\n'
+ 'b.ne .LBB0_28\n'
)
epilogue = '.LBB0_29: // Parent Loop BB0_20 Depth=1\n' + 'bl dummy\n'
kernel_length = len(list(filter(None, kernel.split('\n'))))
+ 'b.ne .LBB0_28\n')
epilogue = (
'.LBB0_29: // Parent Loop BB0_20 Depth=1\n'
'bl dummy\n')
# marker directly at the beginning
code_beginning = mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
beginning_parsed = self.parser_AArch.parse_file(code_beginning)
test_kernel = reduce_to_section(beginning_parsed, 'AArch64')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
samples = [
# (test name,
# ignored prologue, section to be extracted, ignored epilogue)
("markers",
prologue + start_marker, kernel, end_marker + epilogue),
("marker at file start",
start_marker, kernel, end_marker + epilogue),
("no start marker",
'', prologue + kernel, end_marker + epilogue),
("marker at file end",
prologue + start_marker, kernel, end_marker),
("no end marker",
prologue + start_marker, kernel + epilogue, ''),
("empty kernel",
prologue + start_marker, '', end_marker + epilogue),
]
# marker at the end
code_end = prologue + mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
end_parsed = self.parser_AArch.parse_file(code_end)
test_kernel = reduce_to_section(end_parsed, 'AArch64')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
# no kernel
code_empty = prologue + mov_start + bytes_line + mov_end + bytes_line + epilogue
empty_parsed = self.parser_AArch.parse_file(code_empty)
test_kernel = reduce_to_section(empty_parsed, 'AArch64')
self.assertEqual(len(test_kernel), 0)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
self.assertEqual(test_kernel, [])
# no start marker
code_no_start = prologue + bytes_line + kernel + mov_end + bytes_line + epilogue
no_start_parsed = self.parser_AArch.parse_file(code_no_start)
with self.assertRaises(LookupError):
reduce_to_section(no_start_parsed, 'AArch64')
# no end marker
code_no_end = prologue + mov_start + bytes_line + kernel + mov_end + epilogue
no_end_parsed = self.parser_AArch.parse_file(code_no_end)
with self.assertRaises(LookupError):
reduce_to_section(no_end_parsed, 'AArch64')
# no marker at all
code_no_marker = prologue + kernel + epilogue
no_marker_parsed = self.parser_AArch.parse_file(code_no_marker)
with self.assertRaises(LookupError):
reduce_to_section(no_marker_parsed, 'AArch64')
for test_name, pro, kernel, epi in samples:
code = pro + kernel + epi
parsed = self.parser_AArch.parse_file(code)
test_kernel = reduce_to_section(parsed, 'AArch64')
if kernel:
kernel_length = len(kernel.strip().split('\n'))
else:
kernel_length = 0
self.assertEqual(
len(test_kernel), kernel_length,
msg="Invalid exctracted kernel length on {!r} sample".format(test_name))
if pro:
kernel_start = len((pro).strip().split('\n'))
else:
kernel_start = 0
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
self.assertEqual(
test_kernel, parsed_kernel,
msg="Invalid exctracted kernel on {!r}".format(test_name))
def test_marker_special_cases_x86(self):
bytes_line = '.byte 100\n.byte 103\n.byte 144\n'
mov_start = 'movl $111, %ebx\n'
mov_end = 'movl $222, %ebx\n'
prologue = 'movl -88(%rbp), %r10d\n' + 'xorl %r11d, %r11d\n' + '.p2align 4,,10\n'
bytes_line = (
'.byte 100\n'
'.byte 103\n'
'.byte 144\n')
start_marker = 'movl $111, %ebx\n' + bytes_line
end_marker = 'movl $222, %ebx\n' + bytes_line
prologue = (
'movl -88(%rbp), %r10d\n'
'xorl %r11d, %r11d\n'
'.p2align 4,,10\n')
kernel = (
'.L3: #L3\n'
+ 'vmovsd .LC1(%rip), %xmm0\n'
+ 'vmovsd %xmm0, (%r15,%rcx,8)\n'
+ 'cmpl %ecx, %ebx\n'
+ 'jle .L3\n'
)
epilogue = 'leaq -56(%rbp), %rsi\n' + 'movl %r10d, -88(%rbp)\n' + 'call timing\n'
kernel_length = len(list(filter(None, kernel.split('\n'))))
'vmovsd .LC1(%rip), %xmm0\n'
'vmovsd %xmm0, (%r15,%rcx,8)\n'
'cmpl %ecx, %ebx\n'
'jle .L3\n')
epilogue = (
'leaq -56(%rbp), %rsi\n'
'movl %r10d, -88(%rbp)\n'
'call timing\n')
samples = [
# (test name,
# ignored prologue, section to be extracted, ignored epilogue)
("markers",
prologue + start_marker, kernel, end_marker + epilogue),
("marker at file start",
start_marker, kernel, end_marker + epilogue),
("no start marker",
'', prologue + kernel, end_marker + epilogue),
("marker at file end",
prologue + start_marker, kernel, end_marker),
("no end marker",
prologue + start_marker, kernel + epilogue, ''),
("empty kernel",
prologue + start_marker, '', end_marker + epilogue),
]
# marker directly at the beginning
code_beginning = mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
beginning_parsed = self.parser_x86.parse_file(code_beginning)
test_kernel = reduce_to_section(beginning_parsed, 'x86')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
# marker at the end
code_end = prologue + mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
end_parsed = self.parser_x86.parse_file(code_end)
test_kernel = reduce_to_section(end_parsed, 'x86')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
# no kernel
code_empty = prologue + mov_start + bytes_line + mov_end + bytes_line + epilogue
empty_parsed = self.parser_x86.parse_file(code_empty)
test_kernel = reduce_to_section(empty_parsed, 'x86')
self.assertEqual(len(test_kernel), 0)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
self.assertEqual(test_kernel, [])
# no start marker
code_no_start = prologue + bytes_line + kernel + mov_end + bytes_line + epilogue
no_start_parsed = self.parser_x86.parse_file(code_no_start)
with self.assertRaises(LookupError):
reduce_to_section(no_start_parsed, 'x86')
# no end marker
code_no_end = prologue + mov_start + bytes_line + kernel + mov_end + epilogue
no_end_parsed = self.parser_x86.parse_file(code_no_end)
with self.assertRaises(LookupError):
reduce_to_section(no_end_parsed, 'x86')
# no marker at all
code_no_marker = prologue + kernel + epilogue
no_marker_parsed = self.parser_x86.parse_file(code_no_marker)
with self.assertRaises(LookupError):
reduce_to_section(no_marker_parsed, 'x86')
for test_name, pro, kernel, epi in samples:
code = pro + kernel + epi
parsed = self.parser_x86.parse_file(code)
test_kernel = reduce_to_section(parsed, 'x86')
if kernel:
kernel_length = len(kernel.strip().split('\n'))
else:
kernel_length = 0
self.assertEqual(
len(test_kernel), kernel_length,
msg="Invalid exctracted kernel length on {!r} sample".format(test_name))
if pro:
kernel_start = len((pro).strip().split('\n'))
else:
kernel_start = 0
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
self.assertEqual(
test_kernel, parsed_kernel,
msg="Invalid exctracted kernel on {!r}".format(test_name))
def test_find_jump_labels(self):
self.assertEqual(find_jump_labels(self.parsed_x86),

View File

@@ -8,13 +8,13 @@ import unittest
from pyparsing import ParseException
from osaca.parser import AttrDict, ParserAArch64v81
from osaca.parser import AttrDict, ParserAArch64
class TestParserAArch64v81(unittest.TestCase):
class TestParserAArch64(unittest.TestCase):
@classmethod
def setUpClass(self):
self.parser = ParserAArch64v81()
self.parser = ParserAArch64()
with open(self._find_file('triad_arm_iaca.s')) as f:
self.triad_code = f.read()
@@ -146,8 +146,8 @@ class TestParserAArch64v81(unittest.TestCase):
def test_parse_line(self):
line_comment = '// -- Begin main'
line_label = '.LBB0_1: // =>This Inner Loop Header: Depth=1'
line_directive = '\t.cfi_def_cfa w29, -16'
line_instruction = '\tldr s0, [x11, w10, sxtw #2]\t\t// = <<2'
line_directive = '.cfi_def_cfa w29, -16'
line_instruction = 'ldr s0, [x11, w10, sxtw #2] // = <<2'
line_prefetch = 'prfm pldl1keep, [x26, #2048] //HPL'
line_preindexed = 'stp x29, x30, [sp, #-16]!'
line_postindexed = 'ldp q2, q3, [x11], #64'
@@ -201,7 +201,7 @@ class TestParserAArch64v81(unittest.TestCase):
'directive': None,
'comment': '= <<2',
'label': None,
'line': 'ldr s0, [x11, w10, sxtw #2]\t\t// = <<2',
'line': 'ldr s0, [x11, w10, sxtw #2] // = <<2',
'line_number': 4,
}
instruction_form_5 = {
@@ -309,23 +309,23 @@ class TestParserAArch64v81(unittest.TestCase):
self.assertEqual(self.parser.normalize_imd(identifier), identifier)
def test_multiple_regs(self):
instr_range = 'PUSH {r5-r7}'
instr_range = 'PUSH {x5-x7}'
reg_range = AttrDict({
'register': {
'range': [
{'prefix': 'r', 'name': '5'},
{'prefix': 'r', 'name': '7'}
{'prefix': 'x', 'name': '5'},
{'prefix': 'x', 'name': '7'}
],
'index': None
}
})
instr_list = 'POP {r5, r7, r9}'
instr_list = 'POP {x5, x7, x9}'
reg_list = AttrDict({
'register': {
'list': [
{'prefix': 'r', 'name': '5'},
{'prefix': 'r', 'name': '7'},
{'prefix': 'r', 'name': '9'}
{'prefix': 'x', 'name': '5'},
{'prefix': 'x', 'name': '7'},
{'prefix': 'x', 'name': '9'}
],
'index': None
}
@@ -411,5 +411,5 @@ class TestParserAArch64v81(unittest.TestCase):
if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromTestCase(TestParserAArch64v81)
suite = unittest.TestLoader().loadTestsFromTestCase(TestParserAArch64)
unittest.TextTestRunner(verbosity=2).run(suite)

View File

@@ -156,8 +156,8 @@ class TestParserX86ATT(unittest.TestCase):
def test_parse_line(self):
line_comment = '# -- Begin main'
line_label = '..B1.7: # Preds ..B1.6'
line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed'
line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'
line_directive = '.quad .2.3_2__kmpc_loc_pack.2 #qed'
line_instruction = 'lea 2(%rax,%rax), %ecx #12.9'
instruction_form_1 = {
'instruction': None,

View File

@@ -11,7 +11,7 @@ from subprocess import call
import networkx as nx
from osaca.osaca import get_unmatched_instruction_ratio
from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG,
MachineModel, reduce_to_section)
@@ -20,48 +20,43 @@ class TestSemanticTools(unittest.TestCase):
MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
)
USER_DATA_DIR = os.path.join(os.path.expanduser('~'), '.osaca/')
@classmethod
def setUpClass(self):
# copy db files in user directory
if not os.path.isdir(os.path.join(self.USER_DATA_DIR, 'data')):
os.makedirs(os.path.join(self.USER_DATA_DIR, 'data'))
call(['cp', '-r', self.MODULE_DATA_DIR, self.USER_DATA_DIR])
def setUpClass(cls):
# set up parser and kernels
self.parser_x86 = ParserX86ATT()
self.parser_AArch64 = ParserAArch64v81()
with open(self._find_file('kernel_x86.s')) as f:
self.code_x86 = f.read()
with open(self._find_file('kernel_aarch64.s')) as f:
self.code_AArch64 = f.read()
self.kernel_x86 = reduce_to_section(self.parser_x86.parse_file(self.code_x86), 'x86')
self.kernel_AArch64 = reduce_to_section(
self.parser_AArch64.parse_file(self.code_AArch64), 'aarch64'
cls.parser_x86 = ParserX86ATT()
cls.parser_AArch64 = ParserAArch64()
with open(cls._find_file('kernel_x86.s')) as f:
cls.code_x86 = f.read()
with open(cls._find_file('kernel_aarch64.s')) as f:
cls.code_AArch64 = f.read()
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), 'x86')
cls.kernel_AArch64 = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64), 'aarch64'
)
# set up machine models
self.machine_model_csx = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
cls.machine_model_csx = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'csx.yml')
)
self.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
cls.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'tx2.yml')
)
self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')
cls.semantics_csx = ArchSemantics(
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/x86.yml')
)
self.semantics_tx2 = ArchSemantics(
self.machine_model_tx2,
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/aarch64.yml'),
cls.semantics_tx2 = ArchSemantics(
cls.machine_model_tx2,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/aarch64.yml'),
)
self.machine_model_zen = MachineModel(arch='zen1')
cls.machine_model_zen = MachineModel(arch='zen1')
for i in range(len(self.kernel_x86)):
self.semantics_csx.assign_src_dst(self.kernel_x86[i])
self.semantics_csx.assign_tp_lt(self.kernel_x86[i])
for i in range(len(self.kernel_AArch64)):
self.semantics_tx2.assign_src_dst(self.kernel_AArch64[i])
self.semantics_tx2.assign_tp_lt(self.kernel_AArch64[i])
for i in range(len(cls.kernel_x86)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86[i])
for i in range(len(cls.kernel_AArch64)):
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i])
###########
# Tests
@@ -88,28 +83,21 @@ class TestSemanticTools(unittest.TestCase):
self.assertIsNone(test_mm_x86.get_instruction(None, []))
self.assertIsNone(test_mm_arm.get_instruction(None, []))
# test dict DB creation
test_mm_x86._data['instruction_dict'] = test_mm_x86._convert_to_dict(
test_mm_x86._data['instruction_forms']
)
test_mm_arm._data['instruction_dict'] = test_mm_arm._convert_to_dict(
test_mm_arm._data['instruction_forms']
)
# test get_instruction from dict DB
self.assertIsNone(test_mm_x86.get_instruction_from_dict(None, []))
self.assertIsNone(test_mm_arm.get_instruction_from_dict(None, []))
self.assertIsNone(test_mm_x86.get_instruction_from_dict('NOT_IN_DB', []))
self.assertIsNone(test_mm_arm.get_instruction_from_dict('NOT_IN_DB', []))
# test get_instruction from DB
self.assertIsNone(test_mm_x86.get_instruction(None, []))
self.assertIsNone(test_mm_arm.get_instruction(None, []))
self.assertIsNone(test_mm_x86.get_instruction('NOT_IN_DB', []))
self.assertIsNone(test_mm_arm.get_instruction('NOT_IN_DB', []))
name_x86_1 = 'vaddpd'
operands_x86_1 = [
{'class': 'register', 'name': 'xmm'},
{'class': 'register', 'name': 'xmm'},
{'class': 'register', 'name': 'xmm'},
]
instr_form_x86_1 = test_mm_x86.get_instruction_from_dict(name_x86_1, operands_x86_1)
instr_form_x86_1 = test_mm_x86.get_instruction(name_x86_1, operands_x86_1)
self.assertEqual(instr_form_x86_1, test_mm_x86.get_instruction(name_x86_1, operands_x86_1))
self.assertEqual(
test_mm_x86.get_instruction_from_dict('jg', [{'class': 'identifier'}]),
test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
)
name_arm_1 = 'fadd'
@@ -118,10 +106,10 @@ class TestSemanticTools(unittest.TestCase):
{'class': 'register', 'prefix': 'v', 'shape': 's'},
{'class': 'register', 'prefix': 'v', 'shape': 's'},
]
instr_form_arm_1 = test_mm_arm.get_instruction_from_dict(name_arm_1, operands_arm_1)
instr_form_arm_1 = test_mm_arm.get_instruction(name_arm_1, operands_arm_1)
self.assertEqual(instr_form_arm_1, test_mm_arm.get_instruction(name_arm_1, operands_arm_1))
self.assertEqual(
test_mm_arm.get_instruction_from_dict('b.ne', [{'class': 'identifier'}]),
test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
)

View File

@@ -1,5 +1,5 @@
[tox]
envlist = py35,py36
envlist = py35,py36,py37,py38,py39
[testenv]
commands=
python tests/all_tests.py