Compare commits

...

120 Commits

Author SHA1 Message Date
Julian Hammer
4ff8fdc4ab version bump 2020-11-11 15:14:27 +01:00
JanLJL
c204096d74 fixed typo 2020-11-11 14:11:00 +01:00
JanLJL
dea217c12c fixed test after changing TP value of instruction 2020-11-11 14:04:07 +01:00
JanLJL
92c162daa2 new instructions 2020-11-11 13:54:23 +01:00
JanLJL
87ea8f0f0a new instructions 2020-11-11 12:27:49 +01:00
Julian Hammer
cb04efc384 fixed typo 2020-11-10 13:33:24 +01:00
JanLJL
14c0ea6180 bugfixes 2020-11-09 23:29:42 +01:00
Julian Hammer
314ff4cf9d improved performance of arch_semantics and reg dependency matching 2020-11-09 19:27:47 +01:00
Julian Hammer
f64253b2b9 added dict for instruction lookup 2020-11-09 17:00:46 +01:00
Julian Hammer
979d08358e singelton for isa parsers 2020-11-09 12:36:14 +01:00
Julian Hammer
a2dd6f752d added comment 2020-11-09 12:35:13 +01:00
Julian Hammer
2fb36406a7 performance improvement of throughput summation 2020-11-09 12:01:00 +01:00
Julian Hammer
94086033a8 added __main__.py 2020-11-09 08:27:31 +01:00
JanLJL
75edfc808a version bump 2020-11-06 20:40:13 +01:00
JanLJL
c8c077a834 enhanced length warning 2020-11-06 15:49:13 +01:00
JanLJL
26ee005adc added missing test file 2020-11-06 15:07:57 +01:00
JanLJL
207c53aaad minor bugfix in HW model and added user warnings for more insight 2020-11-06 15:06:36 +01:00
JanLJL
fafd7bc526 Merge branch 'master' of https://github.com/RRZE-HPC/OSACA 2020-11-06 12:57:46 +01:00
JanLJL
b986d7eba0 added --lines option 2020-11-06 12:57:41 +01:00
Julian Hammer
6b0adb5d68 improved cache handing (always hashing original file) 2020-11-06 12:27:34 +01:00
JanLJL
f9f382a948 bugfixes 2020-11-06 12:03:54 +01:00
Julian Hammer
c6b58c63ab Merge branch 'master' of github.com:RRZE-HPC/OSACA 2020-11-03 16:28:28 +01:00
Julian Hammer
78530bfdb0 fail-safed _build_cache.py 2020-11-03 16:28:07 +01:00
JanLJL
5aa0899961 added bdist 2020-11-03 16:10:46 +01:00
JanLJL
7f0abd7d10 version bump 2020-11-02 15:48:19 +01:00
JanLJL
9ba9bab107 try different ISA as fallback when parsing without --arch flag, use SKX as x86 default and enhanced ISA detection heuristic 2020-11-02 15:33:50 +01:00
Julian Hammer
983e66938c version bump 2020-10-29 13:15:23 +01:00
JanLJL
1c889fa785 Merge branch 'master' of https://github.com/RRZE-HPC/OSACA 2020-10-29 13:00:09 +01:00
JanLJL
022598d94f autodetect ISA and default uarch for ISA 2020-10-29 13:00:02 +01:00
Julian
1f5c9d1c61 using travis-ci.com badge 2020-10-29 12:45:39 +01:00
JanLJL
30e0ad038d ignore pickles in data/ and support py3.9 2020-10-29 11:06:20 +01:00
Julian Hammer
decec86e56 fixed py3.5 compatability 2020-10-29 10:59:00 +01:00
JanLJL
9af689b28c fixed bug in tests and removed unused imports 2020-10-28 19:29:48 +01:00
Julian Hammer
3aea3f2b49 Merge branch 'master' of github.com:RRZE-HPC/OSACA 2020-10-28 17:16:43 +01:00
Julian Hammer
a6cb09cf1f added cache files to package and building during setup 2020-10-28 17:16:03 +01:00
Julian Hammer
9d2ea8603f new caching structure with support for distribution 2020-10-28 16:29:55 +01:00
JanLJL
a7918db145 enhanced hanlding for immediates with shifting 2020-10-21 12:14:21 +02:00
Julian Hammer
b5b1a1f2b2 version bump 2020-10-20 14:36:43 +02:00
Julian
dd59af16b2 Merge pull request #51 from RRZE-HPC/A64FX
A64FX support and several Arm bugfixes and enhancements including better TP scheduling
2020-10-16 10:44:47 +02:00
JanLJL
d9325724e2 removed duplicate cmp entry 2020-10-16 10:11:51 +02:00
JanLJL
7e7269c2bc refactored operand checking in post-processing 2020-10-16 10:05:08 +02:00
JanLJL
c64a24ae1b no \t replacement before any other point than user output 2020-10-16 09:44:18 +02:00
JanLJL
e8b78e4cc6 Merge branch 'master' into A64FX 2020-10-15 22:44:12 +02:00
JanLJL
cd5a706f56 adjusted tests for AArch64 2020-10-15 17:56:08 +02:00
Jan
13426358d0 Merge pull request #50 from RRZE-HPC/fix/increment_handling
Fixing Increment Handling
2020-10-15 17:00:11 +02:00
Julian Hammer
c80088b628 Merge branch 'master' into fix/increment_handling 2020-10-15 16:36:29 +02:00
Julian Hammer
748474cd81 added more cmp versions 2020-10-15 16:23:14 +02:00
Julian Hammer
2fec0bf810 Merge branch 'master' into fix/increment_handling 2020-10-15 13:55:34 +02:00
Julian Hammer
711a41d18e extended and cleaned up marker tests 2020-10-15 13:54:18 +02:00
Julian Hammer
cf4a9cddcb Merge branch 'master' into fix/increment_handling 2020-10-15 13:17:02 +02:00
Julian Hammer
5a5a1e74f5 added CMP to aarch64 to exclude first op from destinations 2020-10-15 13:15:54 +02:00
Julian Hammer
4865e7ea72 fixed ignoring of last line without end marker 2020-10-15 11:59:51 +02:00
Julian Hammer
d03398ddf9 treating post- and pre-incremeted memory references no longer as src_dst
the incremented register is now considered src_dst instead
2020-10-13 19:25:29 +02:00
Julian Hammer
edb8df3205 considering split AVX loads on SNB and IVB 2020-10-13 11:25:13 +02:00
Julian Hammer
489050723c removed a nother set of no-maker tests 2020-10-13 09:03:13 +02:00
Julian Hammer
0cc0d35ce9 removed maker missing tests 2020-10-12 19:34:04 +02:00
Julian Hammer
7f65bdb022 version bump 2020-10-12 15:39:49 +02:00
Julian Hammer
04360cc897 fixed label identifiers by splitting 2020-10-12 15:39:32 +02:00
Julian Hammer
5e7a12f9bb paranthesis now suppored in identifier strings 2020-10-12 15:05:52 +02:00
Julian Hammer
1def12ee79 if not markes were found, use whole code 2020-10-12 15:04:55 +02:00
Julian Hammer
7269156854 added --out argument 2020-10-12 15:04:18 +02:00
Julian Hammer
d6529ced73 fixed push and added pop 2020-10-12 15:03:03 +02:00
Julian Hammer
eac728dc9f added tx2 support for ldp d1, d2, [x3] 2020-10-07 13:57:57 +02:00
JanLJL
451ba62959 added vector mov 2020-09-23 10:07:43 +02:00
JanLJL
57cf1bfe6f Merge branch 'master' of github.com:RRZE-HPC/osaca 2020-09-17 22:28:56 +02:00
JanLJL
44b921aa73 added BS4 dependency 2020-09-17 22:27:37 +02:00
JanLJL
accb52ce53 Merge branch 'master' of github.com:RRZE-HPC/osaca 2020-09-17 22:15:20 +02:00
JanLJL
9e78f85475 added instructions 2020-09-17 22:14:14 +02:00
JanLJL
64da89ec3d enhancecd ARM identifier to support immediate offsets 2020-09-17 22:12:12 +02:00
JanLJL
adeae88665 instr update 2020-09-17 21:21:15 +02:00
JanLJL
1698ed1776 gather enhancement 2020-09-03 13:48:00 +02:00
JanLJL
2ef6051e64 added gather load instruction 2020-09-03 09:30:19 +02:00
Julian Hammer
3308f5d68f version bump 2020-08-05 10:59:10 +02:00
Julian Hammer
bd61b94669 ignoring b.none branched in basic block detection 2020-08-03 19:23:33 +02:00
JanLJL
0db8b6bcbf fixed first character match for symbolic identifiers 2020-08-03 18:30:29 +02:00
Jan
40755b2080 Merge pull request #49 from RRZE-HPC/coherent_label_parsing
Coherent label parsing
2020-08-03 18:25:20 +02:00
JanLJL
269148c2a1 save b/f in numeric identifier as suffix tag 2020-08-03 18:08:29 +02:00
JanLJL
12a8506530 removed unnecessary code 2020-08-03 17:14:58 +02:00
JanLJL
e715badcf9 detects numeric label as label 2020-08-03 16:59:48 +02:00
Julian Hammer
d6b4355a77 labels may now start with numbers 2020-08-03 15:53:29 +02:00
JanLJL
5361b63b52 version bump 2020-08-03 09:38:50 +02:00
JanLJL
cc39342047 minor enhancement for mask parsing 2020-08-03 09:07:45 +02:00
JanLJL
addcdeda85 added sve instructions 2020-08-03 08:55:37 +02:00
JanLJL
23d36a651b enhancements for SVE support 2020-08-03 08:54:59 +02:00
JanLJL
b052ab4151 bugfix in OoO scheduling 2020-07-28 14:52:30 +02:00
JanLJL
673da99fba minor enhancements for scheduling 2020-07-23 15:55:56 +02:00
JanLJL
6c72281d65 prepared for aarch64 8.2 support 2020-07-23 15:54:54 +02:00
JanLJL
5520362e65 adjustments and bugfixes 2020-07-13 18:53:19 +02:00
JanLJL
93060eee43 Merge branch 'master' into A64FX 2020-07-13 14:41:49 +02:00
JanLJL
0e77b7bc9a enhanced TP scheduling 2020-07-06 18:49:46 +02:00
JanLJL
ce8c3ff9ab bugfixes for A64FX 2020-07-06 18:48:54 +02:00
Jan
acbde7a19c Merge pull request #48 from RRZE-HPC/n1
initial implementation of Neoverse N1 support
2020-07-02 09:32:54 +02:00
Cloud User
34e978d2ae initial implementation of Neoverse N1 support 2020-06-30 20:28:57 +00:00
JanLJL
6294e2e9da initial commit for trying to support a64fx 2020-06-26 05:20:40 +02:00
JanLJL
6801229275 PEP8 adjustments 2020-06-25 21:56:18 +02:00
JanLJL
d3d1a89600 two new instrs 2020-06-25 21:55:10 +02:00
JanLJL
93c1951097 prettified aarch64 ISA DB 2020-06-25 21:54:52 +02:00
JanLJL
7211dd0799 improvements for uops.info importer script 2020-06-25 21:53:41 +02:00
JanLJL
5258d65c8e few more instructions 2020-06-24 17:41:30 +02:00
JanLJL
379fe80169 added initial support for Intel Ice Lake (ICL) 2020-06-22 22:15:14 +02:00
JanLJL
94d7d35c0b more instructions 2020-05-04 18:50:58 +02:00
JanLJL
1009c60d2d fixed wrong output format for 3-digit TP numbers 2020-04-08 21:28:50 +02:00
JanLJL
229b316b6d added some instructions 2020-04-08 15:54:31 +02:00
JanLJL
c0753be899 added python 3.7/3.8 to tests 2020-04-02 09:20:08 +02:00
JanLJL
eaa56792ab added bs4 dependency for Travis 2020-04-02 09:08:08 +02:00
JanLJL
3425fa3024 added tests 2020-04-02 08:57:26 +02:00
JanLJL
38924b6ec1 more instructions 2020-03-30 18:27:33 +02:00
JanLJL
d6ae457de4 removed duplicates in CSX DB 2020-03-30 18:18:35 +02:00
JanLJL
a5c2ab1a4a bugfix for online check of operands 2020-03-26 11:46:46 +01:00
JanLJL
e4393189dc minor update 2020-03-26 11:06:11 +01:00
JanLJL
3016fc7c46 added more tests 2020-03-26 10:19:14 +01:00
JanLJL
82f47d217c Merge branch 'master' of github.com:RRZE-HPC/osaca 2020-03-26 10:03:23 +01:00
JanLJL
1754df42d2 enhanced x86 parser for directives 2020-03-26 10:02:39 +01:00
Julian Hammer
ac1295aac2 flag string in output now in line with required flags 2020-03-24 16:02:40 +01:00
Julian Hammer
9624e6c109 closing cache file after dump 2020-03-24 15:20:49 +01:00
Julian Hammer
2d16037c44 Merge branch 'master' of github.com:RRZE-HPC/OSACA 2020-03-21 17:18:37 +01:00
Julian Hammer
c5801cfe2f closing cache file 2020-03-21 17:18:04 +01:00
Julian Hammer
3e960dd4ac closing cache file 2020-03-20 15:02:30 +01:00
JanLJL
680774267d fixed wrong import of mm registers 2020-03-17 12:56:12 +01:00
JanLJL
1aa710f195 enhanced MachineModel to support mask/zeroing differentiation for instruction forms 2020-03-17 12:55:37 +01:00
49 changed files with 75330 additions and 75126 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,5 @@
# OSACA specific files and folders
osaca/taxCalc/
*.*.pickle
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@@ -3,11 +3,13 @@ language: python
python:
- "3.5"
- "3.6"
# Python 3.7 not working yet
# - "3.7"
- "3.7"
- "3.8"
- "3.9"
before_install:
# - pip install tox-travis
- pip install codecov
- pip install bs4
- pip install pygraphviz
- pip install kerncraft
install:
@@ -24,7 +26,7 @@ deploy:
username: "__token__"
password:
secure: "fRRCETOwDkJ4pFacYZghPfCQ9mSsV4PlD3sTDp8rDHoCnebPjvFYc1tIdv+Wds0ae162KNUaj9GbxjK0MTGiRcy4pD08n7ufv8snmBQ2rtOLkj7RCRg1hw30WcMHjzqScFJgQcBrpjdPmR5AlesUufh6OadGvF1NspmVRWKr8ir3KQhmNV+itAliYoqaSTRTg1zC/znm+49l5gkzlLxd+mPj5/dtcc8vZ/i2M2+nNTTjDxq71q4Ddqv+bgZV1y7OZY2YuvjEDPflUbwc3fjOxpj891uMDHodsGmEHBu8WsLpF2tAO0C/x63S0jXamkV+/4cAQqQAwWr0Lby9/BjCfUwyUMOEgZ0S+z9WoFpBpQTQEfkD2JH/UFrv4CMnLFqgDkVMcx0vc/rT4Od8eJ5wOSG5+VdniJNOLpodFOXuKc09eJMk2lE9vk9OBrcsZ09UOTPTUCMZSIP4cBDxaIkx+RHQEy63TQdJZcElRBEWGEgj2e9hbiktvIoOvbFGQDscpz7ShBDklXIpu9hnxcKHtNDEjyywTUJmx7lTMILL05DPUnpUmnMb1Gyx5lbHzhSExc9re0cxEA354UUQKBS5HwHQcEBw9stMfsaForiBAUOocUKdGqlGP9cOXFoxdC9M+ff5FNstgbjPYSowb/JbATMlmCWKgH/bXXcTGCO10sk="
distributions: sdist
distributions: "sdist bdist_wheel"
skip_existing: true
cleanup: false
on:

View File

@@ -2,6 +2,8 @@ include README.rst
include LICENSE
include tox.ini
recursive-include osaca/data/ *.yml
recursive-include osaca/data/ *.pickle
include osaca/data/_build_cache.py
include examples/*
recursive-include tests *.py *.out
recursive-include tests/testfiles/ *

View File

@@ -10,8 +10,8 @@ Open Source Architecture Code Analyzer
For an innermost loop kernel in assembly, this tool allows automatic instruction fetching of assembly code and automatic runtime prediction including throughput analysis and detection of the critical path and loop-carried dependencies.
.. image:: https://travis-ci.org/RRZE-HPC/OSACA.svg?branch=master
:target: https://travis-ci.org/RRZE-HPC/OSACA
.. image:: https://travis-ci.com/RRZE-HPC/OSACA.svg?branch=master
:target: https://travis-ci.com/github/RRZE-HPC/OSACA
:alt: Build Status
.. image:: https://codecov.io/github/RRZE-HPC/OSACA/coverage.svg?branch=master
@@ -57,8 +57,12 @@ Additional requirements are:
- `Python3 <https://www.python.org/>`__
- `Graphviz <https://www.graphviz.org/>`__ for dependency graph creation (minimal dependency is `libgraphviz-dev` on Ubuntu)
Optional requirements are:
- `Kerncraft <https://github.com/RRZE-HPC/kerncraft>`__ >=v0.8.4 for marker insertion
- `ibench <https://github.com/RRZE-HPC/ibench>`__ or `asmbench <https://github.com/RRZE-HPC/asmbench/>`__ for throughput/latency measurements
- `BeautifulSoup4 <https://www.crummy.com/software/BeautifulSoup/bs4/doc/>`__ for scraping instruction form information for the x86 ISA (experimental)
Design
======

View File

@@ -1,6 +1,6 @@
"""Open Source Architecture Code Analyzer"""
name = 'osaca'
__version__ = '0.3.3.dev0'
__version__ = '0.3.12'
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version__

4
osaca/__main__.py Normal file
View File

@@ -0,0 +1,4 @@
#!/usr/bin/env python3
# Package entry point (this is the file Python runs for ``python -m osaca``).
# It simply imports ``main`` from the sibling ``osaca`` module and calls it.
from .osaca import main
# NOTE: the call is unconditional, so importing this module also runs main().
main()

View File

@@ -5,7 +5,7 @@ import sys
from io import StringIO
from osaca.frontend import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel,
ArchSemantics, reduce_to_section)
@@ -29,7 +29,7 @@ class KerncraftAPI(object):
self.semantics = ArchSemantics(self.machine_model)
isa = self.machine_model.get_ISA().lower()
if isa == 'aarch64':
self.parser = ParserAArch64v81()
self.parser = ParserAArch64()
elif isa == 'x86':
self.parser = ParserX86ATT()

31
osaca/data/_build_cache.py Executable file
View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
"""Build the machine-model caches for all bundled YAML data files.

Every ``*.yml`` file next to this script (architectures) and every
``isa/*.yml`` file (ISAs) is loaded once through ``MachineModel``; loading is
expected to populate the on-disk cache as a side effect (presumably the
``*.pickle`` files shipped with the package -- confirm in ``hw_model``).

If ``MachineModel`` cannot be imported, the script prints a notice and exits
without building anything, so an incomplete environment does not fail here.
"""
from glob import glob
import os.path
import sys

# Make the package root importable; note this path is relative to the current
# working directory, not to this file.
sys.path[0:0] = ['../..']

try:
    from osaca.semantics.hw_model import MachineModel
except ImportError:
    # ImportError is the base class of ModuleNotFoundError, which only exists
    # on Python >= 3.6; catching the base class keeps this fallback working on
    # older interpreters as well.
    print("Unable to import MachineModel, probably some dependency is not yet installed. SKIPPING. "
          "First run of OSACA may take a while to build caches, subsequent runs will be as fast as "
          "ever.")
    sys.exit()

print('Building cache: ', end='')
sys.stdout.flush()

# Iterating architectures
for f in glob(os.path.join(os.path.dirname(__file__), '*.yml')):
    MachineModel(path_to_yaml=f)
    # One progress marker per processed architecture file.
    print('.', end='')
    sys.stdout.flush()

# Iterating ISAs
for f in glob(os.path.join(os.path.dirname(__file__), 'isa/*.yml')):
    MachineModel(path_to_yaml=f)
    # One progress marker per processed ISA file.
    print('+', end='')
    sys.stdout.flush()

# Terminate the progress line.
print()

1339
osaca/data/a64fx.yml Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -9,8 +9,8 @@ class MOVEntryBuilder:
port_occupancy = defaultdict(Fraction)
for uops, ports in port_pressure:
for p in ports:
port_occupancy[p] += Fraction(uops, len(ports))
return float(max(list(port_occupancy.values())+[0]))
port_occupancy[p] += Fraction(uops, len(ports))
return float(max(list(port_occupancy.values()) + [0]))
@staticmethod
def classify(operands_types):
@@ -18,10 +18,10 @@ class MOVEntryBuilder:
store = 'mem' in operands_types[-1:]
assert not (load and store), "Can not process a combined load-store instruction."
return load, store
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0, comment=None):
self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None
):
if comment:
comment = " # " + comment
else:
@@ -32,10 +32,7 @@ class MOVEntryBuilder:
if ot == 'imd':
description += ' - class: immediate\n imd: int\n'
elif ot.startswith('mem'):
description += (
' - class: memory\n'
' base: "*"\n'
' offset: "*"\n')
description += ' - class: memory\n' ' base: "*"\n' ' offset: "*"\n'
if ot == 'mem_simple':
description += ' index: ~\n'
elif ot == 'mem_complex':
@@ -45,18 +42,20 @@ class MOVEntryBuilder:
description += ' scale: "*"\n'
else:
description += ' - class: register\n name: {}\n'.format(ot)
description += (
' latency: {latency}\n'
' port_pressure: {port_pressure!r}\n'
' throughput: {throughput}\n'
' uops: {uops}\n').format(
latency=latency,
port_pressure=port_pressure,
throughput=self.compute_throughput(port_pressure),
uops=sum([i for i,p in port_pressure]))
' uops: {uops}\n'
).format(
latency=latency,
port_pressure=port_pressure,
throughput=self.compute_throughput(port_pressure),
uops=sum([i for i, p in port_pressure]),
)
return description
def parse_port_pressure(self, port_pressure_str):
"""
Example:
@@ -68,7 +67,7 @@ class MOVEntryBuilder:
cycles, ports = p.split('*p')
port_pressure.append([int(cycles), ports])
return port_pressure
def process_item(self, instruction_form, resources):
"""
Example:
@@ -84,9 +83,7 @@ class MOVEntryBuilder:
class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
# for SNB and IVB
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0):
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
load, store = self.classify(operand_types)
comment = None
@@ -100,15 +97,14 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
comment = "with store"
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency, comment)
self, instruction_name, operand_types, port_pressure, latency, comment
)
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
# for HSW, BDW, SKX and CSX
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0):
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
load, store = self.classify(operand_types)
if load:
@@ -116,7 +112,8 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
latency += 4
comment = "with load"
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency, comment)
self, instruction_name, operand_types, port_pressure, latency, comment
)
if store:
port_pressure_simple = port_pressure + [[1, '237'], [1, '4']]
operands_simple = ['mem_simple' if o == 'mem' else o for o in operand_types]
@@ -125,16 +122,28 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
latency += 0
return (
MOVEntryBuilder.build_description(
self, instruction_name, operands_simple, port_pressure_simple, latency,
"with store, simple AGU") +
'\n' +
MOVEntryBuilder.build_description(
self, instruction_name, operands_complex, port_pressure_complex, latency,
"with store, complex AGU"))
self,
instruction_name,
operands_simple,
port_pressure_simple,
latency,
"with store, simple AGU",
)
+ '\n'
+ MOVEntryBuilder.build_description(
self,
instruction_name,
operands_complex,
port_pressure_complex,
latency,
"with store, complex AGU",
)
)
# Register only:
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency)
self, instruction_name, operand_types, port_pressure, latency
)
np7 = MOVEntryBuilderIntelNoPort7AGU()
@@ -149,7 +158,6 @@ snb_mov_instructions = [
('mov imd gpr', ('1*p015', 1)),
('mov imd mem', ('', 0)),
('movabs imd gpr', ('1*p015', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('1*p5', 1)),
('movapd xmm mem', ('', 0)),
@@ -160,7 +168,6 @@ snb_mov_instructions = [
('vmovapd ymm ymm', ('1*p5', 1)),
('vmovapd ymm mem', ('', 0)),
('vmovapd mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('1*p5', 1)),
('movaps xmm mem', ('', 0)),
@@ -171,7 +178,6 @@ snb_mov_instructions = [
('vmovaps ymm ymm', ('1*p5', 1)),
('movaps ymm mem', ('', 0)),
('movaps mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movd:movq
('movd gpr mm', ('1*p5', 1)),
('movd mem mm', ('', 0)),
@@ -197,7 +203,6 @@ snb_mov_instructions = [
('vmovd xmm mem', ('', 0)),
('vmovq xmm gpr', ('1*p0', 1)),
('vmovq xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movddup
('movddup xmm xmm', ('1*p5', 1)),
('movddup mem xmm', ('', 0)),
@@ -205,10 +210,8 @@ snb_mov_instructions = [
('vmovddup mem xmm', ('', 0)),
('vmovddup ymm ymm', ('1*p5', 1)),
('vmovddup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdq2q
('movdq2q xmm mm', ('1*p015+1*p5', 1)),
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('1*p015', 1)),
('movdqa mem xmm', ('', 0)),
@@ -219,7 +222,6 @@ snb_mov_instructions = [
('vmovdqa ymm ymm', ('1*p05', 1)),
('vmovdqa mem ymm', ('', 0)),
('vmovdqa ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('1*p015', 1)),
('movdqu mem xmm', ('', 0)),
@@ -230,75 +232,60 @@ snb_mov_instructions = [
('vmovdqu ymm ymm', ('1*p05', 1)),
('vmovdqu mem ymm', ('', 0)),
('vmovdqu ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movhlps
('movhlps xmm xmm', ('1*p5', 1)),
('vmovhlps xmm xmm xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movhpd
('movhpd mem xmm', ('1*p5', 1)),
('vmovhpd mem xmm xmm', ('1*p5', 1)),
('movhpd xmm mem', ('', 0)),
('vmovhpd mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movhps
('movhps mem xmm', ('1*p5', 1)),
('vmovhps mem xmm xmm', ('1*p5', 1)),
('movhps xmm mem', ('', 0)),
('vmovhps mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movlhps
('movlhps xmm xmm', ('1*p5', 1)),
('vmovlhps xmm xmm xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movlpd
('movlpd mem xmm', ('1*p5', 1)),
('vmovlpd mem xmm xmm', ('1*p5', 1)),
('movlpd xmm mem', ('', 0)),
('vmovlpd mem xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movlps
('movlps mem xmm', ('1*p5', 1)),
('vmovlps mem xmm xmm', ('1*p5', 1)),
('movlps xmm mem', ('', 0)),
('vmovlps mem xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 2)),
('vmovmskpd xmm gpr', ('1*p0', 2)),
('vmovmskpd ymm gpr', ('1*p0', 2)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 1)),
('vmovmskps xmm gpr', ('1*p0', 1)),
('vmovmskps ymm gpr', ('1*p0', 1)),
# https://www.felixcloutier.com/x86/movntdq
('movntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntdq ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('movntdqa mem xmm', ('', 0)),
('vmovntdqa mem xmm', ('', 0)),
('vmovntdqa mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movnti
('movnti gpr mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntpd
('movntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntpd ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('movntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntps ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntq
('movntq mm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq
('movq mm mm', ('', 0)),
('movq mem mm', ('', 0)),
@@ -309,14 +296,11 @@ snb_mov_instructions = [
('vmovq xmm xmm', ('1*p015', 1)),
('vmovq mem xmm', ('', 0)),
('vmovq xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p015', 1)),
# https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq
# TODO combined load-store is currently not supported
# ('movs mem mem', ()),
# https://www.felixcloutier.com/x86/movsd
('movsd xmm xmm', ('1*p5', 1)),
('movsd mem xmm', ('', 0)),
@@ -324,7 +308,6 @@ snb_mov_instructions = [
('vmovsd xmm xmm xmm', ('1*p5', 1)),
('vmovsd mem xmm', ('', 0)),
('vmovsd xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movshdup
('movshdup xmm xmm', ('1*p5', 1)),
('movshdup mem xmm', ('', 0)),
@@ -332,7 +315,6 @@ snb_mov_instructions = [
('vmovshdup mem xmm', ('', 0)),
('vmovshdup ymm ymm', ('1*p5', 1)),
('vmovshdup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movsldup
('movsldup xmm xmm', ('1*p5', 1)),
('movsldup mem xmm', ('', 0)),
@@ -340,7 +322,6 @@ snb_mov_instructions = [
('vmovsldup mem xmm', ('', 0)),
('vmovsldup ymm ymm', ('1*p5', 1)),
('vmovsldup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movss
('movss xmm xmm', ('1*p5', 1)),
('movss mem xmm', ('', 0)),
@@ -349,7 +330,6 @@ snb_mov_instructions = [
('vmovss xmm xmm', ('1*p5', 1)),
('vmovss xmm mem', ('', 0)),
('movss mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p015', 1)),
('movsx mem gpr', ('', 0)),
@@ -363,7 +343,6 @@ snb_mov_instructions = [
('movsl mem gpr', ('', 0)), # AT&T version
('movsq gpr gpr', ('1*p015', 1)), # AT&T version
('movsq mem gpr', ('', 0)), # AT&T version
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('1*p5', 1)),
('movupd mem xmm', ('', 0)),
@@ -374,7 +353,6 @@ snb_mov_instructions = [
('vmovupd ymm ymm', ('1*p5', 1)),
('vmovupd mem ymm', ('', 0)),
('vmovupd ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('1*p5', 1)),
('movups mem xmm', ('', 0)),
@@ -385,7 +363,6 @@ snb_mov_instructions = [
('vmovups ymm ymm', ('1*p5', 1)),
('vmovups mem ymm', ('', 0)),
('vmovups ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p015', 1)),
('movzx mem gpr', ('', 0)),
@@ -397,7 +374,6 @@ snb_mov_instructions = [
('movzl mem gpr', ('', 0)), # AT&T version
('movzq gpr gpr', ('1*p015', 1)), # AT&T version
('movzq mem gpr', ('', 0)), # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('1*p015+2*p05', 2)),
('cmova mem gpr', ('1*p015+2*p05', 2)),
@@ -459,12 +435,10 @@ snb_mov_instructions = [
('cmovs mem gpr', ('1*p015+1*p05', 2)),
('cmovz gpr gpr', ('1*p015+1*p05', 2)),
('cmovz mem gpr', ('1*p015+1*p05', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 2)),
('pmovmskb xmm gpr', ('1*p0', 2)),
('vpmovmskb xmm gpr', ('1*p0', 2)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p15', 1)),
('pmovsxbw mem xmm', ('1*p15', 1)),
@@ -484,7 +458,6 @@ snb_mov_instructions = [
('vpmovsxbd mem ymm', ('1*p15', 1)),
('vpmovsxbq ymm ymm', ('1*p15', 1)),
('vpmovsxbq mem ymm', ('1*p15', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p15', 1)),
('pmovzxbw mem xmm', ('1*p15', 1)),
@@ -494,307 +467,294 @@ snb_mov_instructions = [
('vpmovzxbw mem ymm', ('1*p15', 1)),
]
ivb_mov_instructions = list(OrderedDict(snb_mov_instructions + [
# https://www.felixcloutier.com/x86/mov
('mov gpr gpr', ('', 0)),
('mov imd gpr', ('', 0)),
ivb_mov_instructions = list(
OrderedDict(
snb_mov_instructions
+ [
# https://www.felixcloutier.com/x86/mov
('mov gpr gpr', ('', 0)),
('mov imd gpr', ('', 0)),
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('', 0)),
('vmovapd xmm xmm', ('', 0)),
('vmovapd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('', 0)),
('vmovaps xmm xmm', ('', 0)),
('vmovaps ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('', 0)),
('vmovdqa xmm xmm', ('', 0)),
('vmovdqa ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('', 0)),
('vmovdqu xmm xmm', ('', 0)),
('vmovdqu ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('', 0)),
('vmovupd xmm xmm', ('', 0)),
('vmovupd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('', 0)),
('vmovups xmm xmm', ('', 0)),
('vmovups ymm ymm', ('', 0)),
]
).items()
)
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('', 0)),
('vmovapd xmm xmm', ('', 0)),
('vmovapd ymm ymm', ('', 0)),
hsw_mov_instructions = list(
OrderedDict(
ivb_mov_instructions
+ [
# https://www.felixcloutier.com/x86/mov
('mov imd gpr', ('1*p0156', 1)),
('mov gpr gpr', ('1*p0156', 1)),
('movabs imd gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 6)),
('movbe mem gpr', ('1*p15', 6)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 3)),
('vmovmskps xmm gpr', ('1*p0', 3)),
('vmovmskps ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p0156', 1)),
('movsb gpr gpr', ('1*p0156', 1)), # AT&T version
('movsw gpr gpr', ('1*p0156', 1)), # AT&T version
('movsl gpr gpr', ('1*p0156', 1)), # AT&T version
('movsq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p0156', 1)),
('movzb gpr gpr', ('1*p0156', 1)), # AT&T version
('movzw gpr gpr', ('1*p0156', 1)), # AT&T version
('movzl gpr gpr', ('1*p0156', 1)), # AT&T version
('movzq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('1*p0156+2*p06', 2)),
('cmova mem gpr', ('1*p0156+2*p06', 2)),
('cmovae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovae mem gpr', ('1*p0156+2*p06', 2)),
('cmovb gpr gpr', ('1*p0156+2*p06', 2)),
('cmovb mem gpr', ('1*p0156+1*p06', 2)),
('cmovbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovc mem gpr', ('1*p0156+1*p06', 2)),
('cmove gpr gpr', ('1*p0156+1*p06', 2)),
('cmove mem gpr', ('1*p0156+1*p06', 2)),
('cmovg gpr gpr', ('1*p0156+1*p06', 2)),
('cmovg mem gpr', ('1*p0156+1*p06', 2)),
('cmovge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovge mem gpr', ('1*p0156+1*p06', 2)),
('cmovl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovl mem gpr', ('1*p0156+1*p06', 2)),
('cmovle gpr gpr', ('1*p0156+1*p06', 2)),
('cmovle mem gpr', ('1*p0156+1*p06', 2)),
('cmovna gpr gpr', ('1*p0156+2*p06', 2)),
('cmovna mem gpr', ('1*p0156+2*p06', 2)),
('cmovnae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnae mem gpr', ('1*p0156+1*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovnbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnc mem gpr', ('1*p0156+1*p06', 2)),
('cmovne gpr gpr', ('1*p0156+1*p06', 2)),
('cmovne mem gpr', ('1*p0156+1*p06', 2)),
('cmovng gpr gpr', ('1*p0156+1*p06', 2)),
('cmovng mem gpr', ('1*p0156+1*p06', 2)),
('cmovnge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnge mem gpr', ('1*p0156+1*p06', 2)),
('cmovnl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnl mem gpr', ('1*p0156+1*p06', 2)),
('cmovno gpr gpr', ('1*p0156+1*p06', 2)),
('cmovno mem gpr', ('1*p0156+1*p06', 2)),
('cmovnp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnp mem gpr', ('1*p0156+1*p06', 2)),
('cmovns gpr gpr', ('1*p0156+1*p06', 2)),
('cmovns mem gpr', ('1*p0156+1*p06', 2)),
('cmovnz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnz mem gpr', ('1*p0156+1*p06', 2)),
('cmovo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovo mem gpr', ('1*p0156+1*p06', 2)),
('cmovp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovp mem gpr', ('1*p0156+1*p06', 2)),
('cmovpe gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpe mem gpr', ('1*p0156+1*p06', 2)),
('cmovpo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpo mem gpr', ('1*p0156+1*p06', 2)),
('cmovs gpr gpr', ('1*p0156+1*p06', 2)),
('cmovs mem gpr', ('1*p0156+1*p06', 2)),
('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovz mem gpr', ('1*p0156+1*p06', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 3)),
('pmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p5', 1)),
('pmovsxbw mem xmm', ('1*p5', 1)),
('pmovsxbd xmm xmm', ('1*p5', 1)),
('pmovsxbd mem xmm', ('1*p5', 1)),
('pmovsxbq xmm xmm', ('1*p5', 1)),
('pmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw xmm xmm', ('1*p5', 1)),
('vpmovsxbw mem xmm', ('1*p5', 1)),
('vpmovsxbd xmm xmm', ('1*p5', 1)),
('vpmovsxbd mem xmm', ('1*p5', 1)),
('vpmovsxbq xmm xmm', ('1*p5', 1)),
('vpmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw ymm ymm', ('1*p5', 1)),
('vpmovsxbw mem ymm', ('1*p5', 1)),
('vpmovsxbd ymm ymm', ('1*p5', 1)),
('vpmovsxbd mem ymm', ('1*p5', 1)),
('vpmovsxbq ymm ymm', ('1*p5', 1)),
('vpmovsxbq mem ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p5', 1)),
('pmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw xmm xmm', ('1*p5', 1)),
('vpmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw ymm ymm', ('1*p5', 1)),
('vpmovzxbw mem ymm', ('1*p5', 1)),
]
).items()
)
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('', 0)),
('vmovaps xmm xmm', ('', 0)),
('vmovaps ymm ymm', ('', 0)),
bdw_mov_instructions = list(
OrderedDict(
hsw_mov_instructions
+ [
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('2*p06', 1)),
('cmova mem gpr', ('2*p06', 1)),
('cmovae gpr gpr', ('1*p06', 1)),
('cmovae mem gpr', ('2*p06', 1)),
('cmovb gpr gpr', ('2*p06', 1)),
('cmovb mem gpr', ('1*p06', 1)),
('cmovbe gpr gpr', ('2*p06', 1)),
('cmovbe mem gpr', ('2*p06', 1)),
('cmovc gpr gpr', ('1*p06', 1)),
('cmovc mem gpr', ('1*p06', 1)),
('cmove gpr gpr', ('1*p06', 1)),
('cmove mem gpr', ('1*p06', 1)),
('cmovg gpr gpr', ('1*p06', 1)),
('cmovg mem gpr', ('1*p06', 1)),
('cmovge gpr gpr', ('1*p06', 1)),
('cmovge mem gpr', ('1*p06', 1)),
('cmovl gpr gpr', ('1*p06', 1)),
('cmovl mem gpr', ('1*p06', 1)),
('cmovle gpr gpr', ('1*p06', 1)),
('cmovle mem gpr', ('1*p06', 1)),
('cmovna gpr gpr', ('2*p06', 1)),
('cmovna mem gpr', ('2*p06', 1)),
('cmovnae gpr gpr', ('1*p06', 1)),
('cmovnae mem gpr', ('1*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnbe gpr gpr', ('2*p06', 1)),
('cmovnbe mem gpr', ('2*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnc gpr gpr', ('1*p06', 1)),
('cmovnc mem gpr', ('1*p06', 1)),
('cmovne gpr gpr', ('1*p06', 1)),
('cmovne mem gpr', ('1*p06', 1)),
('cmovng gpr gpr', ('1*p06', 1)),
('cmovng mem gpr', ('1*p06', 1)),
('cmovnge gpr gpr', ('1*p06', 1)),
('cmovnge mem gpr', ('1*p06', 1)),
('cmovnl gpr gpr', ('1*p06', 1)),
('cmovnl mem gpr', ('1*p06', 1)),
('cmovno gpr gpr', ('1*p06', 1)),
('cmovno mem gpr', ('1*p06', 1)),
('cmovnp gpr gpr', ('1*p06', 1)),
('cmovnp mem gpr', ('1*p06', 1)),
('cmovns gpr gpr', ('1*p06', 1)),
('cmovns mem gpr', ('1*p06', 1)),
('cmovnz gpr gpr', ('1*p06', 1)),
('cmovnz mem gpr', ('1*p06', 1)),
('cmovo gpr gpr', ('1*p06', 1)),
('cmovo mem gpr', ('1*p06', 1)),
('cmovp gpr gpr', ('1*p06', 1)),
('cmovp mem gpr', ('1*p06', 1)),
('cmovpe gpr gpr', ('1*p06', 1)),
('cmovpe mem gpr', ('1*p06', 1)),
('cmovpo gpr gpr', ('1*p06', 1)),
('cmovpo mem gpr', ('1*p06', 1)),
('cmovs gpr gpr', ('1*p06', 1)),
('cmovs mem gpr', ('1*p06', 1)),
('cmovz gpr gpr', ('1*p06', 1)),
('cmovz mem gpr', ('1*p06', 1)),
]
).items()
)
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('', 0)),
('vmovdqa xmm xmm', ('', 0)),
('vmovdqa ymm ymm', ('', 0)),
skx_mov_instructions = list(
OrderedDict(
bdw_mov_instructions
+ [
# https://www.felixcloutier.com/x86/movapd
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movapd xmm xmm', ('1*p5', 1)),
# ('vmovapd xmm xmm', ('1*p5', 1)),
# ('vmovapd ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movaps
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movaps xmm xmm', ('1*p5', 1)),
# ('vmovaps xmm xmm', ('1*p5', 1)),
# ('vmovaps ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 4)),
('movbe mem gpr', ('1*p15', 4)),
# https://www.felixcloutier.com/x86/movddup
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
# TODO with masking!
# https://www.felixcloutier.com/x86/movntdq
('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('vmovntdqa mem zmm', ('', 0)),
# https://www.felixcloutier.com/x86/movntpd
('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p0+1*p015', 1)),
# https://www.felixcloutier.com/x86/movsd
# TODO with masking!
# https://www.felixcloutier.com/x86/movshdup
# TODO with masking!
# https://www.felixcloutier.com/x86/movsldup
# TODO with masking!
# https://www.felixcloutier.com/x86/movss
# TODO with masking!
# https://www.felixcloutier.com/x86/movupd
# TODO with masking!
# https://www.felixcloutier.com/x86/movups
# TODO with masking!
# https://www.felixcloutier.com/x86/pmovsx
# TODO with masking!
('vpmovsxbw ymm zmm', ('1*p5', 3)),
('vpmovsxbw mem zmm', ('1*p5', 1)),
]
).items()
)
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('', 0)),
('vmovdqu xmm xmm', ('', 0)),
('vmovdqu ymm ymm', ('', 0)),
csx_mov_instructions = OrderedDict(skx_mov_instructions + []).items()
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('', 0)),
('vmovupd xmm xmm', ('', 0)),
('vmovupd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('', 0)),
('vmovups xmm xmm', ('', 0)),
('vmovups ymm ymm', ('', 0)),
]).items())
hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
# https://www.felixcloutier.com/x86/mov
('mov imd gpr', ('1*p0156', 1)),
('mov gpr gpr', ('1*p0156', 1)),
('movabs imd gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 6)),
('movbe mem gpr', ('1*p15', 6)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 3)),
('vmovmskps xmm gpr', ('1*p0', 3)),
('vmovmskps ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p0156', 1)),
('movsb gpr gpr', ('1*p0156', 1)), # AT&T version
('movsw gpr gpr', ('1*p0156', 1)), # AT&T version
('movsl gpr gpr', ('1*p0156', 1)), # AT&T version
('movsq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p0156', 1)),
('movzb gpr gpr', ('1*p0156', 1)), # AT&T version
('movzw gpr gpr', ('1*p0156', 1)), # AT&T version
('movzl gpr gpr', ('1*p0156', 1)), # AT&T version
('movzq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('1*p0156+2*p06', 2)),
('cmova mem gpr', ('1*p0156+2*p06', 2)),
('cmovae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovae mem gpr', ('1*p0156+2*p06', 2)),
('cmovb gpr gpr', ('1*p0156+2*p06', 2)),
('cmovb mem gpr', ('1*p0156+1*p06', 2)),
('cmovbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovc mem gpr', ('1*p0156+1*p06', 2)),
('cmove gpr gpr', ('1*p0156+1*p06', 2)),
('cmove mem gpr', ('1*p0156+1*p06', 2)),
('cmovg gpr gpr', ('1*p0156+1*p06', 2)),
('cmovg mem gpr', ('1*p0156+1*p06', 2)),
('cmovge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovge mem gpr', ('1*p0156+1*p06', 2)),
('cmovl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovl mem gpr', ('1*p0156+1*p06', 2)),
('cmovle gpr gpr', ('1*p0156+1*p06', 2)),
('cmovle mem gpr', ('1*p0156+1*p06', 2)),
('cmovna gpr gpr', ('1*p0156+2*p06', 2)),
('cmovna mem gpr', ('1*p0156+2*p06', 2)),
('cmovnae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnae mem gpr', ('1*p0156+1*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovnbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnc mem gpr', ('1*p0156+1*p06', 2)),
('cmovne gpr gpr', ('1*p0156+1*p06', 2)),
('cmovne mem gpr', ('1*p0156+1*p06', 2)),
('cmovng gpr gpr', ('1*p0156+1*p06', 2)),
('cmovng mem gpr', ('1*p0156+1*p06', 2)),
('cmovnge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnge mem gpr', ('1*p0156+1*p06', 2)),
('cmovnl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnl mem gpr', ('1*p0156+1*p06', 2)),
('cmovno gpr gpr', ('1*p0156+1*p06', 2)),
('cmovno mem gpr', ('1*p0156+1*p06', 2)),
('cmovnp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnp mem gpr', ('1*p0156+1*p06', 2)),
('cmovns gpr gpr', ('1*p0156+1*p06', 2)),
('cmovns mem gpr', ('1*p0156+1*p06', 2)),
('cmovnz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnz mem gpr', ('1*p0156+1*p06', 2)),
('cmovo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovo mem gpr', ('1*p0156+1*p06', 2)),
('cmovp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovp mem gpr', ('1*p0156+1*p06', 2)),
('cmovpe gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpe mem gpr', ('1*p0156+1*p06', 2)),
('cmovpo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpo mem gpr', ('1*p0156+1*p06', 2)),
('cmovs gpr gpr', ('1*p0156+1*p06', 2)),
('cmovs mem gpr', ('1*p0156+1*p06', 2)),
('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovz mem gpr', ('1*p0156+1*p06', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 3)),
('pmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p5', 1)),
('pmovsxbw mem xmm', ('1*p5', 1)),
('pmovsxbd xmm xmm', ('1*p5', 1)),
('pmovsxbd mem xmm', ('1*p5', 1)),
('pmovsxbq xmm xmm', ('1*p5', 1)),
('pmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw xmm xmm', ('1*p5', 1)),
('vpmovsxbw mem xmm', ('1*p5', 1)),
('vpmovsxbd xmm xmm', ('1*p5', 1)),
('vpmovsxbd mem xmm', ('1*p5', 1)),
('vpmovsxbq xmm xmm', ('1*p5', 1)),
('vpmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw ymm ymm', ('1*p5', 1)),
('vpmovsxbw mem ymm', ('1*p5', 1)),
('vpmovsxbd ymm ymm', ('1*p5', 1)),
('vpmovsxbd mem ymm', ('1*p5', 1)),
('vpmovsxbq ymm ymm', ('1*p5', 1)),
('vpmovsxbq mem ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p5', 1)),
('pmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw xmm xmm', ('1*p5', 1)),
('vpmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw ymm ymm', ('1*p5', 1)),
('vpmovzxbw mem ymm', ('1*p5', 1)),
]).items())
bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('2*p06', 1)),
('cmova mem gpr', ('2*p06', 1)),
('cmovae gpr gpr', ('1*p06', 1)),
('cmovae mem gpr', ('2*p06', 1)),
('cmovb gpr gpr', ('2*p06', 1)),
('cmovb mem gpr', ('1*p06', 1)),
('cmovbe gpr gpr', ('2*p06', 1)),
('cmovbe mem gpr', ('2*p06', 1)),
('cmovc gpr gpr', ('1*p06', 1)),
('cmovc mem gpr', ('1*p06', 1)),
('cmove gpr gpr', ('1*p06', 1)),
('cmove mem gpr', ('1*p06', 1)),
('cmovg gpr gpr', ('1*p06', 1)),
('cmovg mem gpr', ('1*p06', 1)),
('cmovge gpr gpr', ('1*p06', 1)),
('cmovge mem gpr', ('1*p06', 1)),
('cmovl gpr gpr', ('1*p06', 1)),
('cmovl mem gpr', ('1*p06', 1)),
('cmovle gpr gpr', ('1*p06', 1)),
('cmovle mem gpr', ('1*p06', 1)),
('cmovna gpr gpr', ('2*p06', 1)),
('cmovna mem gpr', ('2*p06', 1)),
('cmovnae gpr gpr', ('1*p06', 1)),
('cmovnae mem gpr', ('1*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnbe gpr gpr', ('2*p06', 1)),
('cmovnbe mem gpr', ('2*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnc gpr gpr', ('1*p06', 1)),
('cmovnc mem gpr', ('1*p06', 1)),
('cmovne gpr gpr', ('1*p06', 1)),
('cmovne mem gpr', ('1*p06', 1)),
('cmovng gpr gpr', ('1*p06', 1)),
('cmovng mem gpr', ('1*p06', 1)),
('cmovnge gpr gpr', ('1*p06', 1)),
('cmovnge mem gpr', ('1*p06', 1)),
('cmovnl gpr gpr', ('1*p06', 1)),
('cmovnl mem gpr', ('1*p06', 1)),
('cmovno gpr gpr', ('1*p06', 1)),
('cmovno mem gpr', ('1*p06', 1)),
('cmovnp gpr gpr', ('1*p06', 1)),
('cmovnp mem gpr', ('1*p06', 1)),
('cmovns gpr gpr', ('1*p06', 1)),
('cmovns mem gpr', ('1*p06', 1)),
('cmovnz gpr gpr', ('1*p06', 1)),
('cmovnz mem gpr', ('1*p06', 1)),
('cmovo gpr gpr', ('1*p06', 1)),
('cmovo mem gpr', ('1*p06', 1)),
('cmovp gpr gpr', ('1*p06', 1)),
('cmovp mem gpr', ('1*p06', 1)),
('cmovpe gpr gpr', ('1*p06', 1)),
('cmovpe mem gpr', ('1*p06', 1)),
('cmovpo gpr gpr', ('1*p06', 1)),
('cmovpo mem gpr', ('1*p06', 1)),
('cmovs gpr gpr', ('1*p06', 1)),
('cmovs mem gpr', ('1*p06', 1)),
('cmovz gpr gpr', ('1*p06', 1)),
('cmovz mem gpr', ('1*p06', 1)),
]).items())
skx_mov_instructions = list(OrderedDict(bdw_mov_instructions + [
# https://www.felixcloutier.com/x86/movapd
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movapd xmm xmm', ('1*p5', 1)),
# ('vmovapd xmm xmm', ('1*p5', 1)),
# ('vmovapd ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movaps
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movaps xmm xmm', ('1*p5', 1)),
# ('vmovaps xmm xmm', ('1*p5', 1)),
# ('vmovaps ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 4)),
('movbe mem gpr', ('1*p15', 4)),
# https://www.felixcloutier.com/x86/movddup
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
# TODO with masking!
# https://www.felixcloutier.com/x86/movntdq
('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('vmovntdqa mem zmm', ('', 0)),
# https://www.felixcloutier.com/x86/movntpd
('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p0+1*p015', 1)),
# https://www.felixcloutier.com/x86/movsd
# TODO with masking!
# https://www.felixcloutier.com/x86/movshdup
# TODO with masking!
# https://www.felixcloutier.com/x86/movsldup
# TODO with masking!
# https://www.felixcloutier.com/x86/movss
# TODO with masking!
# https://www.felixcloutier.com/x86/movupd
# TODO with masking!
# https://www.felixcloutier.com/x86/movups
# TODO with masking!
# https://www.felixcloutier.com/x86/pmovsx
# TODO with masking!
('vpmovsxbw ymm zmm', ('1*p5', 3)),
('vpmovsxbw mem zmm', ('1*p5', 1)),
]).items())
csx_mov_instructions = OrderedDict(skx_mov_instructions + [
]).items()
def get_description(arch, rhs_comment=None):
descriptions = {
@@ -803,7 +763,7 @@ def get_description(arch, rhs_comment=None):
'hsw': '\n'.join([p7.process_item(*item) for item in hsw_mov_instructions]),
'bdw': '\n'.join([p7.process_item(*item) for item in bdw_mov_instructions]),
'skx': '\n'.join([p7.process_item(*item) for item in skx_mov_instructions]),
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions])
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]),
}
description = descriptions[arch]
@@ -813,20 +773,21 @@ def get_description(arch, rhs_comment=None):
commented_description = ""
for l in descriptions[arch].split('\n'):
commented_description += ("{:<"+str(max_length)+"} # {}\n").format(l, rhs_comment)
commented_description += ("{:<" + str(max_length) + "} # {}\n").format(l, rhs_comment)
description = commented_description
return description
if __name__ == '__main__':
import sys
if len(sys.argv) != 2:
print("Usage: {} (snb|ivb|hsw|bdw|skx|csx)".format(sys.argv[0]))
sys.exit(0)
try:
print(get_description(sys.argv[1], rhs_comment=' '.join(sys.argv)))
except KeyError:
print("Unknown architecture.")
sys.exit(1)

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: Intel Haswell
arch_code: HSW
isa: x86

36318
osaca/data/icl.yml Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,52 +1,35 @@
osaca_version: 0.3.0
osaca_version: 0.3.7
isa: "AArch64"
# Contains all operand-irregular instruction forms OSACA supports for AArch64.
# Operand-regular for an AArch64 instruction form with N operands in the shape of
# mnemonic op1 ... opN
# means that op1 is the only destination operand and op2 to op(N) are source operands.
instruction_forms:
- name: "fmla"
- name: fmla
operands:
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: true
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: false
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: false
- name: "fmla"
- name: ldp
operands:
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: true
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: false
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "memory"
@@ -54,18 +37,14 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
pre-indexed: "*"
post-indexed: "*"
source: true
destination: false
- name: "ldp"
- name: [ldr, ldur]
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
prefix: "*"
source: false
destination: true
- class: "memory"
@@ -73,90 +52,18 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
pre-indexed: "*"
post-indexed: "*"
source: true
destination: false
- name: "ldp"
- name: stp
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
source: true
destination: true
- name: "stp"
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "d"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
source: false
destination: true
- name: "stp"
operands:
- class: "register"
prefix: "q"
source: true
destination: false
- class: "register"
prefix: "q"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
source: false
destination: true
- name: "str"
operands:
- class: "register"
prefix: "x"
prefix: "*"
source: true
destination: false
- class: "memory"
@@ -168,10 +75,10 @@ instruction_forms:
post-indexed: "*"
source: false
destination: true
- name: "str"
- name: [str, stur]
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: true
destination: false
- class: "memory"
@@ -183,48 +90,73 @@ instruction_forms:
post-indexed: "*"
source: false
destination: true
- name: "str"
- name: cmp
operands:
- class: "register"
prefix: "q"
prefix: "*"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "stur"
- class: "register"
prefix: "*"
source: true
destination: false
- name: cmp
operands:
- class: "register"
prefix: "q"
prefix: "*"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "stur"
- class: "immediate"
imd: "int"
source: true
destination: false
- name: cmn
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- name: cmn
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "immediate"
imd: "int"
source: true
destination: false
- name: fcmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- name: fcmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "immediate"
imd: "double"
source: true
destination: false
- name: fcmp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "immediate"
imd: "float"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
import os.path
import argparse
import os.path
import sys
import xml.etree.ElementTree as ET
from distutils.version import StrictVersion
@@ -8,8 +8,23 @@ from distutils.version import StrictVersion
from osaca.parser import get_parser
from osaca.semantics import MachineModel
intel_archs = ['CON', 'WOL', 'NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL',
'CNL', 'ICL']
intel_archs = [
'CON',
'WOL',
'NHM',
'WSM',
'SNB',
'IVB',
'HSW',
'BDW',
'SKL',
'SKX',
'KBL',
'CFL',
'CNL',
'ICL',
]
amd_archs = ['ZEN1', 'ZEN+', 'ZEN2']
def port_pressure_from_tag_attributes(attrib):
@@ -19,6 +34,7 @@ def port_pressure_from_tag_attributes(attrib):
for p in attrib['ports'].split('+'):
cycles, ports = p.split('*')
ports = ports.lstrip('p')
ports = ports.lstrip('FP')
port_occupation.append([int(cycles), ports])
# Also consider div on DIV pipeline
@@ -88,10 +104,10 @@ def extract_paramters(instruction_tag, parser, isa):
return parameters
def extract_model(tree, arch):
def extract_model(tree, arch, skip_mem=True):
try:
isa = MachineModel.get_isa_for_arch(arch)
except:
except Exception:
print("Skipping...", file=sys.stderr)
return None
mm = MachineModel(isa=isa)
@@ -101,6 +117,7 @@ def extract_model(tree, arch):
ignore = False
mnemonic = instruction_tag.attrib['asm']
iform = instruction_tag.attrib['iform']
# skip any mnemonic that contains spaces (e.g., "REX CRC32")
if ' ' in mnemonic:
continue
@@ -118,6 +135,26 @@ def extract_model(tree, arch):
arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
if arch_tag is None:
continue
# skip any instructions without port utilization
if not any(['ports' in x.attrib for x in arch_tag.findall('measurement')]):
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
continue
# skip if computed and measured TP don't match
if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][
0
]:
print(
"Calculated TP from port utilization doesn't match TP, skip: ",
iform,
file=sys.stderr,
)
continue
# skip if instruction contains memory operand
if skip_mem and any(
[x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')]
):
print("Contains memory operand, skip: ", iform, file=sys.stderr)
continue
# We collect all measurement and IACA information and compare them later
for measurement_tag in arch_tag.iter('measurement'):
if 'TP_ports' in measurement_tag.attrib:
@@ -143,10 +180,14 @@ def extract_model(tree, arch):
if 'max_cycles' in l_tag.attrib
]
if latencies[1:] != latencies[:-1]:
print("Contradicting latencies found, using first:", mnemonic, latencies,
file=sys.stderr)
print(
"Contradicting latencies found, using smallest:",
iform,
latencies,
file=sys.stderr,
)
if latencies:
latency = latencies[0]
latency = min(latencies)
if ignore:
continue
@@ -160,16 +201,14 @@ def extract_model(tree, arch):
# Check if all are equal
if port_pressure:
if port_pressure[1:] != port_pressure[:-1]:
print(
"Contradicting port occupancies, using latest IACA:",
mnemonic, file=sys.stderr)
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
port_pressure = port_pressure[-1]
else:
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
continue
# Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
if arch.upper() in intel_archs and not arch.upper() in ['ICL']:
if arch.upper() in intel_archs and not arch.upper() in ['ICL']:
if any([p['class'] == 'memory' for p in parameters]):
# We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
# TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
@@ -180,10 +219,16 @@ def extract_model(tree, arch):
port_23 = True
if '4' in pp[1]:
port_4 = True
# Add (1, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
# Add (X, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
# X = 2 on SNB and IVB IFF used in combination with ymm register, otherwise X = 1
if arch.upper() in ['SNB', 'IVB'] and \
any([p['class'] == 'register' and p['name'] == 'ymm' for p in parameters]):
data_port_throughput = 2
else:
data_port_throughput = 1
if port_23 and not port_4:
port_pressure.append((1, ['2D', '3D']))
port_pressure.append((data_port_throughput, ['2D', '3D']))
# Add missing ports:
for ports in [pp[1] for pp in port_pressure]:
for p in ports:
@@ -201,7 +246,7 @@ def rhs_comment(uncommented_string, comment):
commented_string = ""
for l in uncommented_string.split('\n'):
commented_string += ("{:<"+str(max_length)+"} # {}\n").format(l, comment)
commented_string += ("{:<" + str(max_length) + "} # {}\n").format(l, comment)
return commented_string
@@ -218,21 +263,33 @@ def main():
help='architecture to extract, use IACA abbreviations (e.g., SNB). '
'if not given, all will be extracted and saved to file in CWD.',
)
parser.add_argument(
'--mem',
dest='skip_mem',
action='store_false',
help='add instruction forms including memory addressing operands, which are '
'skipped by default'
)
args = parser.parse_args()
basename = os.path.basename(__file__)
tree = ET.parse(args.xml)
print('Available architectures:', ', '.join(architectures(tree)))
print('# Available architectures:', ', '.join(architectures(tree)))
if args.arch:
model = extract_model(tree, args.arch)
print('# Chosen architecture: {}'.format(args.arch))
model = extract_model(tree, args.arch, args.skip_mem)
if model is not None:
print(rhs_comment(model.dump(), basename+" "+sys.argv[0]))
print(
rhs_comment(
model.dump(), "uops.info import"
)
)
else:
for arch in architectures(tree):
print(arch, end='')
model = extract_model(tree, arch.lower())
model = extract_model(tree, arch.lower(), args.skip_mem)
if model:
model_string = rhs_comment(model.dump(), basename+" "+arch)
model_string = rhs_comment(model.dump(), basename + " " + arch)
with open('{}.yml'.format(arch.lower()), 'w') as f:
f.write(model_string)

771
osaca/data/n1.yml Normal file
View File

@@ -0,0 +1,771 @@
osaca_version: 0.3.4
micro_architecture: Arm Neoverse N1
arch_code: n1
isa: AArch64
ROB_size: 128 # wikichip
retired_uOps_per_cycle: 8 # wikichip
scheduler_size: 120 # wikichip
hidden_loads: false
load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 5.0, q: 6.0, v: 5.0, z: 4.0}
load_throughput:
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
load_throughput_default: [[1, '67']]
store_throughput: []
store_throughput_default: [[1, '56'], [1, '67']]
ports: ['0', '1', '2', '3', '4', '4DV', '5', '6', '7']
port_model_scheme: |
+----------------------------------------------------------------------------+
| 120 entries |
+----------------------------------------------------------------------------+
0 |BR 1 |IS0 2 |IS1 3 |IM0 4 |FP0 5 |FP1 6 |LDST 7 |LDST
\/ \/ \/ \/ \/ \/ \/ \/
+------+ +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
|Branch| | INT | | INT | | INT | | FP ALU | | FP ALU | | AGU | | AGU |
+------+ | ALU | | ALU | | ALU | +--------+ +--------+ +-------+ +-------+
+-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
+-----+ +-----+ | FP MUL | | FP MUL | |LD DATA| |LD DATA|
| ST | | INT | +--------+ +--------+ +-------+ +-------+
| INT | | MUL | +--------+ +---------+
+-----+ +-----+ | FP DIV | |SIMD SHFT|
+-----+ +--------+ +---------+
| INT | +--------+ +--------+
| DIV | | FMA | | FMA |
+-----+ +--------+ +--------+
+-----+ +--------+ +--------+
|SHIFT| | ST SIMD| | ST SIMD|
+-----+ | DATA | | DATA |
+-----+ +--------+ +--------+
| ST |
| INT |
+-----+
instruction_forms:
- name: add
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: add
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: adds
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: b.ne
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: b.gt
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: bne
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: cmp
operands:
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: cmp
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.3333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: dup
operands:
- class: register
prefix: d
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: d
width: '*'
- class: register
prefix: d
width: '*'
- class: register
prefix: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fdiv
operands:
- class: register
prefix: v
shape: s
width: 128
- class: register
prefix: v
shape: s
width: 128
- class: register
prefix: v
shape: s
width: 128
throughput: 6.0
latency: 8.0 # 1*p4+6*p4DV
port_pressure: [[1, '4'], [6, [4DV]]]
- name: fdiv
operands:
- class: register
prefix: v
shape: d
width: 128
- class: register
prefix: v
shape: d
width: 128
- class: register
prefix: v
shape: d
width: 128
throughput: 10.0
latency: 12.0 # 1*p4+10*p4DV
port_pressure: [[1, '4'], [10, [4DV]]]
- name: fmla
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmla
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmov
operands:
- {class: register, prefix: s}
- {class: immediate, imd: double}
latency: ~ # 1*p45
port_pressure: [[1, '45']]
throughput: 0.5
- name: fmul
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmul
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmul
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: register
prefix: d
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: frecpe
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 2.0
latency: 4.0 # 1*p4
port_pressure: [[2, '4']]
- name: frecpe
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 1.0
latency: 3.0 # 1*p4
port_pressure: [[1, '4']]
- name: fsub
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fsub
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 5.0 # 2*p67, from n1 opt guide
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 5.0 # 2*p67+1*p123, from n1 opt guide
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67, from n1 opt guide
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: ~
index: ~
scale: 1
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 7.0 # 2*p67+1*p123, from n1 opt guide
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: true
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67+1*p123
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 5.0 # 2*p67+1*p123
port_pressure: [[2, '67'], [1, '123']]
- name: ldur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 6.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 6.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
prefix: q
- class: register
prefix: q
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
prefix: d
- class: register
prefix: d
throughput: 0.0
latency: 0.0
port_pressure: []
- name: mov
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.25
latency: 1.0 # 1*p3456
port_pressure: [[1, '3456']]
- name: mov
operands:
- class: register
prefix: v
shape: b
width: '*'
- class: register
prefix: v
shape: b
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: stp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p45+1*p67
port_pressure: [[2, '45'], [1, '67']]
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 2*p45+2*p67+1*123
port_pressure: [[2, '45'], [2, '67'], [1, '123']]
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p45+2*p67
port_pressure: [[2, '45'], [2, '67']]
- name: stur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p23
port_pressure: [[1, '67'], [1, '23']]
- name: stur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p67+1*p45
port_pressure: [[2, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p23
port_pressure: [[1, '67'], [1, '23']]
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p45
port_pressure: [[1, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 0.5
latency: 0 # 1*p67+1*p45+1*p123
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p67+1*p45
port_pressure: [[2, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 1*p67+1*p45+1*123
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 1*p67+1*p23+1*p123
port_pressure: [[1, '67'], [1, '23'], [1, '123']]
- name: sub
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: Thunder X2
arch_code: tx2
isa: AArch64
@@ -80,24 +80,114 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mul
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 1.0
latency: 4.0 # 1*p1
port_pressure: [[1, '1']]
- name: mul
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 1.0
latency: 4.0 # 1*p1
port_pressure: [[1, '1']]
- name: b.ne
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: b.lt
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.hs
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.eq
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.gt
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: bne
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: cmp
operands:
- class: register
@@ -107,6 +197,15 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: cmp
operands:
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: cmp
operands:
- class: register
@@ -126,6 +225,17 @@ instruction_forms:
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: dup
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: fadd
operands:
- class: register
@@ -267,6 +377,34 @@ instruction_forms:
throughput: 0.5
latency: 6.0 # 1*p01
port_pressure: [[1, '01']]
- name: frecpe
operands:
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: frecpe
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: fsub
operands:
- class: register
@@ -295,6 +433,28 @@ instruction_forms:
throughput: 0.5
latency: 6.0 # 1*p01
port_pressure: [[1, '01']]
- name: lsl
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: lsl
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: ldp
operands:
- class: register
@@ -375,6 +535,22 @@ instruction_forms:
throughput: 1.0
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
- class: register
@@ -477,6 +653,15 @@ instruction_forms:
throughput: 0.5
latency: 4.0 # 1*p34
port_pressure: [[1.0, '34']]
- name: ldr
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
@@ -504,15 +689,42 @@ instruction_forms:
throughput: 0.0
latency: 0.0
port_pressure: []
- name: mov
operands:
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.5
latency: 1.0 # 1*p01
port_pressure: [[1, '01']]
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
@@ -540,6 +752,43 @@ instruction_forms:
throughput: ~
latency: ~
port_pressure: []
- name: ret
operands: []
throughput: 0.5
latency: ~ # 1*p34
port_pressure: [[1, '34']]
- name: stp
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p34+1*p5
port_pressure: [[2, '34'], [1, '5']]
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p34+1*p5
port_pressure: [[2, '34'], [1, '5']]
- name: stp
operands:
- class: register
@@ -616,6 +865,20 @@ instruction_forms:
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
- class: register
@@ -700,6 +963,39 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5'], [1, '012']]
- name: subs
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: subs
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
@@ -711,3 +1007,25 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: AMD Zen (family 17h)
arch_code: ZEN1
isa: x86

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: AMD Zen2
arch_code: ZEN2
isa: x86
@@ -725,6 +725,39 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p89+1*p10D
port_pressure: [[1, '89'], [1, [10D]]]
- name: vmovdqu
operands:
- class: memory
base: gpr
offset: "*"
index: ~
scale: 1
- class: register
name: "*"
throughput: 0.5
latency: 4.0 # 1*p8910+1*p8D9D
port_pressure: [[1, ['8','9','10']], [1, [8D,9D]]]
- name: vmovdqu
operands:
- class: memory
base: gpr
offset: "*"
index: gpr
scale: "*"
- class: register
name: "*"
throughput: 0.5
latency: 4.0 # 1*p8910+1*p8D9D
port_pressure: [[1, ['8','9']], [1, [8D,9D]]]
- name: vmovdqu
operands:
- class: register
name: "*"
- class: register
name: "*"
throughput: 0.0
latency: 0.0
port_pressure: []
- name: add
operands:
- class: immediate
@@ -1081,6 +1114,16 @@ instruction_forms:
latency: 3.0 # 1*p01
port_pressure: [[1, '01']]
uops: 1
- name: [shl, shr]
operands:
- class: immediate
imd: int
- class: register
name: gpr
throughput: 0.25
latency: 1.0 # 1*p4567
port_pressure: [[1, '4567']]
uops: 1
- name: UNPCKHPS # model_importer.py ./model_importer.py
operands: # model_importer.py ./model_importer.py
- class: register # model_importer.py ./model_importer.py

View File

@@ -274,10 +274,19 @@ def _create_db_operand_x86(operand):
def _scrape_from_felixcloutier(mnemonic):
"""Scrape src/dst information from felixcloutier website and return infromation for user."""
from bs4 import BeautifulSoup
"""Scrape src/dst information from felixcloutier website and return information for user."""
import requests
try:
from bs4 import BeautifulSoup
except ImportError:
print(
'Module BeautifulSoup not installed. Fetching instruction form information '
'online requires BeautifulSoup.\nUse \'pip install bs4\' for installation.',
file=sys.stderr,
)
sys.exit(1)
index = 'https://www.felixcloutier.com/x86/index.html'
base_url = 'https://www.felixcloutier.com/x86/'
url = base_url + mnemonic.lower()
@@ -287,12 +296,15 @@ def _scrape_from_felixcloutier(mnemonic):
# GET website
r = requests.get(url=url)
# Parse result
soup = BeautifulSoup(r.text, 'html.parser')
if r.status_code == 200:
# Found result
table = soup.find('h2', attrs={'id': 'instruction-operand-encoding'}).findNextSibling()
operands = _get_src_dst_from_table(table)
operand_enc = BeautifulSoup(r.text, 'html.parser').find(
'h2', attrs={'id': 'instruction-operand-encoding'}
)
if operand_enc:
# operand encoding found, otherwise, no need to mark as suspicious
table = operand_enc.findNextSibling()
operands = _get_src_dst_from_table(table)
elif r.status_code == 404:
# Check for alternative href
index = BeautifulSoup(requests.get(url=index).text, 'html.parser')
@@ -300,12 +312,15 @@ def _scrape_from_felixcloutier(mnemonic):
if len(alternatives) > 0:
# alternative(s) found, take first one
url = base_url + alternatives[0].attrs['href'][2:]
table = (
BeautifulSoup(requests.get(url=url).text, 'html.parser')
.find('h2', attrs={'id': 'instruction-operand-encoding'})
.findNextSibling()
operand_enc = BeautifulSoup(requests.get(url=url).text, 'html.parser').find(
'h2', attrs={'id': 'instruction-operand-encoding'}
)
operands = _get_src_dst_from_table(table)
if operand_enc:
# operand encoding found, otherwise, no need to mark as suspicious
table = (
operand_enc.findNextSibling()
)
operands = _get_src_dst_from_table(table)
if operands:
# Found src/dst assignment for NUM_OPERANDS
if not any(['r' in x and 'w' in x for x in operands]):
@@ -313,9 +328,8 @@ def _scrape_from_felixcloutier(mnemonic):
return (suspicious, ' '.join(operands))
def _get_src_dst_from_table(table):
def _get_src_dst_from_table(table, num_operands=2):
"""Prettify bs4 table object to string for user"""
NUM_OPERANDS = 2
# Parse table
header = [''.join(x.string.lower().split()) for x in table.find('tr').findAll('td')]
data = table.findAll('tr')[1:]
@@ -327,10 +341,10 @@ def _get_src_dst_from_table(table):
data_dict[i][header[j]] = col.string
# Get only the instruction forms with 2 operands
num_ops = [_get_number_of_operands(row) for _, row in data_dict.items()]
if NUM_OPERANDS in num_ops:
row = data_dict[num_ops.index(NUM_OPERANDS)]
if num_operands in num_ops:
row = data_dict[num_ops.index(num_operands)]
reads_writes = []
for i in range(1, NUM_OPERANDS + 1):
for i in range(1, num_operands + 1):
m = re.search(r'(\([^\(\)]+\))', row['operand{}'.format(i)])
if not m:
# no parentheses (probably immediate operand), assume READ
@@ -369,6 +383,7 @@ def _check_sanity_arch_db(arch_mm, isa_mm, internet_check=True):
missing_port_pressure = []
suspicious_instructions = []
duplicate_instr_arch = []
duplicate_strings = []
for instr_form in arch_mm['instruction_forms']:
# check value in DB entry
@@ -388,6 +403,7 @@ def _check_sanity_arch_db(arch_mm, isa_mm, internet_check=True):
# instr forms with less than 3 operands might need an ISA DB entry due to src_reg operands
if (
len(instr_form['operands']) < 3
and len(instr_form['operands']) > 1
and 'mov' not in instr_form['name'].lower()
and not instr_form['name'].lower().startswith('j')
and instr_form not in suspicious_instructions
@@ -406,9 +422,10 @@ def _check_sanity_arch_db(arch_mm, isa_mm, internet_check=True):
duplicate_instr_arch.append(instr_form)
# every entry exists twice --> uniquify
tmp_list = []
for i in range(0, len(duplicate_instr_arch)):
for _ in range(0, len(duplicate_instr_arch)):
tmp = duplicate_instr_arch.pop()
if tmp not in duplicate_instr_arch:
if _get_full_instruction_name(tmp).lower() not in duplicate_strings:
duplicate_strings.append(_get_full_instruction_name(tmp).lower())
tmp_list.append(tmp)
duplicate_instr_arch = tmp_list
return (

View File

@@ -76,7 +76,7 @@ class Frontend(object):
self._get_flag_symbols(instruction_form['flags'])
if instruction_form['instruction'] is not None
else ' ',
instruction_form['line'].strip(),
instruction_form['line'].strip().replace('\t', ' '),
)
line = line if show_lineno else col_sep + col_sep.join(line.split(col_sep)[1:])
if show_cmnts is False and self._is_comment(instruction_form):
@@ -138,13 +138,13 @@ class Frontend(object):
separator,
sum([instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies']]),
separator,
dep_dict[dep]['root']['line'],
dep_dict[dep]['root']['line'].strip(),
separator,
[node['line_number'] for node in dep_dict[dep]['dependencies']],
)
return s
def full_analysis(self, kernel, kernel_dg: KernelDG, ignore_unknown=False, verbose=False):
def full_analysis(self, kernel, kernel_dg: KernelDG, ignore_unknown=False, arch_warning=False, length_warning=False, verbose=False):
"""
Build the full analysis report including header, the symbol map, the combined TP/CP/LCD
view and the list based LCD view.
@@ -156,11 +156,16 @@ class Frontend(object):
:param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
`False`
:type ignore_unknown: boolean, optional
:param arch_warning: flag for additional user warning to specify micro-arch
:type arch_warning: boolean, optional
:param length_warning: flag for additional user warning to specify kernel length with --lines
:type length_warning: boolean, optional
:param verbose: flag for verbosity level, defaults to False
:type verbose: boolean, optional
"""
return (
self._header_report()
+ self._user_warnings(arch_warning, length_warning)
+ self._symbol_map()
+ self.combined_view(
kernel,
@@ -246,7 +251,7 @@ class Frontend(object):
self._get_flag_symbols(instruction_form['flags'])
if instruction_form['instruction'] is not None
else ' ',
instruction_form['line'].strip(),
instruction_form['line'].strip().replace('\t', ' '),
)
s += '\n'
# check for unknown instructions and throw warning if called without --ignore-unknown
@@ -279,12 +284,33 @@ class Frontend(object):
'------------------\n'
' No final analysis is given. If you want to ignore this\n'
' warning and run the analysis anyway, start osaca with\n'
' --ignore_unknown flag.\n'
' --ignore-unknown flag.\n'
'--------------------------------------------------------------------------------'
'----------------{}\n'
).format(amount, '-' * len(str(amount)))
return s
def _user_warnings(self, arch_warning, length_warning):
"""Returns warning texts for giving the user more insight in what he is doing."""
arch_text = (
'WARNING: No micro-architecture was specified and a default uarch was used.\n'
' Specify the uarch with --arch. See --help for more information.\n'
)
length_text = (
'WARNING: You are analyzing a large amount of instruction forms. Analysis '
'across loops/block boundaries often do not make much sense.\n'
' Specify the kernel length with --length. See --help for more '
'information.\n'
' If this is intentional, you can safely ignore this message.\n'
)
warnings = ''
warnings += arch_text if arch_warning else ''
warnings += length_text if length_warning else ''
warnings += '\n'
return warnings
def _get_separator_list(self, separator, separator_2=' '):
"""Creates column view for seperators in the TP/combined view."""
separator_list = []
@@ -319,7 +345,12 @@ class Frontend(object):
continue
left_len = len(str(float(ports[i])).split('.')[0])
substr = '{:' + str(left_len) + '.' + str(max(port_len[i] - left_len - 1, 0)) + 'f}'
string_result += substr.format(ports[i]) + ' {} '.format(separator[i])
substr = substr.format(ports[i])
string_result += (
substr + ' {} '.format(separator[i])
if '.' in substr
else '{:.1f}{} '.format(ports[i], separator[i])
)
return string_result[:-1]
def _get_node_by_lineno(self, lineno, kernel):

View File

@@ -5,19 +5,33 @@ import io
import os
import re
import sys
import traceback
from osaca.db_interface import import_benchmark_output, sanity_check
from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64v81, ParserX86ATT
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG,
MachineModel, reduce_to_section)
MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
)
LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/')
DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2']
SUPPORTED_ARCHS = [
'SNB',
'IVB',
'HSW',
'BDW',
'SKX',
'CSX',
'ICL',
'ZEN1',
'ZEN2',
'TX2',
'N1',
'A64FX',
]
DEFAULT_ARCHS = {
'aarch64': 'A64FX',
'x86': 'SKX',
}
# Stolen from pip
@@ -71,7 +85,8 @@ def create_parser(parser=None):
parser.add_argument(
'--arch',
type=str,
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2).',
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, N1, '
'A64FX). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.',
)
parser.add_argument(
'--fixed',
@@ -79,6 +94,13 @@ def create_parser(parser=None):
help='Run the throughput analysis with fixed probabilities for all suitable ports per '
'instruction. Otherwise, OSACA will print the optimal port utilization for the kernel.',
)
parser.add_argument(
'--lines',
type=str,
help='Define lines that should be included in the analysis. This option overwrites any'
' range defined by markers in the assembly. Add either single lines or ranges defined by'
' "-" or ":", each entry separated by commas, e.g.: --lines 1,2,8-18,20:24',
)
parser.add_argument(
'--db-check',
dest='check_db',
@@ -128,6 +150,12 @@ def create_parser(parser=None):
parser.add_argument(
'--verbose', '-v', action='count', default=0, help='Increases verbosity level.'
)
parser.add_argument(
'--out', '-o',
default=sys.stdout,
type=argparse.FileType('w'),
help='Write analysis to this file (default to stdout).'
)
parser.add_argument(
'file', type=argparse.FileType('r'), help='Path to object (ASM or instruction file).'
)
@@ -144,7 +172,12 @@ def check_arguments(args, parser):
"""
supported_import_files = ['ibench', 'asmbench']
if 'arch' in args and (args.arch is None or args.arch.upper() not in SUPPORTED_ARCHS):
if args.arch is None and (args.check_db or 'import_data' in args):
parser.error(
'DB check and data import cannot work with a default microarchitecture. '
'Please see --help for all valid architecture codes.'
)
elif args.arch is not None and args.arch.upper() not in SUPPORTED_ARCHS:
parser.error(
'Microarchitecture not supported. Please see --help for all valid architecture codes.'
)
@@ -188,9 +221,9 @@ def insert_byte_marker(args):
from kerncraft.incore_model import asm_instrumentation
except ImportError:
print(
"Module kerncraft not installed. Use 'pip install --user "
"kerncraft' for installation.\nFor more information see "
"https://github.com/RRZE-HPC/kerncraft",
'Module kerncraft not installed. Use \'pip install --user '
'kerncraft\' for installation.\nFor more information see '
'https://github.com/RRZE-HPC/kerncraft',
file=sys.stderr,
)
sys.exit(1)
@@ -221,19 +254,41 @@ def inspect(args, output_file=sys.stdout):
:param output_file: Define the stream for output, defaults to :class:`sys.stdout`
:type output_file: stream, optional
"""
arch = args.arch
# Read file
code = args.file.read()
# Detect ISA if necessary
arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
print_arch_warning = False if args.arch else True
isa = MachineModel.get_isa_for_arch(arch)
verbose = args.verbose
ignore_unknown = args.ignore_unknown
# Read file
code = args.file.read()
# Parse file
parser = get_asm_parser(arch)
parsed_code = parser.parse_file(code)
try:
parsed_code = parser.parse_file(code)
except:
# probably the wrong parser based on heuristic
if args.arch is None:
# change ISA and try again
arch = DEFAULT_ARCHS['x86'] if BaseParser.detect_ISA(code) == 'aarch64' else DEFAULT_ARCHS['aarch64']
isa = MachineModel.get_isa_for_arch(arch)
parser = get_asm_parser(arch)
parsed_code = parser.parse_file(code)
else:
traceback.print_exc(file=sys.stderr)
sys.exit(1)
# Reduce to marked kernel and add semantics
kernel = reduce_to_section(parsed_code, isa)
# Reduce to marked kernel or chosen section and add semantics
if args.lines:
line_range = get_line_range(args.lines)
kernel = [line for line in parsed_code if line['line_number'] in line_range]
print_length_warning = False
else:
kernel = reduce_to_section(parsed_code, isa)
# Print warning if kernel has no markers and is larger than threshold (100)
print_length_warning = True if len(kernel) == len(parsed_code) and len(kernel) > 100 else False
machine_model = MachineModel(arch=arch)
semantics = ArchSemantics(machine_model)
semantics.add_semantics(kernel)
@@ -249,7 +304,12 @@ def inspect(args, output_file=sys.stdout):
frontend = Frontend(args.file.name, arch=arch)
print(
frontend.full_analysis(
kernel, kernel_graph, ignore_unknown=ignore_unknown, verbose=verbose
kernel,
kernel_graph,
ignore_unknown=ignore_unknown,
arch_warning=print_arch_warning,
length_warning=print_length_warning,
verbose=verbose
),
file=output_file,
)
@@ -292,7 +352,7 @@ def get_asm_parser(arch) -> BaseParser:
if isa == 'x86':
return ParserX86ATT()
elif isa == 'aarch64':
return ParserAArch64v81()
return ParserAArch64()
def get_unmatched_instruction_ratio(kernel):
@@ -306,13 +366,26 @@ def get_unmatched_instruction_ratio(kernel):
unmatched_counter += 1
return unmatched_counter / len(kernel)
def get_line_range(line_str):
    """
    Expand a line selection string into the list of selected line numbers.

    Entries are separated by commas. Each entry is either a single line number or an
    inclusive range written as ``start-end`` or ``start:end``. Surrounding whitespace and
    empty entries (e.g. caused by a trailing comma) are ignored.

    :param line_str: line selection, e.g. ``'1,2,8-18,20:24'``
    :type line_str: str
    :returns: all selected line numbers in the order they were given
    :rtype: list of int
    :raises ValueError: if a non-empty entry cannot be converted to integers
    """
    lines_int = []
    # ':' and '-' are interchangeable range separators
    for entry in line_str.replace(':', '-').split(','):
        entry = entry.strip()
        if not entry:
            # tolerate empty entries instead of failing in int('')
            continue
        if '-' in entry:
            bounds = entry.split('-')
            start, end = int(bounds[0]), int(bounds[1])
            # ranges are inclusive on both ends
            lines_int.extend(range(start, end + 1))
        else:
            lines_int.append(int(entry))
    return lines_int
def main():
"""Initialize and run command line interface."""
parser = create_parser()
args = parser.parse_args()
check_arguments(args, parser)
run(args)
run(args, output_file=args.out)
if __name__ == '__main__':

View File

@@ -6,14 +6,14 @@ Only the parser below will be exported, so please add new parsers to __all__.
from .attr_dict import AttrDict
from .base_parser import BaseParser
from .parser_x86att import ParserX86ATT
from .parser_AArch64v81 import ParserAArch64v81
from .parser_AArch64 import ParserAArch64
__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64v81', 'get_parser']
__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64', 'get_parser']
def get_parser(isa):
if isa.lower() == 'x86':
return ParserX86ATT()
elif isa.lower() == 'aarch64':
return ParserAArch64v81()
return ParserAArch64()
else:
raise ValueError("Unknown ISA {!r}.".format(isa))

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""Parser superclass of specific parsers."""
import operator
import re
class BaseParser(object):
# Identifiers for operand types
@@ -8,14 +9,35 @@ class BaseParser(object):
DIRECTIVE_ID = 'directive'
IMMEDIATE_ID = 'immediate'
LABEL_ID = 'label'
IDENTIFIER_ID = 'identifier'
MEMORY_ID = 'memory'
REGISTER_ID = 'register'
SEGMENT_EXT_ID = 'segment_extension'
INSTRUCTION_ID = 'instruction'
OPERANDS_ID = 'operands'
_parser_constructed = False
def __init__(self):
self.construct_parser()
if not self._parser_constructed:
self.construct_parser()
self._parser_constructed = True
@staticmethod
def detect_ISA(file_content):
    """
    Guess the ISA of an assembly file from the register names it uses.

    The occurrences of typical x86 (AT&T syntax) and AArch64 register names are counted and
    the ISA with the higher count is returned.

    :param file_content: assembly code to inspect
    :type file_content: str
    :returns: ``'x86'`` or ``'aarch64'``; ``'x86'`` if both counts are equal
    :rtype: str
    """
    # 1) x86: xmm/ymm/zmm vector registers and the rax/rbx/rcx/rdx (eax/...) general-purpose
    #    registers. The GPR names end right after the 'x', so match a word boundary there
    #    instead of demanding a trailing digit.
    heuristics_x86ATT = [r'%[xyz]mm[0-9]', r'%[er][abcd]x\b']
    # 2) AArch64: v and z vector registers with lane specifier and x/w general-purpose
    #    registers. The leading word boundary keeps hex literals such as 0x10 from being
    #    counted as register x1.
    heuristics_aarch64 = [r'[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]', r'\b[wx][0-9]']
    matches = {
        'x86': sum(len(re.findall(h, file_content)) for h in heuristics_x86ATT),
        'aarch64': sum(len(re.findall(h, file_content)) for h in heuristics_aarch64),
    }
    # max() returns the first maximal entry, so a tie resolves to 'x86'
    return max(matches.items(), key=operator.itemgetter(1))[0]
def parse_file(self, file_content, start_line=0):
"""

View File

@@ -6,7 +6,15 @@ import pyparsing as pp
from osaca.parser import AttrDict, BaseParser
class ParserAArch64v81(BaseParser):
class ParserAArch64(BaseParser):
_instance = None
# Singleton pattern, as this is created very many times
def __new__(cls):
    """Return the shared parser instance, creating it on the first call."""
    instance = cls._instance
    if instance is None:
        # first request: allocate the object and remember it on the class
        instance = cls._instance = super(ParserAArch64, cls).__new__(cls)
    return instance
def __init__(self):
super().__init__()
self.isa = 'aarch64'
@@ -19,22 +27,23 @@ class ParserAArch64v81(BaseParser):
pp.ZeroOrMore(pp.Word(pp.printables))
).setResultsName(self.COMMENT_ID)
# Define ARM assembly identifier
decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
relocation = pp.Combine(pp.Literal(':') + pp.Word(pp.alphanums + '_') + pp.Literal(':'))
first = pp.Word(pp.alphas + '_.', exact=1)
rest = pp.Word(pp.alphanums + '_.')
identifier = pp.Group(
pp.Optional(relocation).setResultsName('relocation')
+ pp.Combine(first + pp.Optional(rest)).setResultsName('name')
).setResultsName('identifier')
+ pp.Optional(pp.Suppress(pp.Literal('+')) + (hex_number | decimal_number).setResultsName('offset'))
).setResultsName(self.IDENTIFIER_ID)
# Label
self.label = pp.Group(
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
).setResultsName(self.LABEL_ID)
# Directive
decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
directive_option = pp.Combine(
pp.Word(pp.alphas + '#@.%', exact=1)
+ pp.Optional(pp.Word(pp.printables + ' ', excludeChars=','))
@@ -46,7 +55,7 @@ class ParserAArch64v81(BaseParser):
self.directive = pp.Group(
pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name')
+ commaSeparatedList.setResultsName('parameters')
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName('parameters')
+ pp.Optional(self.comment)
).setResultsName(self.DIRECTIVE_ID)
# LLVM-MCA markers
@@ -91,31 +100,49 @@ class ParserAArch64v81(BaseParser):
^ pp.CaselessLiteral('ror')
^ pp.CaselessLiteral('sxtw')
^ pp.CaselessLiteral('uxtw')
^ pp.CaselessLiteral('mul vl')
)
arith_immediate = pp.Group(
immediate.setResultsName('base_immediate')
+ pp.Suppress(pp.Literal(','))
+ shift_op.setResultsName('shift_op')
+ immediate.setResultsName('shift')
+ pp.Optional(immediate).setResultsName('shift')
).setResultsName(self.IMMEDIATE_ID)
# Register:
# scalar: [XWBHSDQ][0-9]{1,2} | vector: V[0-9]{1,2}\.[12468]{1,2}[BHSD]()?
# define SP and ZR register aliases as regex, due to pyparsing does not support
# scalar: [XWBHSDQ][0-9]{1,2} | vector: [VZ][0-9]{1,2}(\.[12468]{1,2}[BHSD])?
# | predicate: P[0-9]{1,2}(/[ZM])?
# ignore vector len control ZCR_EL[123] for now
# define SP, ZR register aliases as regex, due to pyparsing does not support
# proper lookahead
alias_r31_sp = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(sp|SP))')
alias_r31_zr = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(zr|ZR))')
scalar = pp.Word(pp.alphas, exact=1).setResultsName('prefix') + pp.Word(
scalar = pp.Word('xwbhsdqXWBHSDQ', exact=1).setResultsName('prefix') + pp.Word(
pp.nums
).setResultsName('name')
index = pp.Literal('[') + pp.Word(pp.nums).setResultsName('index') + pp.Literal(']')
vector = (
pp.CaselessLiteral('v').setResultsName('prefix')
pp.oneOf('v z', caseless=True).setResultsName('prefix')
+ pp.Word(pp.nums).setResultsName('name')
+ pp.Literal('.')
+ pp.Optional(pp.Word('12468')).setResultsName('lanes')
+ pp.Word(pp.alphas, exact=1).setResultsName('shape')
+ pp.Optional(index)
)
predicate = (
pp.CaselessLiteral('p').setResultsName('prefix')
+ pp.Word(pp.nums).setResultsName('name')
+ pp.Optional(
(
pp.Suppress(pp.Literal('/'))
+ pp.oneOf('z m', caseless=True).setResultsName('predication')
)
| (
pp.Literal('.')
+ pp.Optional(pp.Word('12468')).setResultsName('lanes')
+ pp.Word(pp.alphas, exact=1).setResultsName('shape')
)
)
)
self.list_element = vector ^ scalar
register_list = (
pp.Literal('{')
@@ -129,7 +156,8 @@ class ParserAArch64v81(BaseParser):
+ pp.Optional(index)
)
register = pp.Group(
(alias_r31_sp | alias_r31_zr | vector | scalar | register_list)
(alias_r31_sp | alias_r31_zr | vector | scalar | predicate | register_list)
#(alias_r31_sp | alias_r31_zr | vector | scalar | predicate | register_list)
+ pp.Optional(
pp.Suppress(pp.Literal(','))
+ shift_op.setResultsName('shift_op')
@@ -144,7 +172,7 @@ class ParserAArch64v81(BaseParser):
pp.Literal('[')
+ pp.Optional(register.setResultsName('base'))
+ pp.Optional(pp.Suppress(pp.Literal(',')))
+ pp.Optional(register_index ^ immediate.setResultsName('offset'))
+ pp.Optional(register_index ^ (immediate ^ arith_immediate).setResultsName('offset'))
+ pp.Literal(']')
+ pp.Optional(
pp.Literal('!').setResultsName('pre_indexed')
@@ -177,6 +205,11 @@ class ParserAArch64v81(BaseParser):
+ pp.Optional(self.comment)
)
# for testing
self.predicate = predicate
self.vector = vector
self.register = register
def parse_line(self, line, line_number=None):
"""
Parse line and return instruction form.
@@ -193,7 +226,7 @@ class ParserAArch64v81(BaseParser):
self.DIRECTIVE_ID: None,
self.COMMENT_ID: None,
self.LABEL_ID: None,
'line': line.strip(),
'line': line,
'line_number': line_number,
}
)
@@ -317,14 +350,18 @@ class ParserAArch64v81(BaseParser):
return self.process_immediate(operand[self.IMMEDIATE_ID])
if self.LABEL_ID in operand:
return self.process_label(operand[self.LABEL_ID])
if self.IDENTIFIER_ID in operand:
return self.process_identifier(operand[self.IDENTIFIER_ID])
return operand
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
# Remove unnecessarily created dictionary entries during parsing
offset = None if 'offset' not in memory_address else memory_address['offset']
base = None if 'base' not in memory_address else memory_address['base']
index = None if 'index' not in memory_address else memory_address['index']
offset = memory_address.get('offset', None)
if isinstance(offset, list) and len(offset) == 1:
offset = offset[0]
base = memory_address.get('base', None)
index = memory_address.get('index', None)
scale = 1
if base is not None and 'name' in base and base['name'] == 'sp':
base['prefix'] = 'x'
@@ -351,18 +388,20 @@ class ParserAArch64v81(BaseParser):
def process_register_list(self, register_list):
"""Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})"""
# Remove unnecessarily created dictionary entries during parsing
vlist = []
rlist = []
dict_name = ''
if 'list' in register_list:
dict_name = 'list'
if 'range' in register_list:
dict_name = 'range'
for v in register_list[dict_name]:
vlist.append(
AttrDict.convert_dict(self.list_element.parseString(v, parseAll=True).asDict())
for r in register_list[dict_name]:
rlist.append(
AttrDict.convert_dict(self.list_element.parseString(r, parseAll=True).asDict())
)
index = None if 'index' not in register_list else register_list['index']
new_dict = AttrDict({dict_name: vlist, 'index': index})
index = register_list.get('index', None)
new_dict = AttrDict({dict_name: rlist, 'index': index})
if len(new_dict[dict_name]) == 1:
return AttrDict({self.REGISTER_ID: new_dict[dict_name][0]})
return AttrDict({self.REGISTER_ID: new_dict})
def process_immediate(self, immediate):
@@ -375,7 +414,9 @@ class ParserAArch64v81(BaseParser):
# normal integer value, nothing to do
return AttrDict({self.IMMEDIATE_ID: immediate})
if 'base_immediate' in immediate:
# arithmetic immediate, nothing to do
# arithmetic immediate, add calculated value as value
immediate['shift'] = immediate['shift'][0]
immediate['value'] = int(immediate['base_immediate']['value']) << int(immediate['shift']['value'])
return AttrDict({self.IMMEDIATE_ID: immediate})
if 'float' in immediate:
dict_name = 'float'
@@ -396,6 +437,13 @@ class ParserAArch64v81(BaseParser):
label['name'] = label['name']['name']
return AttrDict({self.LABEL_ID: label})
def process_identifier(self, identifier):
    """Post-process an identifier operand and wrap it under the identifier key."""
    # drop the 'value' entry that is left over when the operand was symbol+offset
    try:
        del identifier['value']
    except KeyError:
        pass
    return AttrDict({self.IDENTIFIER_ID: identifier})
def get_full_reg_name(self, register):
"""Return one register name string including all attributes"""
if 'lanes' in register:
@@ -440,7 +488,7 @@ class ParserAArch64v81(BaseParser):
def is_vector_register(self, register):
"""Check if register is a vector register"""
if register['prefix'] in 'bhsdqv':
if register['prefix'] in 'bhsdqvz':
return True
return False
@@ -455,7 +503,7 @@ class ParserAArch64v81(BaseParser):
def is_reg_dependend_of(self, reg_a, reg_b):
"""Check if ``reg_a`` is dependent on ``reg_b``"""
prefixes_gpr = 'wx'
prefixes_vec = 'bhsdqv'
prefixes_vec = 'bhsdqvz'
if reg_a['name'] == reg_b['name']:
if reg_a['prefix'].lower() in prefixes_gpr and reg_b['prefix'].lower() in prefixes_gpr:
return True

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import string
import re
import pyparsing as pp
@@ -8,6 +9,14 @@ from osaca.parser import AttrDict, BaseParser
class ParserX86ATT(BaseParser):
_instance = None
# Singleton pattern, as this is created very many times
def __new__(cls):
    """Return the shared parser instance, creating it on the first call."""
    instance = cls._instance
    if instance is None:
        # first request: allocate the object and remember it on the class
        instance = cls._instance = super(ParserX86ATT, cls).__new__(cls)
    return instance
def __init__(self):
super().__init__()
self.isa = 'x86'
@@ -33,8 +42,20 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(relocation).setResultsName('relocation')
).setResultsName('identifier')
# Label
rest = pp.Word(pp.alphanums + '$_.+-()')
label_identifier = pp.Group(
pp.Optional(id_offset).setResultsName('offset')
+ pp.Combine(first + pp.Optional(rest)).setResultsName('name')
+ pp.Optional(relocation).setResultsName('relocation')
).setResultsName('identifier')
numeric_identifier = pp.Group(
pp.Word(pp.nums).setResultsName('name')
+ pp.Optional(pp.oneOf('b f', caseless=True).setResultsName('suffix'))
).setResultsName('identifier')
self.label = pp.Group(
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
(label_identifier | numeric_identifier).setResultsName('name')
+ pp.Literal(':')
+ pp.Optional(self.comment)
).setResultsName(self.LABEL_ID)
# Register: pp.Regex('^%[0-9a-zA-Z]+{}{z},?')
self.register = pp.Group(
@@ -43,7 +64,7 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(pp.Literal('(') + pp.Word(pp.nums) + pp.Literal(')'))
+ pp.Optional(
pp.Literal('{')
+ pp.Literal('%')
+ pp.Optional(pp.Suppress(pp.Literal('%')))
+ pp.Word(pp.alphanums).setResultsName('mask')
+ pp.Literal('}')
+ pp.Optional(
@@ -98,7 +119,7 @@ class ParserX86ATT(BaseParser):
+ pp.Literal(')')
+ pp.Optional(
pp.Literal('{')
+ pp.Literal('%')
+ pp.Optional(pp.Suppress(pp.Literal('%')))
+ pp.Word(pp.alphanums).setResultsName('mask')
+ pp.Literal('}')
)
@@ -108,23 +129,20 @@ class ParserX86ATT(BaseParser):
).setResultsName(self.MEMORY_ID)
# Directive
directive_option = pp.Combine(
pp.Word('#@.', exact=1) + pp.Word(pp.printables, excludeChars=',')
)
# parameter can be any quoted string or sequence of characters besides '#' (for comments)
# or ',' (parameter delimiter)
directive_parameter = (
pp.quotedString
^ directive_option
^ identifier
^ hex_number
^ decimal_number
^ self.register
^ pp.Group(pp.Word(pp.alphanums + '_').setResultsName('name'))
^ (
pp.Word(pp.printables, excludeChars=',#')
+ pp.Optional(pp.Suppress(pp.Literal(',')))
)
^ pp.Suppress(pp.Literal(','))
)
commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',')
self.directive = pp.Group(
pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name')
+ commaSeparatedList.setResultsName('parameters')
+ pp.ZeroOrMore(directive_parameter).setResultsName('parameters')
+ pp.Optional(self.comment)
).setResultsName(self.DIRECTIVE_ID)
@@ -134,7 +152,9 @@ class ParserX86ATT(BaseParser):
pp.alphanums
).setResultsName('mnemonic')
# Combine to instruction form
operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier)
operand_first = pp.Group(
self.register ^ immediate ^ memory ^ identifier ^ numeric_identifier
)
operand_rest = pp.Group(self.register ^ immediate ^ memory)
self.instruction_parser = (
mnemonic
@@ -173,7 +193,7 @@ class ParserX86ATT(BaseParser):
self.DIRECTIVE_ID: None,
self.COMMENT_ID: None,
self.LABEL_ID: None,
'line': line.strip(),
'line': line,
'line_number': line_number,
}
)
@@ -277,14 +297,24 @@ class ParserX86ATT(BaseParser):
return self.process_immediate(operand[self.IMMEDIATE_ID])
if self.LABEL_ID in operand:
return self.process_label(operand[self.LABEL_ID])
if self.DIRECTIVE_ID in operand:
return self.process_directive(operand[self.DIRECTIVE_ID])
return operand
def process_directive(self, directive):
    """Reduce a parsed directive to its name, its parameters and, if present, its comment."""
    reduced = {
        'name': directive['name'],
        # directives without parameters get an empty parameter list
        'parameters': directive['parameters'] if 'parameters' in directive else [],
    }
    if 'comment' in directive:
        reduced['comment'] = directive['comment']
    return AttrDict({self.DIRECTIVE_ID: reduced})
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
# Remove unnecessarily created dictionary entries during memory address parsing
offset = None if 'offset' not in memory_address else memory_address['offset']
base = None if 'base' not in memory_address else memory_address['base']
index = None if 'index' not in memory_address else memory_address['index']
offset = memory_address.get('offset', None)
base = memory_address.get('base', None)
index = memory_address.get('index', None)
scale = 1 if 'scale' not in memory_address else int(memory_address['scale'])
if isinstance(offset, str) and base is None and index is None:
offset = {'value': offset}
@@ -297,7 +327,7 @@ class ParserX86ATT(BaseParser):
def process_label(self, label):
"""Post-process label asm line"""
# remove duplicated 'name' level due to identifier
label['name'] = label['name']['name']
label['name'] = label['name'][0]['name']
return AttrDict({self.LABEL_ID: label})
def process_immediate(self, immediate):
@@ -333,45 +363,44 @@ class ParserX86ATT(BaseParser):
def is_reg_dependend_of(self, reg_a, reg_b):
"""Check if ``reg_a`` is dependent on ``reg_b``"""
# Normalize name
reg_a_name = reg_a['name'].upper()
reg_b_name = reg_b['name'].upper()
# Check if they are the same registers
if reg_a.name == reg_b.name:
if reg_a_name == reg_b_name:
return True
# Check vector registers first
if self.is_vector_register(reg_a):
if self.is_vector_register(reg_b):
if reg_a.name[1:] == reg_b.name[1:]:
if reg_a_name[1:] == reg_b_name[1:]:
# Registers in the same vector space
return True
return False
# Check basic GPRs
a_dep = ['RAX', 'EAX', 'AX', 'AH', 'AL']
b_dep = ['RBX', 'EBX', 'BX', 'BH', 'BL']
c_dep = ['RCX', 'ECX', 'CX', 'CH', 'CL']
d_dep = ['RDX', 'EDX', 'DX', 'DH', 'DL']
sp_dep = ['RSP', 'ESP', 'SP', 'SPL']
src_dep = ['RSI', 'ESI', 'SI', 'SIL']
dst_dep = ['RDI', 'EDI', 'DI', 'DIL']
basic_gprs = [a_dep, b_dep, c_dep, d_dep, sp_dep, src_dep, dst_dep]
gpr_groups = {
'A': ['RAX', 'EAX', 'AX', 'AH', 'AL'],
'B': ['RBX', 'EBX', 'BX', 'BH', 'BL'],
'C': ['RCX', 'ECX', 'CX', 'CH', 'CL'],
'D': ['RDX', 'EDX', 'DX', 'DH', 'DL'],
'SP': ['RSP', 'ESP', 'SP', 'SPL'],
'SRC': ['RSI', 'ESI', 'SI', 'SIL'],
'DST': ['RDI', 'EDI', 'DI', 'DIL']
}
if self.is_basic_gpr(reg_a):
if self.is_basic_gpr(reg_b):
for dep_group in basic_gprs:
if reg_a['name'].upper() in dep_group:
if reg_b['name'].upper() in dep_group:
for dep_group in gpr_groups.values():
if reg_a_name in dep_group:
if reg_b_name in dep_group:
return True
return False
# Check other GPRs
gpr_parser = (
pp.CaselessLiteral('R')
+ pp.Word(pp.nums).setResultsName('id')
+ pp.Optional(pp.Word('dwbDWB', exact=1))
)
try:
id_a = gpr_parser.parseString(reg_a['name'], parseAll=True).asDict()['id']
id_b = gpr_parser.parseString(reg_b['name'], parseAll=True).asDict()['id']
if id_a == id_b:
return True
except pp.ParseException:
return False
ma = re.match(r'R([0-9]+)[DWB]?', reg_a_name)
mb = re.match(r'R([0-9]+)[DWB]?', reg_b_name)
if ma and mb and ma.group(1) == mb.group(1):
return True
# No dependencies
return False
@@ -385,19 +414,11 @@ class ParserX86ATT(BaseParser):
"""Check if register is a general purpose register"""
if register is None:
return False
gpr_parser = (
pp.CaselessLiteral('R')
+ pp.Word(pp.nums).setResultsName('id')
+ pp.Optional(pp.Word('dwbDWB', exact=1))
)
if self.is_basic_gpr(register):
return True
else:
try:
gpr_parser.parseString(register['name'], parseAll=True)
return True
except pp.ParseException:
return False
return re.match(r'R([0-9]+)[DWB]?', register['name'], re.IGNORECASE)
def is_vector_register(self, register):
"""Check if register is a vector register"""

View File

@@ -53,9 +53,18 @@ class ArchSemantics(ISASemantics):
)
if len(set(port_sums)) > 1:
# balance ports
for _ in range(cycles * 100):
instr_ports[port_sums.index(max(port_sums))] -= INC
instr_ports[port_sums.index(min(port_sums))] += INC
# init list for keeping track of the current change
differences = [cycles / len(ports) for p in ports]
for _ in range(int(cycles * (1 / INC))):
if len(instr_ports) == 1:
# no balancing possible anymore
break
max_port_idx = port_sums.index(max(port_sums))
min_port_idx = port_sums.index(min(port_sums))
instr_ports[max_port_idx] -= INC
instr_ports[min_port_idx] += INC
differences[max_port_idx] -= INC
differences[min_port_idx] += INC
# instr_ports = [round(p, 2) for p in instr_ports]
self._itemsetter(*indices)(instruction_form['port_pressure'], *instr_ports)
# check if min port is zero
@@ -63,7 +72,12 @@ class ArchSemantics(ISASemantics):
# if port_pressure is not exactly 0.00, add the residual to
# the former port
if min(instr_ports) != 0.0:
instr_ports[port_sums.index(min(port_sums))] += min(instr_ports)
min_port_idx = port_sums.index(min(port_sums))
instr_ports[min_port_idx] += min(instr_ports)
differences[min_port_idx] += min(instr_ports)
# we don't need to decrease difference for other port, just
# delete it
del differences[instr_ports.index(min(instr_ports))]
self._itemsetter(*indices)(
instruction_form['port_pressure'], *instr_ports
)
@@ -80,6 +94,17 @@ class ArchSemantics(ISASemantics):
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form['port_pressure'])
)
# never remove more than the fixed utilization per uop and port, i.e.,
# cycles/len(ports)
if round(min(differences), 2) <= 0:
# don't worry if port_pressure isn't exactly 0 and just
# remove from further balancing by deleting index since
# pressure is not 0
del indices[differences.index(min(differences))]
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form['port_pressure'])
)
del differences[differences.index(min(differences))]
port_sums = self._to_list(
itemgetter(*indices)(self.get_throughput_sum(kernel))
)
@@ -373,9 +398,7 @@ class ArchSemantics(ISASemantics):
def g(obj, value):
obj[item] = value
else:
def g(obj, *values):
for item, value in zip(items, values):
obj[item] = value
@@ -391,9 +414,11 @@ class ArchSemantics(ISASemantics):
@staticmethod
def get_throughput_sum(kernel):
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
tp_sum = reduce(
(lambda x, y: [sum(z) for z in zip(x, y)]),
[instr['port_pressure'] for instr in kernel],
)
tp_sum = [round(x, 2) for x in tp_sum]
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
# typically comment, label and non-instruction lines
port_pressures = [instr['port_pressure'] for instr in kernel if instr['throughput'] != 0.0]
# Essentially summing up each column of port_pressures, where each column is one port
# and each row is one line of the kernel
# round is necessary to ensure termination of ArchSemantics.assign_optimal_throughput
tp_sum = [round(sum(col), 2) for col in zip(*port_pressures)]
return tp_sum

View File

@@ -1,12 +1,14 @@
#!/usr/bin/env python3
import base64
import os
import pickle
import re
import string
from copy import deepcopy
from itertools import product
import hashlib
from pathlib import Path
from collections import defaultdict
import ruamel.yaml
from ruamel.yaml.compat import StringIO
@@ -17,6 +19,7 @@ from osaca.parser import ParserX86ATT
class MachineModel(object):
WILDCARD = '*'
INTERNAL_VERSION = 1 # increase whenever self._data format changes to invalidate cache!
def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
if not arch and not path_to_yaml:
@@ -39,7 +42,7 @@ class MachineModel(object):
'load_throughput_default': [],
'ports': [],
'port_model_scheme': None,
'instruction_forms': [],
'instruction_forms': []
}
else:
if arch and path_to_yaml:
@@ -49,7 +52,7 @@ class MachineModel(object):
yaml = self._create_yaml_object()
if arch:
self._arch = arch.lower()
self._path = utils.find_file(self._arch + '.yml')
self._path = utils.find_datafile(self._arch + '.yml')
# check if file is cached
cached = self._get_cached(self._path) if not lazy else False
if cached:
@@ -59,8 +62,6 @@ class MachineModel(object):
with open(self._path, 'r') as f:
if not lazy:
self._data = yaml.load(f)
# cache file for next call
self._write_in_cache(self._path, self._data)
else:
file_content = ''
line = f.readline()
@@ -69,21 +70,26 @@ class MachineModel(object):
line = f.readline()
self._data = yaml.load(file_content)
self._data['instruction_forms'] = []
# separate multi-alias instruction forms
for entry in [
x for x in self._data['instruction_forms'] if isinstance(x['name'], list)
]:
for name in entry['name']:
new_entry = {'name': name}
for k in [x for x in entry.keys() if x != 'name']:
new_entry[k] = entry[k]
self._data['instruction_forms'].append(new_entry)
# remove old entry
self._data['instruction_forms'].remove(entry)
# For use with dict instead of list as DB
# self._data['instruction_dict'] = (
# self._convert_to_dict(self._data['instruction_forms'])
# )
# separate multi-alias instruction forms
for entry in [x for x in self._data['instruction_forms']
if isinstance(x['name'], list)]:
for name in entry['name']:
new_entry = {'name': name}
for k in [x for x in entry.keys() if x != 'name']:
new_entry[k] = entry[k]
self._data['instruction_forms'].append(new_entry)
# remove old entry
self._data['instruction_forms'].remove(entry)
# Normalize instruction_form names (to UPPERCASE) and build dict for faster access:
self._data['instruction_forms_dict'] = defaultdict(list)
for iform in self._data['instruction_forms']:
iform['name'] = iform['name'].upper()
self._data['instruction_forms_dict'][iform['name']].append(iform)
self._data['internal_version'] = self.INTERNAL_VERSION
if not lazy:
# cache internal representation for future use
self._write_in_cache(self._path)
def __getitem__(self, key):
"""Return configuration entry."""
@@ -98,36 +104,21 @@ class MachineModel(object):
def get_instruction(self, name, operands):
"""Find and return instruction data from name and operands."""
# For use with dict instead of list as DB
# return self.get_instruction_from_dict(name, operands)
if name is None:
return None
name_matched_iforms = self._data['instruction_forms_dict'].get(name.upper(), [])
try:
return next(
instruction_form
for instruction_form in self._data['instruction_forms']
if instruction_form['name'].upper() == name.upper()
and self._match_operands(
for instruction_form in name_matched_iforms if self._match_operands(
instruction_form['operands'] if 'operands' in instruction_form else [],
operands,
)
)
operands))
except StopIteration:
return None
except TypeError as e:
print('\nname: {}\noperands: {}'.format(name, operands))
raise TypeError from e
def get_instruction_from_dict(self, name, operands):
    """Look up an instruction form in the dict-based DB.

    The entry is read from ``self._data['instruction_dict']`` under the key
    produced by ``self._get_key(name, operands)``.

    :returns: the stored instruction form, or ``None`` if ``name`` is ``None``
              or the lookup raises ``KeyError``
    """
    if name is not None:
        try:
            instruction_db = self._data['instruction_dict']
            return instruction_db[self._get_key(name, operands)]
        except KeyError:
            # no matching entry --> fall through to the default result
            pass
    return None
def average_port_pressure(self, port_pressure):
"""Construct average port pressure list from instruction data."""
port_list = self._data['ports']
@@ -234,13 +225,15 @@ class MachineModel(object):
for y in list(filter(lambda x: True if x != 'class' else False, op))
]
operands.append('{}({})'.format(op['class'], ','.join(op_attrs)))
return '{} {}'.format(instruction_form['name'], ','.join(operands))
return '{} {}'.format(instruction_form['name'].lower(), ','.join(operands))
@staticmethod
def get_isa_for_arch(arch):
"""Return ISA for given micro-arch ``arch``."""
arch_dict = {
'a64fx': 'aarch64',
'tx2': 'aarch64',
'n1': 'aarch64',
'zen1': 'x86',
'zen+': 'x86',
'zen2': 'x86',
@@ -292,7 +285,8 @@ class MachineModel(object):
{
k: v
for k, v in self._data.items()
if k not in ['instruction_forms', 'load_throughput']
if k not in ['instruction_forms', 'instruction_forms_dict', 'load_throughput',
'internal_version']
},
stream,
)
@@ -312,35 +306,54 @@ class MachineModel(object):
:type filepath: str
:returns: cached DB if existing, `False` otherwise
"""
hashname = self._get_hashname(filepath)
cachepath = utils.exists_cached_file(hashname + '.pickle')
if cachepath:
# Check if modification date of DB is older than cached version
if os.path.getmtime(filepath) < os.path.getmtime(cachepath):
# load cached version
cached_db = pickle.load(open(cachepath, 'rb'))
return cached_db
else:
# DB newer than cached version --> delete cached file and return False
os.remove(cachepath)
p = Path(filepath)
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
# 1. companion cachefile: same location, with '.<name>_<sha256hash>.pickle'
companion_cachefile = p.with_name('.' + p.stem + '_' + hexhash).with_suffix('.pickle')
if companion_cachefile.exists():
# companion file (must be up-to-date, due to equal hash)
with companion_cachefile.open('rb') as f:
data = pickle.load(f)
if data.get('internal_version') == self.INTERNAL_VERSION:
return data
# 2. home cachefile: ~/.osaca/cache/<name>_<sha256hash>.pickle
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + '_' + hexhash)).with_suffix('.pickle')
if home_cachefile.exists():
# home file (must be up-to-date, due to equal hash)
with home_cachefile.open('rb') as f:
data = pickle.load(f)
if data.get('internal_version') == self.INTERNAL_VERSION:
return data
return False
def _write_in_cache(self, filepath, data):
def _write_in_cache(self, filepath):
"""
Write machine model to cache
:param filepath: path to store DB
:type filepath: str
:param data: :class:`MachineModel` to store
:type data: :class:`dict`
"""
hashname = self._get_hashname(filepath)
filepath = os.path.join(utils.CACHE_DIR, hashname + '.pickle')
pickle.dump(data, open(filepath, 'wb'))
p = Path(filepath)
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
# 1. companion cachefile: same location, with '.<name>_<sha256hash>.pickle'
companion_cachefile = p.with_name('.' + p.stem + '_' + hexhash).with_suffix('.pickle')
if os.access(str(companion_cachefile.parent), os.W_OK):
with companion_cachefile.open('wb') as f:
pickle.dump(self._data, f)
return
def _get_hashname(self, name):
"""Returns unique hashname for machine model"""
return base64.b64encode(name.encode()).decode()
# 2. home cachefile: ~/.osaca/cache/<name>_<sha256hash>.pickle
cache_dir = Path(utils.CACHE_DIR)
try:
os.makedirs(cache_dir, exist_ok=True)
except OSError:
return
home_cachefile = (cache_dir / (p.stem + '_' + hexhash)).with_suffix('.pickle')
if os.access(str(home_cachefile.parent), os.W_OK):
with home_cachefile.open('wb') as f:
pickle.dump(self._data, f)
def _get_key(self, name, operands):
"""Get unique instruction form key for dict DB."""
@@ -350,18 +363,6 @@ class MachineModel(object):
key_string += '_'.join([self._get_operand_hash(op) for op in operands])
return key_string
def _convert_to_dict(self, instruction_forms):
"""Convert list DB to dict DB"""
instruction_dict = {}
for instruction_form in instruction_forms:
instruction_dict[
self._get_key(
instruction_form['name'],
instruction_form['operands'] if 'operands' in instruction_form else None,
)
] = instruction_form
return instruction_dict
def _get_operand_hash(self, operand):
"""Get unique key for operand for dict DB"""
operand_string = ''
@@ -396,7 +397,7 @@ class MachineModel(object):
operand_string += 'p' if operand['post-indexed'] else ''
return operand_string
def _create_db_operand_aarch64(operand):
def _create_db_operand_aarch64(self, operand):
"""Create instruction form operand for DB out of operand string."""
if operand == 'i':
return {'class': 'immediate', 'imd': 'int'}
@@ -417,7 +418,7 @@ class MachineModel(object):
else:
raise ValueError('Parameter {} is not a valid operand code'.format(operand))
def _create_db_operand_x86(operand):
def _create_db_operand_x86(self, operand):
"""Create instruction form operand for DB out of operand string."""
if operand == 'r':
return {'class': 'register', 'name': 'gpr'}
@@ -490,6 +491,7 @@ class MachineModel(object):
if 'class' in operand:
# compare two DB entries
return self._compare_db_entries(i_operand, operand)
# TODO support class wildcards
# register
if 'register' in operand:
if i_operand['class'] != 'register':
@@ -501,12 +503,14 @@ class MachineModel(object):
return False
return self._is_AArch64_mem_type(i_operand, operand['memory'])
# immediate
# TODO support wildcards
if 'value' in operand or ('immediate' in operand and 'value' in operand['immediate']):
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'int'
if 'float' in operand or ('immediate' in operand and 'float' in operand['immediate']):
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'float'
if 'double' in operand or ('immediate' in operand and 'double' in operand['immediate']):
return i_operand['class'] == 'immediate' and i_operand['imd'] == 'double'
# identifier
if 'identifier' in operand or (
'immediate' in operand and 'identifier' in operand['immediate']
):
@@ -526,7 +530,7 @@ class MachineModel(object):
if 'register' in operand:
if i_operand['class'] != 'register':
return False
return self._is_x86_reg_type(i_operand['name'], operand['register'])
return self._is_x86_reg_type(i_operand, operand['register'], consider_masking=True)
# memory
if 'memory' in operand:
if i_operand['class'] != 'memory':
@@ -546,7 +550,9 @@ class MachineModel(object):
)
for key in operand_attributes:
try:
if operand_1[key] != operand_2[key] and not any([x == self.WILDCARD for x in [operand_1[key], operand_2[key]]]):
if operand_1[key] != operand_2[key] and not any(
[x == self.WILDCARD for x in [operand_1[key], operand_2[key]]]
):
return False
except KeyError:
return False
@@ -573,8 +579,13 @@ class MachineModel(object):
return False
return True
def _is_x86_reg_type(self, i_reg_name, reg):
def _is_x86_reg_type(self, i_reg, reg, consider_masking=False):
"""Check if register type match."""
i_reg_name = i_reg['name'] if i_reg and 'name' in i_reg else i_reg
if reg is None:
if i_reg is None:
return True
return False
# check for wildcards
if i_reg_name == self.WILDCARD or reg['name'] == self.WILDCARD:
return True
@@ -582,6 +593,33 @@ class MachineModel(object):
parser_x86 = ParserX86ATT()
if parser_x86.is_vector_register(reg):
if reg['name'].rstrip(string.digits).lower() == i_reg_name:
# Consider masking and zeroing for AVX512
if consider_masking:
mask_ok = zero_ok = True
if 'mask' in reg or 'mask' in i_reg:
# one instruction is missing the masking while the other has it
mask_ok = False
# check for wildcard
if (
(
'mask' in reg
and reg['mask'].rstrip(string.digits).lower() == i_reg.get('mask')
)
or reg.get('mask') == self.WILDCARD
or i_reg.get('mask') == self.WILDCARD
):
mask_ok = True
if bool('zeroing' in reg) ^ bool('zeroing' in i_reg):
# one instruction is missing zeroing while the other has it
zero_ok = False
# check for wildcard
if (
i_reg.get('zeroing') == self.WILDCARD
or reg.get('zeroing') == self.WILDCARD
):
zero_ok = True
if not mask_ok or not zero_ok:
return False
return True
else:
if i_reg_name == 'gpr':

View File

@@ -2,7 +2,7 @@
from itertools import chain
from osaca import utils
from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from .hw_model import MachineModel
@@ -26,12 +26,12 @@ class ISASemantics(object):
def __init__(self, isa, path_to_yaml=None):
self._isa = isa.lower()
path = utils.find_file('isa/' + self._isa + '.yml') if not path_to_yaml else path_to_yaml
path = path_to_yaml or utils.find_datafile('isa/' + self._isa + '.yml')
self._isa_model = MachineModel(path_to_yaml=path)
if self._isa == 'x86':
self._parser = ParserX86ATT()
elif self._isa == 'aarch64':
self._parser = ParserAArch64v81()
self._parser = ParserAArch64()
def process(self, instruction_forms):
"""Process a list of instruction forms."""
@@ -52,7 +52,6 @@ class ISASemantics(object):
return
# check if instruction form is in ISA yaml, otherwise apply standard operand assignment
# (one dest, others source)
# import pdb; pdb.set_trace()
isa_data = self._isa_model.get_instruction(
instruction_form['instruction'], instruction_form['operands']
)
@@ -103,14 +102,14 @@ class ISASemantics(object):
if ('post_indexed' in operand['memory'] and operand['memory']['post_indexed']) or (
'pre_indexed' in operand['memory'] and operand['memory']['pre_indexed']
):
op_dict['source'].remove(operand)
op_dict['src_dst'].append(operand)
op_dict['src_dst'].append(AttrDict.convert_dict(
{'register': operand['memory']['base']}))
for operand in [op for op in op_dict['destination'] if 'memory' in op]:
if ('post_indexed' in operand['memory'] and operand['memory']['post_indexed']) or (
'pre_indexed' in operand['memory'] and operand['memory']['pre_indexed']
):
op_dict['destination'].remove(operand)
op_dict['src_dst'].append(operand)
op_dict['src_dst'].append(AttrDict.convert_dict(
{'register': operand['memory']['base']}))
# store operand list in dict and reassign operand key/value pair
instruction_form['semantic_operands'] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
from collections import OrderedDict
from osaca.parser import ParserAArch64v81, ParserX86ATT, get_parser
from osaca.parser import ParserAArch64, ParserX86ATT, get_parser
COMMENT_MARKER = {'start': 'OSACA-BEGIN', 'end': 'OSACA-END'}
@@ -22,9 +22,9 @@ def reduce_to_section(kernel, isa):
else:
raise ValueError('ISA not supported.')
if start == -1:
raise LookupError('Could not find START MARKER. Make sure it is inserted!')
start = 0
if end == -1:
raise LookupError('Could not find END MARKER. Make sure it is inserted!')
end = len(kernel)
return kernel[start:end]
@@ -38,7 +38,7 @@ def find_marked_kernel_AArch64(lines):
nop_bytes = ['213', '3', '32', '31']
return find_marked_section(
lines,
ParserAArch64v81(),
ParserAArch64(),
['mov'],
'x1',
[111, 222],
@@ -277,6 +277,11 @@ def find_basic_loop_bodies(lines):
current_block.append(line)
# Find end of block by searching for references to valid jump labels
if line['instruction'] and line['operands']:
# Ignore `b.none` instructions (relevant for ARM SVE code)
# This branch instruction is often present _within_ inner loop blocks, but usually
# does not terminate
if line['instruction'] == 'b.none':
continue
for operand in [o for o in line['operands'] if 'identifier' in o]:
if operand['identifier']['name'] in valid_jump_labels:
if operand['identifier']['name'] == label:

View File

@@ -1,28 +1,14 @@
#!/usr/bin/env python3
import os.path
DATA_DIRS = [os.path.expanduser('~/.osaca/data'), os.path.join(os.path.dirname(__file__), 'data')]
CACHE_DIR = os.path.expanduser('~/.osaca/cache')
def find_file(name):
def find_datafile(name):
"""Check for existence of name in user or package data folders and return path."""
search_paths = [os.path.expanduser('~/.osaca/data'),
os.path.join(os.path.dirname(__file__), 'data')]
for dir in search_paths:
for dir in DATA_DIRS:
path = os.path.join(dir, name)
if os.path.exists(path):
return path
raise FileNotFoundError("Could not find {!r} in {!r}.".format(name, search_paths))
def exists_cached_file(name):
"""Check for existence of file in cache dir. Returns path if it exists and False otherwise."""
if not os.path.exists(CACHE_DIR):
os.makedirs(CACHE_DIR)
return False
search_paths = [CACHE_DIR]
for dir in search_paths:
path = os.path.join(dir, name)
if os.path.exists(path):
return path
return False
raise FileNotFoundError("Could not find {!r} in {!r}.".format(name, DATA_DIRS))

View File

@@ -2,11 +2,14 @@
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
from setuptools.command.install import install as _install
from setuptools.command.sdist import sdist as _sdist
# To use a consistent encoding
from codecs import open
import os
import io
import re
import sys
here = os.path.abspath(os.path.dirname(__file__))
@@ -27,6 +30,27 @@ def find_version(*file_paths):
raise RuntimeError("Unable to find version string.")
def _run_build_cache(dir):
    """Run ``_build_cache.py`` inside ``<dir>/osaca/data`` with the current interpreter.

    :param dir: directory that contains the ``osaca`` package tree
    :type dir: str
    """
    from subprocess import check_call

    data_dir = os.path.join(dir, 'osaca', 'data')
    # This is run inside the install staging directory (that had no .pyc files)
    # We don't want to generate any, hence the '-B' switch.
    # https://github.com/eliben/pycparser/pull/135
    command = [sys.executable, '-B', '_build_cache.py']
    check_call(command, cwd=data_dir)
class install(_install):
    """Install command that runs the cache build on ``install_lib`` after installing."""

    def run(self):
        super().run()
        cache_args = (self.install_lib,)
        self.execute(_run_build_cache, cache_args, msg="Build ISA and architecture cache")
class sdist(_sdist):
    """Sdist command that runs the cache build on the release tree after creating it."""

    def make_release_tree(self, basedir, files):
        super().make_release_tree(basedir, files)
        cache_args = (basedir,)
        self.execute(_run_build_cache, cache_args, msg="Build ISA and architecture cache")
# Get the long description from the README file
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()
@@ -59,7 +83,7 @@ setup(
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
'Development Status :: 3 - Alpha',
'Development Status :: 4 - Beta',
# Indicate who your project is intended for
'Intended Audience :: Developers',
@@ -76,6 +100,9 @@ setup(
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
],
# What does your project relate to?
@@ -91,8 +118,8 @@ setup(
# https://packaging.python.org/en/latest/requirements.html
install_requires=[
'networkx',
'pyparsing',
'ruamel.yaml',
'pyparsing>=2.3.1',
'ruamel.yaml>=0.15.71',
],
python_requires='>=3.5',
@@ -124,4 +151,7 @@ setup(
'osaca=osaca.osaca:main',
],
},
# Overwriting install and sdist to enforce cache distribution with package
cmdclass={'install': install, 'sdist': sdist},
)

View File

@@ -8,7 +8,7 @@ suite = unittest.TestLoader().loadTestsFromNames(
[
'test_base_parser',
'test_parser_x86att',
'test_parser_AArch64v81',
'test_parser_AArch64',
'test_marker_utils',
'test_semantics',
'test_frontend',

View File

@@ -18,6 +18,12 @@ class TestBaseParser(unittest.TestCase):
pass
with open(self._find_file('triad_x86_iaca.s')) as f:
self.triad_code = f.read()
with open(self._find_file('triad_arm_iaca.s')) as f:
self.triad_code_arm = f.read()
with open(self._find_file('kernel_x86.s')) as f:
self.x86_code = f.read()
with open(self._find_file('kernel_aarch64.s')) as f:
self.aarch64_code = f.read()
##################
# Test
@@ -59,6 +65,12 @@ class TestBaseParser(unittest.TestCase):
with self.assertRaises(NotImplementedError):
self.parser.normalize_imd(imd_hex_1)
def test_detect_ISA(self):
    """``BaseParser.detect_ISA`` must report the expected ISA for each test kernel."""
    expectations = [
        (self.triad_code, 'x86'),
        (self.triad_code_arm, 'aarch64'),
        (self.x86_code, 'x86'),
        (self.aarch64_code, 'aarch64'),
    ]
    for code, isa in expectations:
        self.assertEqual(BaseParser.detect_ISA(code), isa)
##################
# Helper functions
##################

View File

@@ -11,7 +11,7 @@ from shutil import copyfile
from unittest.mock import patch
import osaca.osaca as osaca
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.semantics import MachineModel
@@ -71,7 +71,7 @@ class TestCLI(unittest.TestCase):
def test_get_parser(self):
self.assertTrue(isinstance(osaca.get_asm_parser('csx'), ParserX86ATT))
self.assertTrue(isinstance(osaca.get_asm_parser('tx2'), ParserAArch64v81))
self.assertTrue(isinstance(osaca.get_asm_parser('tx2'), ParserAArch64))
with self.assertRaises(ValueError):
osaca.get_asm_parser('UNKNOWN')
@@ -153,6 +153,64 @@ class TestCLI(unittest.TestCase):
output = StringIO()
osaca.run(args, output_file=output)
def test_without_arch(self):
    """Run the x86 and AArch64 test kernels without the --arch flag."""
    parser = osaca.create_parser()
    # both runs write into the same buffer
    output = StringIO()
    for kernel in ('kernel_x86.s', 'kernel_aarch64.s'):
        kernel_path = self._find_test_file(kernel)
        args = parser.parse_args([kernel_path])
        osaca.run(args, output_file=output)
def test_user_warnings(self):
    """Check that each run on the unmarked kernel prints exactly one WARNING.

    ``assertEqual`` is used instead of ``assertTrue(... == 1)`` so that a
    failing run reports the actual number of warnings.
    """
    parser = osaca.create_parser()
    kernel = 'triad_x86_unmarked.s'
    args = parser.parse_args(
        ['--arch', 'csx', '--ignore-unknown', self._find_test_file(kernel)]
    )
    output = StringIO()
    osaca.run(args, output_file=output)
    # WARNING for length
    self.assertEqual(output.getvalue().count('WARNING'), 1)
    args = parser.parse_args(
        ['--lines', '100-199', '--ignore-unknown', self._find_test_file(kernel)]
    )
    output = StringIO()
    osaca.run(args, output_file=output)
    # WARNING for arch
    self.assertEqual(output.getvalue().count('WARNING'), 1)
def test_lines_arg(self):
    """Compare runs using different ``--lines`` notations against a run without it.

    The first 8 lines of each output are excluded from the comparison.
    """
    parser = osaca.create_parser()
    kernel_path = self._find_test_file('triad_x86_iaca.s')

    # reference run without --lines
    reference_args = parser.parse_args(['--arch', 'csx', kernel_path])
    reference_stream = StringIO()
    osaca.run(reference_args, output_file=reference_stream)
    reference = reference_stream.getvalue().split('\n')[8:]

    # the same line selection written with '-', ':' and mixed ',' notation
    line_specs = ['146-154', '146:154', '146,147:148,149-154']
    variants = [
        parser.parse_args(['--lines', spec, '--arch', 'csx', kernel_path])
        for spec in line_specs
    ]
    for variant in variants:
        with self.subTest(params=variant):
            stream = StringIO()
            osaca.run(variant, output_file=stream)
            self.assertEqual(stream.getvalue().split('\n')[8:], reference)
##################
# Helper functions
##################

View File

@@ -124,6 +124,18 @@ class TestDBInterface(unittest.TestCase):
with self.assertRaises(AssertionError):
dbi.import_benchmark_output('csx', 'ibench', 'invalid_file')
def test_online_scraping(self):
    """Check the result of ``dbi._scrape_from_felixcloutier`` for three instructions."""
    cases = [
        # addpd -- suspicious instruction, normal URL
        ('addpd', (True, '(r) (r,w)')),
        # movapd -- not suspicious
        ('movapd', (False, '(r) (w)')),
        # vfmadd132pd -- only in combined view with 213/231.
        # No 2-operand version, therefore, empty string
        ('vfmadd132pd', (True, '')),
    ]
    for mnemonic, expected in cases:
        self.assertEqual(dbi._scrape_from_felixcloutier(mnemonic), expected)
##################
# Helper functions
##################

View File

@@ -0,0 +1,345 @@
.file "triad.c"
.section .rodata.str1.8,"aMS",@progbits,1
.align 8
.LC9:
.string "%12.1f | %9.8f | %9.3f | %7.1f | %7.1f | %7d | %4d \n"
.text
.p2align 4,,15
.globl triad
.type triad, @function
triad:
.LFB24:
.cfi_startproc
pushq %r13
.cfi_def_cfa_offset 16
.cfi_offset 13, -16
movslq %edi, %rax
movl $64, %edi
leaq 16(%rsp), %r13
.cfi_def_cfa 13, 0
andq $-32, %rsp
pushq -8(%r13)
pushq %rbp
.cfi_escape 0x10,0x6,0x2,0x76,0
movq %rsp, %rbp
pushq %r15
.cfi_escape 0x10,0xf,0x2,0x76,0x78
leaq 0(,%rax,8), %r15
pushq %r14
movq %r15, %rsi
pushq %r13
.cfi_escape 0xf,0x3,0x76,0x68,0x6
.cfi_escape 0x10,0xe,0x2,0x76,0x70
pushq %r12
pushq %rbx
.cfi_escape 0x10,0xc,0x2,0x76,0x60
.cfi_escape 0x10,0x3,0x2,0x76,0x58
movq %rax, %rbx
subq $72, %rsp
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r14
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r12
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r13
call aligned_alloc
movq %rax, %r15
leal -1(%rbx), %eax
movl %eax, -96(%rbp)
testl %ebx, %ebx
jle .L2
cmpl $2, %eax
jbe .L14
movl %ebx, %esi
vmovapd .LC0(%rip), %ymm0
xorl %eax, %eax
xorl %ecx, %ecx
shrl $2, %esi
.p2align 4,,10
.p2align 3
.L4:
addl $1, %ecx
vmovapd %ymm0, (%r15,%rax)
vmovapd %ymm0, 0(%r13,%rax)
vmovapd %ymm0, (%r12,%rax)
vmovapd %ymm0, (%r14,%rax)
addq $32, %rax
cmpl %ecx, %esi
ja .L4
movl %ebx, %eax
andl $-4, %eax
cmpl %eax, %ebx
je .L26
vzeroupper
.L3:
vmovsd .LC1(%rip), %xmm0
movslq %eax, %rcx
vmovsd %xmm0, (%r15,%rcx,8)
vmovsd %xmm0, 0(%r13,%rcx,8)
vmovsd %xmm0, (%r12,%rcx,8)
vmovsd %xmm0, (%r14,%rcx,8)
leal 1(%rax), %ecx
cmpl %ecx, %ebx
jle .L2
movslq %ecx, %rcx
addl $2, %eax
vmovsd %xmm0, (%r15,%rcx,8)
vmovsd %xmm0, 0(%r13,%rcx,8)
vmovsd %xmm0, (%r12,%rcx,8)
vmovsd %xmm0, (%r14,%rcx,8)
cmpl %eax, %ebx
jle .L2
cltq
vmovsd %xmm0, (%r15,%rax,8)
vmovsd %xmm0, 0(%r13,%rax,8)
vmovsd %xmm0, (%r12,%rax,8)
vmovsd %xmm0, (%r14,%rax,8)
.L2:
movl %ebx, %eax
movl $1, -84(%rbp)
movl %ebx, %r10d
andl $-4, %eax
shrl $2, %r10d
movl %eax, -100(%rbp)
.p2align 4,,10
.p2align 3
.L13:
leaq -56(%rbp), %rsi
leaq -72(%rbp), %rdi
movl %r10d, -88(%rbp)
call timing
movl -88(%rbp), %r10d
xorl %r11d, %r11d
.p2align 4,,10
.p2align 3
.L12:
vmovsd (%r14), %xmm0
vxorpd %xmm7, %xmm7, %xmm7
vucomisd %xmm7, %xmm0
jbe .L6
movq %r14, %rdi
movl %r11d, -92(%rbp)
movl %r10d, -88(%rbp)
vzeroupper
call dummy
movl -92(%rbp), %r11d
movl -88(%rbp), %r10d
.L6:
testl %ebx, %ebx
jle .L8
cmpl $2, -96(%rbp)
jbe .L15
xorl %eax, %eax
xorl %ecx, %ecx
.p2align 4,,10
.p2align 3
.L10:
vmovapd (%r15,%rax), %ymm0
vmovapd (%r12,%rax), %ymm3
addl $1, %ecx
vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0
vmovapd %ymm0, (%r14,%rax)
addq $32, %rax
cmpl %ecx, %r10d
ja .L10
movl -100(%rbp), %eax
cmpl %ebx, %eax
je .L8
.L9:
movslq %eax, %rcx
vmovsd 0(%r13,%rcx,8), %xmm0
vmovsd (%r12,%rcx,8), %xmm5
vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0
vmovsd %xmm0, (%r14,%rcx,8)
leal 1(%rax), %ecx
cmpl %ebx, %ecx
jge .L8
movslq %ecx, %rcx
addl $2, %eax
vmovsd 0(%r13,%rcx,8), %xmm0
vmovsd (%r12,%rcx,8), %xmm6
vfmadd132sd (%r15,%rcx,8), %xmm6, %xmm0
vmovsd %xmm0, (%r14,%rcx,8)
cmpl %eax, %ebx
jle .L8
cltq
vmovsd (%r15,%rax,8), %xmm0
vmovsd (%r12,%rax,8), %xmm4
vfmadd132sd 0(%r13,%rax,8), %xmm4, %xmm0
vmovsd %xmm0, (%r14,%rax,8)
.L8:
addl $1, %r11d
cmpl -84(%rbp), %r11d
jne .L12
leaq -56(%rbp), %rsi
leaq -64(%rbp), %rdi
movl %r11d, -84(%rbp)
movl %r10d, -88(%rbp)
vzeroupper
call timing
vmovsd -64(%rbp), %xmm1
vsubsd -72(%rbp), %xmm1, %xmm1
vmovsd .LC3(%rip), %xmm2
movl -84(%rbp), %r11d
movl -88(%rbp), %r10d
vucomisd %xmm1, %xmm2
leal (%r11,%r11), %eax
movl %eax, -84(%rbp)
ja .L13
movl %eax, %esi
vxorpd %xmm6, %xmm6, %xmm6
vxorpd %xmm0, %xmm0, %xmm0
movl %ebx, %edx
sarl %esi
vcvtsi2sd %ebx, %xmm0, %xmm0
movl $.LC9, %edi
movl $5, %eax
vcvtsi2sd %esi, %xmm6, %xmm6
vmulsd .LC5(%rip), %xmm6, %xmm2
vmovsd .LC4(%rip), %xmm5
vmovsd .LC6(%rip), %xmm7
vmulsd %xmm0, %xmm6, %xmm4
vmulsd %xmm0, %xmm2, %xmm2
vdivsd %xmm1, %xmm4, %xmm4
vdivsd %xmm1, %xmm2, %xmm2
vdivsd %xmm5, %xmm4, %xmm4
vmulsd %xmm7, %xmm2, %xmm3
vaddsd %xmm0, %xmm0, %xmm2
vmulsd .LC8(%rip), %xmm0, %xmm0
vmulsd %xmm6, %xmm2, %xmm2
vmulsd .LC7(%rip), %xmm2, %xmm2
vmulsd %xmm7, %xmm3, %xmm3
vdivsd %xmm5, %xmm0, %xmm0
vdivsd %xmm5, %xmm4, %xmm4
vdivsd %xmm1, %xmm2, %xmm2
call printf
movq %r14, %rdi
call free
movq %r12, %rdi
call free
movq %r13, %rdi
call free
addq $72, %rsp
movq %r15, %rdi
popq %rbx
popq %r12
popq %r13
.cfi_remember_state
.cfi_def_cfa 13, 0
popq %r14
popq %r15
popq %rbp
leaq -16(%r13), %rsp
.cfi_def_cfa 7, 16
popq %r13
.cfi_def_cfa_offset 8
jmp free
.p2align 4,,10
.p2align 3
.L15:
.cfi_restore_state
xorl %eax, %eax
jmp .L9
.L26:
vzeroupper
jmp .L2
.L14:
xorl %eax, %eax
jmp .L3
.cfi_endproc
.LFE24:
.size triad, .-triad
.section .rodata.str1.8
.align 8
.LC10:
.string "TRIAD a[i] = b[i]+c[i]*d[i], 32 byte/it, 2 Flop/it"
.align 8
.LC11:
.string "Size (KByte) | runtime | MFlop/s | MB/s | MLUP/s | repeat | size"
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB25:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movl $.LC10, %edi
movl $20, %ebx
call puts
movl $.LC11, %edi
call puts
.p2align 4,,10
.p2align 3
.L28:
vxorpd %xmm1, %xmm1, %xmm1
movq .LC12(%rip), %rax
vcvtsi2sd %ebx, %xmm1, %xmm1
addl $1, %ebx
vmovq %rax, %xmm0
call pow
vcvttsd2si %xmm0, %edi
call triad
cmpl $36, %ebx
jne .L28
xorl %eax, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE25:
.size main, .-main
.section .rodata.cst32,"aM",@progbits,32
.align 32
.LC0:
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC1:
.long 1907715710
.long 1048610426
.align 8
.LC3:
.long 2576980378
.long 1070176665
.align 8
.LC4:
.long 0
.long 1083129856
.align 8
.LC5:
.long 0
.long 1077936128
.align 8
.LC6:
.long 0
.long 1062207488
.align 8
.LC7:
.long 2696277389
.long 1051772663
.align 8
.LC8:
.long 0
.long 1075838976
.align 8
.LC12:
.long 3435973837
.long 1073007820
.ident "GCC: (GNU) 7.2.0"
.section .note.GNU-stack,"",@progbits

View File

@@ -7,7 +7,7 @@ import os
import unittest
from osaca.frontend import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.semantics import ArchSemantics, KernelDG, MachineModel
@@ -20,7 +20,7 @@ class TestFrontend(unittest.TestCase):
def setUpClass(self):
# set up parser and kernels
self.parser_x86 = ParserX86ATT()
self.parser_AArch64 = ParserAArch64v81()
self.parser_AArch64 = ParserAArch64()
with open(self._find_file('kernel_x86.s')) as f:
code_x86 = f.read()
with open(self._find_file('kernel_aarch64.s')) as f:
@@ -33,7 +33,7 @@ class TestFrontend(unittest.TestCase):
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
)
self.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
arch='tx2'
)
self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')

View File

@@ -9,7 +9,7 @@ import unittest
from collections import OrderedDict
from osaca.api import KerncraftAPI
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
class TestKerncraftAPI(unittest.TestCase):
@@ -17,7 +17,7 @@ class TestKerncraftAPI(unittest.TestCase):
def setUpClass(self):
# set up parser and kernels
self.parser_x86 = ParserX86ATT()
self.parser_AArch64 = ParserAArch64v81()
self.parser_AArch64 = ParserAArch64()
with open(self._find_file('triad_x86_iaca.s')) as f:
self.code_x86 = f.read()
with open(self._find_file('triad_arm_iaca.s')) as f:
@@ -63,7 +63,7 @@ class TestKerncraftAPI(unittest.TestCase):
('0DV', 0.0),
('1', 34.0),
('1DV', 0.0),
('2', 2.0),
('2', 3.0),
('3', 64.0),
('4', 64.0),
('5', 32.0),

View File

@@ -8,13 +8,13 @@ from collections import OrderedDict
from osaca.semantics import reduce_to_section, find_basic_blocks, find_jump_labels, \
find_basic_loop_bodies
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT
class TestMarkerUtils(unittest.TestCase):
@classmethod
def setUpClass(self):
self.parser_AArch = ParserAArch64v81()
self.parser_AArch = ParserAArch64()
self.parser_x86 = ParserX86ATT()
with open(self._find_file('triad_arm_iaca.s')) as f:
triad_code_arm = f.read()
@@ -178,120 +178,115 @@ class TestMarkerUtils(unittest.TestCase):
def test_marker_special_cases_AArch(self):
bytes_line = '.byte 213,3,32,31\n'
mov_start = 'mov x1, #111\n'
mov_end = 'mov x1, #222\n'
prologue = 'dup v0.2d, x14\n' + ' neg x9, x9\n' + ' .p2align 6\n'
start_marker = 'mov x1, #111\n' + bytes_line
end_marker = 'mov x1, #222\n' + bytes_line
prologue = (
'dup v0.2d, x14\n'
'neg x9, x9\n'
'.p2align 6\n')
kernel = (
'.LBB0_28:\n'
+ 'fmul v7.2d, v7.2d, v19.2d\n'
+ 'stp q0, q1, [x10, #-32]\n'
+ 'b.ne .LBB0_28\n'
)
epilogue = '.LBB0_29: // Parent Loop BB0_20 Depth=1\n' + 'bl dummy\n'
kernel_length = len(list(filter(None, kernel.split('\n'))))
+ 'b.ne .LBB0_28\n')
epilogue = (
'.LBB0_29: // Parent Loop BB0_20 Depth=1\n'
'bl dummy\n')
# marker directly at the beginning
code_beginning = mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
beginning_parsed = self.parser_AArch.parse_file(code_beginning)
test_kernel = reduce_to_section(beginning_parsed, 'AArch64')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
samples = [
# (test name,
# ignored prologue, section to be extracted, ignored epilogue)
("markers",
prologue + start_marker, kernel, end_marker + epilogue),
("marker at file start",
start_marker, kernel, end_marker + epilogue),
("no start marker",
'', prologue + kernel, end_marker + epilogue),
("marker at file end",
prologue + start_marker, kernel, end_marker),
("no end marker",
prologue + start_marker, kernel + epilogue, ''),
("empty kernel",
prologue + start_marker, '', end_marker + epilogue),
]
# marker at the end
code_end = prologue + mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
end_parsed = self.parser_AArch.parse_file(code_end)
test_kernel = reduce_to_section(end_parsed, 'AArch64')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
# no kernel
code_empty = prologue + mov_start + bytes_line + mov_end + bytes_line + epilogue
empty_parsed = self.parser_AArch.parse_file(code_empty)
test_kernel = reduce_to_section(empty_parsed, 'AArch64')
self.assertEqual(len(test_kernel), 0)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
self.assertEqual(test_kernel, [])
# no start marker
code_no_start = prologue + bytes_line + kernel + mov_end + bytes_line + epilogue
no_start_parsed = self.parser_AArch.parse_file(code_no_start)
with self.assertRaises(LookupError):
reduce_to_section(no_start_parsed, 'AArch64')
# no end marker
code_no_end = prologue + mov_start + bytes_line + kernel + mov_end + epilogue
no_end_parsed = self.parser_AArch.parse_file(code_no_end)
with self.assertRaises(LookupError):
reduce_to_section(no_end_parsed, 'AArch64')
# no marker at all
code_no_marker = prologue + kernel + epilogue
no_marker_parsed = self.parser_AArch.parse_file(code_no_marker)
with self.assertRaises(LookupError):
reduce_to_section(no_marker_parsed, 'AArch64')
for test_name, pro, kernel, epi in samples:
code = pro + kernel + epi
parsed = self.parser_AArch.parse_file(code)
test_kernel = reduce_to_section(parsed, 'AArch64')
if kernel:
kernel_length = len(kernel.strip().split('\n'))
else:
kernel_length = 0
self.assertEqual(
len(test_kernel), kernel_length,
msg="Invalid exctracted kernel length on {!r} sample".format(test_name))
if pro:
kernel_start = len((pro).strip().split('\n'))
else:
kernel_start = 0
parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
self.assertEqual(
test_kernel, parsed_kernel,
msg="Invalid exctracted kernel on {!r}".format(test_name))
def test_marker_special_cases_x86(self):
bytes_line = '.byte 100\n.byte 103\n.byte 144\n'
mov_start = 'movl $111, %ebx\n'
mov_end = 'movl $222, %ebx\n'
prologue = 'movl -88(%rbp), %r10d\n' + 'xorl %r11d, %r11d\n' + '.p2align 4,,10\n'
bytes_line = (
'.byte 100\n'
'.byte 103\n'
'.byte 144\n')
start_marker = 'movl $111, %ebx\n' + bytes_line
end_marker = 'movl $222, %ebx\n' + bytes_line
prologue = (
'movl -88(%rbp), %r10d\n'
'xorl %r11d, %r11d\n'
'.p2align 4,,10\n')
kernel = (
'.L3: #L3\n'
+ 'vmovsd .LC1(%rip), %xmm0\n'
+ 'vmovsd %xmm0, (%r15,%rcx,8)\n'
+ 'cmpl %ecx, %ebx\n'
+ 'jle .L3\n'
)
epilogue = 'leaq -56(%rbp), %rsi\n' + 'movl %r10d, -88(%rbp)\n' + 'call timing\n'
kernel_length = len(list(filter(None, kernel.split('\n'))))
'vmovsd .LC1(%rip), %xmm0\n'
'vmovsd %xmm0, (%r15,%rcx,8)\n'
'cmpl %ecx, %ebx\n'
'jle .L3\n')
epilogue = (
'leaq -56(%rbp), %rsi\n'
'movl %r10d, -88(%rbp)\n'
'call timing\n')
samples = [
# (test name,
# ignored prologue, section to be extraced, ignored epilogue)
("markers",
prologue + start_marker, kernel, end_marker + epilogue),
("marker at file start",
start_marker, kernel, end_marker + epilogue),
("no start marker",
'', prologue + kernel, end_marker + epilogue),
("marker at file end",
prologue + start_marker, kernel, end_marker),
("no end marker",
prologue + start_marker, kernel + epilogue, ''),
("empty kernel",
prologue + start_marker, '', end_marker + epilogue),
]
# marker directly at the beginning
code_beginning = mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
beginning_parsed = self.parser_x86.parse_file(code_beginning)
test_kernel = reduce_to_section(beginning_parsed, 'x86')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
# marker at the end
code_end = prologue + mov_start + bytes_line + kernel + mov_end + bytes_line + epilogue
end_parsed = self.parser_x86.parse_file(code_end)
test_kernel = reduce_to_section(end_parsed, 'x86')
self.assertEqual(len(test_kernel), kernel_length)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
self.assertEqual(test_kernel, parsed_kernel)
# no kernel
code_empty = prologue + mov_start + bytes_line + mov_end + bytes_line + epilogue
empty_parsed = self.parser_x86.parse_file(code_empty)
test_kernel = reduce_to_section(empty_parsed, 'x86')
self.assertEqual(len(test_kernel), 0)
kernel_start = len(list(filter(None, (prologue + mov_start + bytes_line).split('\n'))))
self.assertEqual(test_kernel, [])
# no start marker
code_no_start = prologue + bytes_line + kernel + mov_end + bytes_line + epilogue
no_start_parsed = self.parser_x86.parse_file(code_no_start)
with self.assertRaises(LookupError):
reduce_to_section(no_start_parsed, 'x86')
# no end marker
code_no_end = prologue + mov_start + bytes_line + kernel + mov_end + epilogue
no_end_parsed = self.parser_x86.parse_file(code_no_end)
with self.assertRaises(LookupError):
reduce_to_section(no_end_parsed, 'x86')
# no marker at all
code_no_marker = prologue + kernel + epilogue
no_marker_parsed = self.parser_x86.parse_file(code_no_marker)
with self.assertRaises(LookupError):
reduce_to_section(no_marker_parsed, 'x86')
for test_name, pro, kernel, epi in samples:
code = pro + kernel + epi
parsed = self.parser_x86.parse_file(code)
test_kernel = reduce_to_section(parsed, 'x86')
if kernel:
kernel_length = len(kernel.strip().split('\n'))
else:
kernel_length = 0
self.assertEqual(
len(test_kernel), kernel_length,
msg="Invalid exctracted kernel length on {!r} sample".format(test_name))
if pro:
kernel_start = len((pro).strip().split('\n'))
else:
kernel_start = 0
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
self.assertEqual(
test_kernel, parsed_kernel,
msg="Invalid exctracted kernel on {!r}".format(test_name))
def test_find_jump_labels(self):
self.assertEqual(find_jump_labels(self.parsed_x86),

View File

@@ -8,13 +8,13 @@ import unittest
from pyparsing import ParseException
from osaca.parser import AttrDict, ParserAArch64v81
from osaca.parser import AttrDict, ParserAArch64
class TestParserAArch64v81(unittest.TestCase):
class TestParserAArch64(unittest.TestCase):
@classmethod
def setUpClass(self):
self.parser = ParserAArch64v81()
self.parser = ParserAArch64()
with open(self._find_file('triad_arm_iaca.s')) as f:
self.triad_code = f.read()
@@ -146,8 +146,8 @@ class TestParserAArch64v81(unittest.TestCase):
def test_parse_line(self):
line_comment = '// -- Begin main'
line_label = '.LBB0_1: // =>This Inner Loop Header: Depth=1'
line_directive = '\t.cfi_def_cfa w29, -16'
line_instruction = '\tldr s0, [x11, w10, sxtw #2]\t\t// = <<2'
line_directive = '.cfi_def_cfa w29, -16'
line_instruction = 'ldr s0, [x11, w10, sxtw #2] // = <<2'
line_prefetch = 'prfm pldl1keep, [x26, #2048] //HPL'
line_preindexed = 'stp x29, x30, [sp, #-16]!'
line_postindexed = 'ldp q2, q3, [x11], #64'
@@ -201,7 +201,7 @@ class TestParserAArch64v81(unittest.TestCase):
'directive': None,
'comment': '= <<2',
'label': None,
'line': 'ldr s0, [x11, w10, sxtw #2]\t\t// = <<2',
'line': 'ldr s0, [x11, w10, sxtw #2] // = <<2',
'line_number': 4,
}
instruction_form_5 = {
@@ -309,23 +309,23 @@ class TestParserAArch64v81(unittest.TestCase):
self.assertEqual(self.parser.normalize_imd(identifier), identifier)
def test_multiple_regs(self):
instr_range = 'PUSH {r5-r7}'
instr_range = 'PUSH {x5-x7}'
reg_range = AttrDict({
'register': {
'range': [
{'prefix': 'r', 'name': '5'},
{'prefix': 'r', 'name': '7'}
{'prefix': 'x', 'name': '5'},
{'prefix': 'x', 'name': '7'}
],
'index': None
}
})
instr_list = 'POP {r5, r7, r9}'
instr_list = 'POP {x5, x7, x9}'
reg_list = AttrDict({
'register': {
'list': [
{'prefix': 'r', 'name': '5'},
{'prefix': 'r', 'name': '7'},
{'prefix': 'r', 'name': '9'}
{'prefix': 'x', 'name': '5'},
{'prefix': 'x', 'name': '7'},
{'prefix': 'x', 'name': '9'}
],
'index': None
}
@@ -411,5 +411,5 @@ class TestParserAArch64v81(unittest.TestCase):
if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromTestCase(TestParserAArch64v81)
suite = unittest.TestLoader().loadTestsFromTestCase(TestParserAArch64)
unittest.TextTestRunner(verbosity=2).run(suite)

View File

@@ -45,20 +45,31 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(len(self._get_directive(self.parser, '\t.text').parameters), 0)
self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90').name, 'align')
self.assertEqual(len(self._get_directive(self.parser, '\t.align\t16,0x90').parameters), 2)
self.assertEqual(len(self._get_directive(self.parser, '.text').parameters), 0)
self.assertEqual(
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2
)
self.assertEqual(
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
'"path/to/file.c"',
)
self.assertEqual(
self._get_directive(self.parser, '\t.set\tL$set$0,LECIE1-LSCIE1').parameters,
[{'name': 'L$set$0'}, {'name': 'LECIE1-LSCIE1'}])
['L$set$0', 'LECIE1-LSCIE1'],
)
self.assertEqual(
self._get_directive(
self.parser,
'\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support'
).parameters,
[{'name': v} for v in
['__TEXT', '__eh_frame', 'coalesced', 'no_toc+strip_static_syms+live_support']])
self.parser,
'\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support',
).parameters,
['__TEXT', '__eh_frame', 'coalesced', 'no_toc+strip_static_syms+live_support'],
)
self.assertEqual(
self._get_directive(
self.parser, '\t.section\t__TEXT,__literal16,16byte_literals').parameters,
[{'name': v} for v in ['__TEXT', '__literal16', '16byte_literals']])
self.parser, '\t.section\t__TEXT,__literal16,16byte_literals'
).parameters,
['__TEXT', '__literal16', '16byte_literals'],
)
self.assertEqual(
self._get_directive(self.parser, '\t.align\t16,0x90').parameters[1], '0x90'
)
@@ -145,8 +156,8 @@ class TestParserX86ATT(unittest.TestCase):
def test_parse_line(self):
line_comment = '# -- Begin main'
line_label = '..B1.7: # Preds ..B1.6'
line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed'
line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'
line_directive = '.quad .2.3_2__kmpc_loc_pack.2 #qed'
line_instruction = 'lea 2(%rax,%rax), %ecx #12.9'
instruction_form_1 = {
'instruction': None,
@@ -169,7 +180,7 @@ class TestParserX86ATT(unittest.TestCase):
instruction_form_3 = {
'instruction': None,
'operands': [],
'directive': {'name': 'quad', 'parameters': [{'name': '.2.3_2__kmpc_loc_pack.2'}]},
'directive': {'name': 'quad', 'parameters': ['.2.3_2__kmpc_loc_pack.2']},
'comment': 'qed',
'label': None,
'line': '.quad .2.3_2__kmpc_loc_pack.2 #qed',
@@ -186,9 +197,7 @@ class TestParserX86ATT(unittest.TestCase):
'scale': 1,
}
},
{
'register': {'name': 'ecx'}
}
{'register': {'name': 'ecx'}},
],
'directive': None,
'comment': '12.9',

View File

@@ -11,7 +11,7 @@ from subprocess import call
import networkx as nx
from osaca.osaca import get_unmatched_instruction_ratio
from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG,
MachineModel, reduce_to_section)
@@ -20,48 +20,43 @@ class TestSemanticTools(unittest.TestCase):
MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
)
USER_DATA_DIR = os.path.join(os.path.expanduser('~'), '.osaca/')
@classmethod
def setUpClass(self):
# copy db files in user directory
if not os.path.isdir(os.path.join(self.USER_DATA_DIR, 'data')):
os.makedirs(os.path.join(self.USER_DATA_DIR, 'data'))
call(['cp', '-r', self.MODULE_DATA_DIR, self.USER_DATA_DIR])
def setUpClass(cls):
# set up parser and kernels
self.parser_x86 = ParserX86ATT()
self.parser_AArch64 = ParserAArch64v81()
with open(self._find_file('kernel_x86.s')) as f:
self.code_x86 = f.read()
with open(self._find_file('kernel_aarch64.s')) as f:
self.code_AArch64 = f.read()
self.kernel_x86 = reduce_to_section(self.parser_x86.parse_file(self.code_x86), 'x86')
self.kernel_AArch64 = reduce_to_section(
self.parser_AArch64.parse_file(self.code_AArch64), 'aarch64'
cls.parser_x86 = ParserX86ATT()
cls.parser_AArch64 = ParserAArch64()
with open(cls._find_file('kernel_x86.s')) as f:
cls.code_x86 = f.read()
with open(cls._find_file('kernel_aarch64.s')) as f:
cls.code_AArch64 = f.read()
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), 'x86')
cls.kernel_AArch64 = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64), 'aarch64'
)
# set up machine models
self.machine_model_csx = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
cls.machine_model_csx = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'csx.yml')
)
self.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
cls.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'tx2.yml')
)
self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')
cls.semantics_csx = ArchSemantics(
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/x86.yml')
)
self.semantics_tx2 = ArchSemantics(
self.machine_model_tx2,
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/aarch64.yml'),
cls.semantics_tx2 = ArchSemantics(
cls.machine_model_tx2,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/aarch64.yml'),
)
self.machine_model_zen = MachineModel(arch='zen1')
cls.machine_model_zen = MachineModel(arch='zen1')
for i in range(len(self.kernel_x86)):
self.semantics_csx.assign_src_dst(self.kernel_x86[i])
self.semantics_csx.assign_tp_lt(self.kernel_x86[i])
for i in range(len(self.kernel_AArch64)):
self.semantics_tx2.assign_src_dst(self.kernel_AArch64[i])
self.semantics_tx2.assign_tp_lt(self.kernel_AArch64[i])
for i in range(len(cls.kernel_x86)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86[i])
for i in range(len(cls.kernel_AArch64)):
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i])
###########
# Tests
@@ -88,28 +83,21 @@ class TestSemanticTools(unittest.TestCase):
self.assertIsNone(test_mm_x86.get_instruction(None, []))
self.assertIsNone(test_mm_arm.get_instruction(None, []))
# test dict DB creation
test_mm_x86._data['instruction_dict'] = test_mm_x86._convert_to_dict(
test_mm_x86._data['instruction_forms']
)
test_mm_arm._data['instruction_dict'] = test_mm_arm._convert_to_dict(
test_mm_arm._data['instruction_forms']
)
# test get_instruction from dict DB
self.assertIsNone(test_mm_x86.get_instruction_from_dict(None, []))
self.assertIsNone(test_mm_arm.get_instruction_from_dict(None, []))
self.assertIsNone(test_mm_x86.get_instruction_from_dict('NOT_IN_DB', []))
self.assertIsNone(test_mm_arm.get_instruction_from_dict('NOT_IN_DB', []))
# test get_instruction from DB
self.assertIsNone(test_mm_x86.get_instruction(None, []))
self.assertIsNone(test_mm_arm.get_instruction(None, []))
self.assertIsNone(test_mm_x86.get_instruction('NOT_IN_DB', []))
self.assertIsNone(test_mm_arm.get_instruction('NOT_IN_DB', []))
name_x86_1 = 'vaddpd'
operands_x86_1 = [
{'class': 'register', 'name': 'xmm'},
{'class': 'register', 'name': 'xmm'},
{'class': 'register', 'name': 'xmm'},
]
instr_form_x86_1 = test_mm_x86.get_instruction_from_dict(name_x86_1, operands_x86_1)
instr_form_x86_1 = test_mm_x86.get_instruction(name_x86_1, operands_x86_1)
self.assertEqual(instr_form_x86_1, test_mm_x86.get_instruction(name_x86_1, operands_x86_1))
self.assertEqual(
test_mm_x86.get_instruction_from_dict('jg', [{'class': 'identifier'}]),
test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
)
name_arm_1 = 'fadd'
@@ -118,10 +106,10 @@ class TestSemanticTools(unittest.TestCase):
{'class': 'register', 'prefix': 'v', 'shape': 's'},
{'class': 'register', 'prefix': 'v', 'shape': 's'},
]
instr_form_arm_1 = test_mm_arm.get_instruction_from_dict(name_arm_1, operands_arm_1)
instr_form_arm_1 = test_mm_arm.get_instruction(name_arm_1, operands_arm_1)
self.assertEqual(instr_form_arm_1, test_mm_arm.get_instruction(name_arm_1, operands_arm_1))
self.assertEqual(
test_mm_arm.get_instruction_from_dict('b.ne', [{'class': 'identifier'}]),
test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
)

View File

@@ -1,5 +1,5 @@
[tox]
envlist = py35,py36
envlist = py35,py36,py37,py38,py39
[testenv]
commands=
python tests/all_tests.py