Compare commits

...

24 Commits

Author SHA1 Message Date
Julian Hammer
4ff8fdc4ab version bump 2020-11-11 15:14:27 +01:00
JanLJL
c204096d74 fixed typo 2020-11-11 14:11:00 +01:00
JanLJL
dea217c12c fixed test after changing TP value of instruction 2020-11-11 14:04:07 +01:00
JanLJL
92c162daa2 new instructions 2020-11-11 13:54:23 +01:00
JanLJL
87ea8f0f0a new instructions 2020-11-11 12:27:49 +01:00
Julian Hammer
cb04efc384 fixed typo 2020-11-10 13:33:24 +01:00
JanLJL
14c0ea6180 bugfixes 2020-11-09 23:29:42 +01:00
Julian Hammer
314ff4cf9d improved performance of arch_semantics and reg dependency matching 2020-11-09 19:27:47 +01:00
Julian Hammer
f64253b2b9 added dict for instruction lookup 2020-11-09 17:00:46 +01:00
Julian Hammer
979d08358e singelton for isa parsers 2020-11-09 12:36:14 +01:00
Julian Hammer
a2dd6f752d added comment 2020-11-09 12:35:13 +01:00
Julian Hammer
2fb36406a7 performance improvement of throughput summation 2020-11-09 12:01:00 +01:00
Julian Hammer
94086033a8 added __main__.py 2020-11-09 08:27:31 +01:00
JanLJL
75edfc808a version bump 2020-11-06 20:40:13 +01:00
JanLJL
c8c077a834 enhanced length warning 2020-11-06 15:49:13 +01:00
JanLJL
26ee005adc added missing test file 2020-11-06 15:07:57 +01:00
JanLJL
207c53aaad minor bugfix in HW model and added user warnings for more insight 2020-11-06 15:06:36 +01:00
JanLJL
fafd7bc526 Merge branch 'master' of https://github.com/RRZE-HPC/OSACA 2020-11-06 12:57:46 +01:00
JanLJL
b986d7eba0 added --lines option 2020-11-06 12:57:41 +01:00
Julian Hammer
6b0adb5d68 improved cache handing (always hashing original file) 2020-11-06 12:27:34 +01:00
JanLJL
f9f382a948 bugfixes 2020-11-06 12:03:54 +01:00
Julian Hammer
c6b58c63ab Merge branch 'master' of github.com:RRZE-HPC/OSACA 2020-11-03 16:28:28 +01:00
Julian Hammer
78530bfdb0 fail-safed _build_cache.py 2020-11-03 16:28:07 +01:00
JanLJL
5aa0899961 added bdist 2020-11-03 16:10:46 +01:00
17 changed files with 1171 additions and 150 deletions

View File

@@ -26,7 +26,7 @@ deploy:
username: "__token__"
password:
secure: "fRRCETOwDkJ4pFacYZghPfCQ9mSsV4PlD3sTDp8rDHoCnebPjvFYc1tIdv+Wds0ae162KNUaj9GbxjK0MTGiRcy4pD08n7ufv8snmBQ2rtOLkj7RCRg1hw30WcMHjzqScFJgQcBrpjdPmR5AlesUufh6OadGvF1NspmVRWKr8ir3KQhmNV+itAliYoqaSTRTg1zC/znm+49l5gkzlLxd+mPj5/dtcc8vZ/i2M2+nNTTjDxq71q4Ddqv+bgZV1y7OZY2YuvjEDPflUbwc3fjOxpj891uMDHodsGmEHBu8WsLpF2tAO0C/x63S0jXamkV+/4cAQqQAwWr0Lby9/BjCfUwyUMOEgZ0S+z9WoFpBpQTQEfkD2JH/UFrv4CMnLFqgDkVMcx0vc/rT4Od8eJ5wOSG5+VdniJNOLpodFOXuKc09eJMk2lE9vk9OBrcsZ09UOTPTUCMZSIP4cBDxaIkx+RHQEy63TQdJZcElRBEWGEgj2e9hbiktvIoOvbFGQDscpz7ShBDklXIpu9hnxcKHtNDEjyywTUJmx7lTMILL05DPUnpUmnMb1Gyx5lbHzhSExc9re0cxEA354UUQKBS5HwHQcEBw9stMfsaForiBAUOocUKdGqlGP9cOXFoxdC9M+ff5FNstgbjPYSowb/JbATMlmCWKgH/bXXcTGCO10sk="
distributions: sdist
distributions: "sdist bdist_wheel"
skip_existing: true
cleanup: false
on:

View File

@@ -1,6 +1,6 @@
"""Open Source Architecture Code Analyzer"""
name = 'osaca'
__version__ = '0.3.10'
__version__ = '0.3.12'
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version___

4
osaca/__main__.py Normal file
View File

@@ -0,0 +1,4 @@
#!/usr/bin/env python3
# Package entry point (osaca/__main__.py): running the package with
# `python -m osaca` executes this module, which hands over to the CLI main().
from .osaca import main
main()

9
osaca/data/_build_cache.py Normal file → Executable file
View File

@@ -4,7 +4,14 @@ import os.path
import sys
sys.path[0:0] = ['../..']
from osaca.semantics.hw_model import MachineModel
failed = False
try:
from osaca.semantics.hw_model import MachineModel
except ModuleNotFoundError:
print("Unable to import MachineModel, probably some dependency is not yet installed. SKIPPING. "
"First run of OSACA may take a while to build caches, subsequent runs will be as fast as "
"ever.")
sys.exit()
print('Building cache: ', end='')
sys.stdout.flush()

View File

@@ -57,6 +57,28 @@ port_model_scheme: |
| CALC |
+--------+
instruction_forms:
- name: add
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: add
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: add
operands:
- class: register
@@ -101,6 +123,56 @@ instruction_forms:
throughput: 0.5
latency: 1.0 # 1*p34
port_pressure: [[1, '34']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: b
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '7']]
- name: b.ne
operands:
- class: identifier
@@ -119,6 +191,24 @@ instruction_forms:
throughput: 1.0
latency: 0.0
port_pressure: [[1, '7']]
- name: b.lt
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '7']]
- name: b.eq
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '7']]
- name: b.hs
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '7']]
- name: b.gt
operands:
- class: identifier
@@ -175,6 +265,19 @@ instruction_forms:
throughput: 1.0
latency: 6.0 # 1*p0
port_pressure: [[1, '0']]
- name: dup
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 1.0
latency: 6.0 # 1*p0
port_pressure: [[1, '0']]
- name: fadd
operands:
- class: register
@@ -755,6 +858,15 @@ instruction_forms:
throughput: 0.5
latency: 5.0 # 2*p56+2*p5D6D
port_pressure: [[1, '56'], [1, ['5D', '6D']]]
- name: ldr
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
@@ -782,6 +894,55 @@ instruction_forms:
throughput: 0.0
latency: 0.0
port_pressure: []
- name: lsl
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: lsl
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: mov
operands:
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: mov
operands:
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: mov
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: mov
operands:
- class: register
@@ -815,6 +976,33 @@ instruction_forms:
throughput: 0.5
latency: 4.0 # 1*p02
port_pressure: [[1, '02']]
- name: mul
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 1.0
latency: 5.0 # 1*p3
port_pressure: [[1, '3']]
- name: mul
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 1.0
latency: 5.0 # 1*p3
port_pressure: [[1, '3']]
- name: ret
operands: []
throughput: 0.5
latency: ~ # 1*p56
port_pressure: [[1, '56']]
- name: stp
operands:
- class: register
@@ -831,6 +1019,70 @@ instruction_forms:
throughput: 2.0
latency: 0 # 2*p56+2*p0
port_pressure: [[2, '56'], [2, '0']]
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 2.0
latency: 0 # 2*p56+2*p0+1*p0234
port_pressure: [[2, '56'], [2, '0'], [1, '0234']]
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: 0 # 2*p56+2*p0
port_pressure: [[2, '56'], [2, '0']]
- name: stp
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 2.0
latency: 0 # 2*p56+2*p0+1*p0234
port_pressure: [[2, '56'], [2, '0'], [1, '0234']]
- name: stp
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: 0 # 2*p56+2*p0
port_pressure: [[2, '56'], [2, '0']]
- name: stp
operands:
- class: register
@@ -891,6 +1143,20 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p56+1*p0
port_pressure: [[1, '56'], [1, '0']]
- name: str
operands:
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 1*p56+1*p0
port_pressure: [[1, '56'], [1, '0']]
- name: str
operands:
- class: register
@@ -992,6 +1258,17 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p5+1*p6+1*p0
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
- name: sub
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.5
latency: 1.0 # 1*p34
port_pressure: [[1, '34']]
- name: sub
operands:
- class: register
@@ -1025,6 +1302,28 @@ instruction_forms:
throughput: 0.25
latency: 1.0 # 1*p0234
port_pressure: [[1, '0234']]
- name: sub
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.5
latency: 1.0 # 1*p34
port_pressure: [[1, '34']]
- name: subs
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.5
latency: 1.0 # 1*p34
port_pressure: [[1, '34']]
- name: [whilele, whilelo, whilels, whilelt]
operands:
- class: register

View File

@@ -80,24 +80,114 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: and
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mul
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 1.0
latency: 4.0 # 1*p1
port_pressure: [[1, '1']]
- name: mul
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 1.0
latency: 4.0 # 1*p1
port_pressure: [[1, '1']]
- name: b.ne
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: b.lt
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.hs
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.eq
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '2']]
- name: b.gt
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: bne
operands:
- class: identifier
throughput: 0.0
throughput: 1.0
latency: 0.0
port_pressure: []
port_pressure: [[1, '2']]
- name: cmp
operands:
- class: register
@@ -107,6 +197,15 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: cmp
operands:
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: cmp
operands:
- class: register
@@ -126,6 +225,17 @@ instruction_forms:
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: dup
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: fadd
operands:
- class: register
@@ -323,6 +433,28 @@ instruction_forms:
throughput: 0.5
latency: 6.0 # 1*p01
port_pressure: [[1, '01']]
- name: lsl
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: lsl
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: ldp
operands:
- class: register
@@ -521,6 +653,15 @@ instruction_forms:
throughput: 0.5
latency: 4.0 # 1*p34
port_pressure: [[1.0, '34']]
- name: ldr
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
@@ -548,15 +689,42 @@ instruction_forms:
throughput: 0.0
latency: 0.0
port_pressure: []
- name: mov
operands:
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.5
latency: 1.0 # 1*p01
port_pressure: [[1, '01']]
throughput: 0.333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: mov
operands:
- class: register
@@ -584,6 +752,43 @@ instruction_forms:
throughput: ~
latency: ~
port_pressure: []
- name: ret
operands: []
throughput: 0.5
latency: ~ # 1*p34
port_pressure: [[1, '34']]
- name: stp
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p34+1*p5
port_pressure: [[2, '34'], [1, '5']]
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p34+1*p5
port_pressure: [[2, '34'], [1, '5']]
- name: stp
operands:
- class: register
@@ -660,6 +865,20 @@ instruction_forms:
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
- class: register
prefix: w
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
- class: register
@@ -744,6 +963,39 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5'], [1, '012']]
- name: subs
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: subs
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
@@ -755,3 +1007,25 @@ instruction_forms:
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: register
prefix: w
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]
- name: sub
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]

View File

@@ -144,7 +144,7 @@ class Frontend(object):
)
return s
def full_analysis(self, kernel, kernel_dg: KernelDG, ignore_unknown=False, verbose=False):
def full_analysis(self, kernel, kernel_dg: KernelDG, ignore_unknown=False, arch_warning=False, length_warning=False, verbose=False):
"""
Build the full analysis report including header, the symbol map, the combined TP/CP/LCD
view and the list based LCD view.
@@ -156,11 +156,16 @@ class Frontend(object):
:param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
`False`
:type ignore_unknown: boolean, optional
:param arch_warning: flag for additional user warning to specify micro-arch
:type arch_warning: boolean, optional
:param length_warning: flag for additional user warning to specify kernel length with --lines
:type length_warning: boolean, optional
:param verbose: flag for verbosity level, defaults to False
:type verbose: boolean, optional
"""
return (
self._header_report()
+ self._user_warnings(arch_warning, length_warning)
+ self._symbol_map()
+ self.combined_view(
kernel,
@@ -285,6 +290,27 @@ class Frontend(object):
).format(amount, '-' * len(str(amount)))
return s
def _user_warnings(self, arch_warning, length_warning):
"""Returns warning texts for giving the user more insight in what he is doing."""
arch_text = (
'WARNING: No micro-architecture was specified and a default uarch was used.\n'
' Specify the uarch with --arch. See --help for more information.\n'
)
length_text = (
'WARNING: You are analyzing a large amount of instruction forms. Analysis '
'across loops/block boundaries often do not make much sense.\n'
' Specify the kernel length with --length. See --help for more '
'information.\n'
' If this is intentional, you can safely ignore this message.\n'
)
warnings = ''
warnings += arch_text if arch_warning else ''
warnings += length_text if length_warning else ''
warnings += '\n'
return warnings
def _get_separator_list(self, separator, separator_2=' '):
"""Creates column view for separators in the TP/combined view."""
separator_list = []

View File

@@ -94,6 +94,13 @@ def create_parser(parser=None):
help='Run the throughput analysis with fixed probabilities for all suitable ports per '
'instruction. Otherwise, OSACA will print the optimal port utilization for the kernel.',
)
parser.add_argument(
'--lines',
type=str,
help='Define lines that should be included in the analysis. This option overwrites any'
' range defined by markers in the assembly. Add either single lines or ranges defined by'
' "-" or ":", each entry separated by commas, e.g.: --lines 1,2,8-18,20:24',
)
parser.add_argument(
'--db-check',
dest='check_db',
@@ -252,6 +259,7 @@ def inspect(args, output_file=sys.stdout):
# Detect ISA if necessary
arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
print_arch_warning = False if args.arch else True
isa = MachineModel.get_isa_for_arch(arch)
verbose = args.verbose
ignore_unknown = args.ignore_unknown
@@ -272,8 +280,15 @@ def inspect(args, output_file=sys.stdout):
traceback.print_exc(file=sys.stderr)
sys.exit(1)
# Reduce to marked kernel and add semantics
kernel = reduce_to_section(parsed_code, isa)
# Reduce to marked kernel or chosen section and add semantics
if args.lines:
line_range = get_line_range(args.lines)
kernel = [line for line in parsed_code if line['line_number'] in line_range]
print_length_warning = False
else:
kernel = reduce_to_section(parsed_code, isa)
# Print warning if kernel has no markers and is larger than threshold (100)
print_length_warning = True if len(kernel) == len(parsed_code) and len(kernel) > 100 else False
machine_model = MachineModel(arch=arch)
semantics = ArchSemantics(machine_model)
semantics.add_semantics(kernel)
@@ -289,7 +304,12 @@ def inspect(args, output_file=sys.stdout):
frontend = Frontend(args.file.name, arch=arch)
print(
frontend.full_analysis(
kernel, kernel_graph, ignore_unknown=ignore_unknown, verbose=verbose
kernel,
kernel_graph,
ignore_unknown=ignore_unknown,
arch_warning=print_arch_warning,
length_warning=print_length_warning,
verbose=verbose
),
file=output_file,
)
@@ -346,6 +366,19 @@ def get_unmatched_instruction_ratio(kernel):
unmatched_counter += 1
return unmatched_counter / len(kernel)
def get_line_range(line_str):
    """
    Convert a line selection string into the list of selected line numbers.

    Entries are separated by commas. Each entry is either a single line number or an
    inclusive range written as ``start-end`` or ``start:end``, e.g. ``1,2,8-18,20:24``.
    Empty entries (for instance caused by a trailing comma) are skipped.

    :param line_str: comma-separated line numbers and ranges
    :type line_str: str
    :returns: line numbers in the order given, with all ranges expanded
    :rtype: list
    :raises ValueError: if a non-empty entry does not consist of integers
    """
    line_numbers = []
    # ':' and '-' are interchangeable range delimiters, so normalize to '-' first.
    for entry in line_str.replace(':', '-').split(','):
        entry = entry.strip()
        if not entry:
            # tolerate "1,2," or "1,,2" instead of failing on int('')
            continue
        if '-' in entry:
            bounds = entry.split('-')
            # ranges are inclusive on both ends
            line_numbers.extend(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            line_numbers.append(int(entry))
    return line_numbers
def main():
"""Initialize and run command line interface."""

View File

@@ -15,9 +15,12 @@ class BaseParser(object):
SEGMENT_EXT_ID = 'segment_extension'
INSTRUCTION_ID = 'instruction'
OPERANDS_ID = 'operands'
_parser_constructed = False
def __init__(self):
self.construct_parser()
if not self._parser_constructed:
self.construct_parser()
self._parser_constructed = True
@staticmethod
def detect_ISA(file_content):

View File

@@ -7,6 +7,14 @@ from osaca.parser import AttrDict, BaseParser
class ParserAArch64(BaseParser):
_instance = None
# Singleton pattern, as this is created very many times
def __new__(cls):
    """Return the instance stored on the class, creating it on the first call."""
    if cls._instance is not None:
        return cls._instance
    cls._instance = super(ParserAArch64, cls).__new__(cls)
    return cls._instance
def __init__(self):
super().__init__()
self.isa = 'aarch64'
@@ -47,7 +55,7 @@ class ParserAArch64(BaseParser):
self.directive = pp.Group(
pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name')
+ commaSeparatedList.setResultsName('parameters')
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName('parameters')
+ pp.Optional(self.comment)
).setResultsName(self.DIRECTIVE_ID)
# LLVM-MCA markers

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import string
import re
import pyparsing as pp
@@ -8,6 +9,14 @@ from osaca.parser import AttrDict, BaseParser
class ParserX86ATT(BaseParser):
_instance = None
# Singleton pattern, as this is created very many times
def __new__(cls):
    """Return the instance stored on the class, creating it on the first call."""
    if cls._instance is not None:
        return cls._instance
    cls._instance = super(ParserX86ATT, cls).__new__(cls)
    return cls._instance
def __init__(self):
super().__init__()
self.isa = 'x86'
@@ -354,45 +363,44 @@ class ParserX86ATT(BaseParser):
def is_reg_dependend_of(self, reg_a, reg_b):
"""Check if ``reg_a`` is dependent on ``reg_b``"""
# Normalize name
reg_a_name = reg_a['name'].upper()
reg_b_name = reg_b['name'].upper()
# Check if they are the same registers
if reg_a.name == reg_b.name:
if reg_a_name == reg_b_name:
return True
# Check vector registers first
if self.is_vector_register(reg_a):
if self.is_vector_register(reg_b):
if reg_a.name[1:] == reg_b.name[1:]:
if reg_a_name[1:] == reg_b_name[1:]:
# Registers in the same vector space
return True
return False
# Check basic GPRs
a_dep = ['RAX', 'EAX', 'AX', 'AH', 'AL']
b_dep = ['RBX', 'EBX', 'BX', 'BH', 'BL']
c_dep = ['RCX', 'ECX', 'CX', 'CH', 'CL']
d_dep = ['RDX', 'EDX', 'DX', 'DH', 'DL']
sp_dep = ['RSP', 'ESP', 'SP', 'SPL']
src_dep = ['RSI', 'ESI', 'SI', 'SIL']
dst_dep = ['RDI', 'EDI', 'DI', 'DIL']
basic_gprs = [a_dep, b_dep, c_dep, d_dep, sp_dep, src_dep, dst_dep]
gpr_groups = {
'A': ['RAX', 'EAX', 'AX', 'AH', 'AL'],
'B': ['RBX', 'EBX', 'BX', 'BH', 'BL'],
'C': ['RCX', 'ECX', 'CX', 'CH', 'CL'],
'D': ['RDX', 'EDX', 'DX', 'DH', 'DL'],
'SP': ['RSP', 'ESP', 'SP', 'SPL'],
'SRC': ['RSI', 'ESI', 'SI', 'SIL'],
'DST': ['RDI', 'EDI', 'DI', 'DIL']
}
if self.is_basic_gpr(reg_a):
if self.is_basic_gpr(reg_b):
for dep_group in basic_gprs:
if reg_a['name'].upper() in dep_group:
if reg_b['name'].upper() in dep_group:
for dep_group in gpr_groups.values():
if reg_a_name in dep_group:
if reg_b_name in dep_group:
return True
return False
# Check other GPRs
gpr_parser = (
pp.CaselessLiteral('R')
+ pp.Word(pp.nums).setResultsName('id')
+ pp.Optional(pp.Word('dwbDWB', exact=1))
)
try:
id_a = gpr_parser.parseString(reg_a['name'], parseAll=True).asDict()['id']
id_b = gpr_parser.parseString(reg_b['name'], parseAll=True).asDict()['id']
if id_a == id_b:
return True
except pp.ParseException:
return False
ma = re.match(r'R([0-9]+)[DWB]?', reg_a_name)
mb = re.match(r'R([0-9]+)[DWB]?', reg_b_name)
if ma and mb and ma.group(1) == mb.group(1):
return True
# No dependencies
return False
@@ -406,19 +414,11 @@ class ParserX86ATT(BaseParser):
"""Check if register is a general purpose register"""
if register is None:
return False
gpr_parser = (
pp.CaselessLiteral('R')
+ pp.Word(pp.nums).setResultsName('id')
+ pp.Optional(pp.Word('dwbDWB', exact=1))
)
if self.is_basic_gpr(register):
return True
else:
try:
gpr_parser.parseString(register['name'], parseAll=True)
return True
except pp.ParseException:
return False
return re.match(r'R([0-9]+)[DWB]?', register['name'], re.IGNORECASE)
def is_vector_register(self, register):
"""Check if register is a vector register"""

View File

@@ -398,9 +398,7 @@ class ArchSemantics(ISASemantics):
def g(obj, value):
obj[item] = value
else:
def g(obj, *values):
for item, value in zip(items, values):
obj[item] = value
@@ -416,9 +414,11 @@ class ArchSemantics(ISASemantics):
@staticmethod
def get_throughput_sum(kernel):
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
tp_sum = reduce(
(lambda x, y: [sum(z) for z in zip(x, y)]),
[instr['port_pressure'] for instr in kernel],
)
tp_sum = [round(x, 2) for x in tp_sum]
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
# typically comment, label and non-instruction lines
port_pressures = [instr['port_pressure'] for instr in kernel if instr['throughput'] != 0.0]
# Essentially summing up each columns of port_pressures, where each column is one port
# and each row is one line of the kernel
# round is necessary to ensure termination of ArchSemantics.assign_optimal_throughput
tp_sum = [round(sum(col), 2) for col in zip(*port_pressures)]
return tp_sum

View File

@@ -8,6 +8,7 @@ from copy import deepcopy
from itertools import product
import hashlib
from pathlib import Path
from collections import defaultdict
import ruamel.yaml
from ruamel.yaml.compat import StringIO
@@ -18,6 +19,7 @@ from osaca.parser import ParserX86ATT
class MachineModel(object):
WILDCARD = '*'
INTERNAL_VERSION = 1 # increase whenever self._data format changes to invalidate cache!
def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
if not arch and not path_to_yaml:
@@ -40,7 +42,7 @@ class MachineModel(object):
'load_throughput_default': [],
'ports': [],
'port_model_scheme': None,
'instruction_forms': [],
'instruction_forms': []
}
else:
if arch and path_to_yaml:
@@ -60,8 +62,6 @@ class MachineModel(object):
with open(self._path, 'r') as f:
if not lazy:
self._data = yaml.load(f)
# cache file for next call
self._write_in_cache(self._path, self._data)
else:
file_content = ''
line = f.readline()
@@ -70,21 +70,26 @@ class MachineModel(object):
line = f.readline()
self._data = yaml.load(file_content)
self._data['instruction_forms'] = []
# separate multi-alias instruction forms
for entry in [
x for x in self._data['instruction_forms'] if isinstance(x['name'], list)
]:
for name in entry['name']:
new_entry = {'name': name}
for k in [x for x in entry.keys() if x != 'name']:
new_entry[k] = entry[k]
self._data['instruction_forms'].append(new_entry)
# remove old entry
self._data['instruction_forms'].remove(entry)
# For use with dict instead of list as DB
# self._data['instruction_dict'] = (
# self._convert_to_dict(self._data['instruction_forms'])
# )
# separate multi-alias instruction forms
for entry in [x for x in self._data['instruction_forms']
if isinstance(x['name'], list)]:
for name in entry['name']:
new_entry = {'name': name}
for k in [x for x in entry.keys() if x != 'name']:
new_entry[k] = entry[k]
self._data['instruction_forms'].append(new_entry)
# remove old entry
self._data['instruction_forms'].remove(entry)
# Normalize instruction_form names (to UPPERCASE) and build dict for faster access:
self._data['instruction_forms_dict'] = defaultdict(list)
for iform in self._data['instruction_forms']:
iform['name'] = iform['name'].upper()
self._data['instruction_forms_dict'][iform['name']].append(iform)
self._data['internal_version'] = self.INTERNAL_VERSION
if not lazy:
# cache internal representation for future use
self._write_in_cache(self._path)
def __getitem__(self, key):
"""Return configuration entry."""
@@ -99,36 +104,21 @@ class MachineModel(object):
def get_instruction(self, name, operands):
"""Find and return instruction data from name and operands."""
# For use with dict instead of list as DB
# return self.get_instruction_from_dict(name, operands)
if name is None:
return None
name_matched_iforms = self._data['instruction_forms_dict'].get(name.upper(), [])
try:
return next(
instruction_form
for instruction_form in self._data['instruction_forms']
if instruction_form['name'].upper() == name.upper()
and self._match_operands(
for instruction_form in name_matched_iforms if self._match_operands(
instruction_form['operands'] if 'operands' in instruction_form else [],
operands,
)
)
operands))
except StopIteration:
return None
except TypeError as e:
print('\nname: {}\noperands: {}'.format(name, operands))
raise TypeError from e
def get_instruction_from_dict(self, name, operands):
"""Find and return instruction data from name and operands stored in dictionary."""
if name is None:
return None
try:
# Check if key is in dict
instruction_form = self._data['instruction_dict'][self._get_key(name, operands)]
return instruction_form
except KeyError:
return None
def average_port_pressure(self, port_pressure):
"""Construct average port pressure list from instruction data."""
port_list = self._data['ports']
@@ -235,7 +225,7 @@ class MachineModel(object):
for y in list(filter(lambda x: True if x != 'class' else False, op))
]
operands.append('{}({})'.format(op['class'], ','.join(op_attrs)))
return '{} {}'.format(instruction_form['name'], ','.join(operands))
return '{} {}'.format(instruction_form['name'].lower(), ','.join(operands))
@staticmethod
def get_isa_for_arch(arch):
@@ -295,7 +285,8 @@ class MachineModel(object):
{
k: v
for k, v in self._data.items()
if k not in ['instruction_forms', 'load_throughput']
if k not in ['instruction_forms', 'instruction_forms_dict', 'load_throughput',
'internal_version']
},
stream,
)
@@ -316,51 +307,53 @@ class MachineModel(object):
:returns: cached DB if existing, `False` otherwise
"""
p = Path(filepath)
# 1. companion cachefile: same location, with '.' prefix and '.pickle' suffix
companion_cachefile = p.with_name('.' + p.name).with_suffix('.pickle')
if companion_cachefile.exists():
if companion_cachefile.stat().st_mtime > p.stat().st_mtime:
# companion file up-to-date
with companion_cachefile.open('rb') as f:
return pickle.load(f)
# 2. home cachefile: ~/.osaca/cache/<sha256hash>.pickle
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
home_cachefile = (Path(utils.CACHE_DIR) / hexhash).with_suffix('.pickle')
# 1. companion cachefile: same location, with '.<name>_<sha256hash>.pickle'
companion_cachefile = p.with_name('.' + p.stem + '_' + hexhash).with_suffix('.pickle')
if companion_cachefile.exists():
# companion file (must be up-to-date, due to equal hash)
with companion_cachefile.open('rb') as f:
data = pickle.load(f)
if data.get('internal_version') == self.INTERNAL_VERSION:
return data
# 2. home cachefile: ~/.osaca/cache/<name>_<sha256hash>.pickle
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + '_' + hexhash)).with_suffix('.pickle')
if home_cachefile.exists():
# home file (must be up-to-date, due to equal hash)
with home_cachefile.open('rb') as f:
return pickle.load(f)
data = pickle.load(f)
if data.get('internal_version') == self.INTERNAL_VERSION:
return data
return False
def _write_in_cache(self, filepath, data):
def _write_in_cache(self, filepath):
"""
Write machine model to cache
:param filepath: path to store DB
:type filepath: str
:param data: :class:`MachineModel` to store
:type data: :class:`dict`
"""
p = Path(filepath)
# 1. companion cachefile: same location, with '.' prefix and '.pickle' suffix
companion_cachefile = p.with_name('.' + p.name).with_suffix('.pickle')
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
# 1. companion cachefile: same location, with '.<name>_<sha256hash>.pickle'
companion_cachefile = p.with_name('.' + p.stem + '_' + hexhash).with_suffix('.pickle')
if os.access(str(companion_cachefile.parent), os.W_OK):
with companion_cachefile.open('wb') as f:
pickle.dump(data, f)
pickle.dump(self._data, f)
return
# 2. home cachefile: ~/.osaca/cache/<sha256hash>.pickle
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
# 2. home cachefile: ~/.osaca/cache/<name>_<sha256hash>.pickle
cache_dir = Path(utils.CACHE_DIR)
try:
os.makedirs(cache_dir, exist_ok=True)
except OSError:
return
home_cachefile = (cache_dir / hexhash).with_suffix('.pickle')
home_cachefile = (cache_dir / (p.stem + '_' + hexhash)).with_suffix('.pickle')
if os.access(str(home_cachefile.parent), os.W_OK):
with home_cachefile.open('wb') as f:
pickle.dump(data, f)
pickle.dump(self._data, f)
def _get_key(self, name, operands):
"""Get unique instruction form key for dict DB."""
@@ -370,18 +363,6 @@ class MachineModel(object):
key_string += '_'.join([self._get_operand_hash(op) for op in operands])
return key_string
def _convert_to_dict(self, instruction_forms):
    """
    Build the dict DB from the list DB.

    Every instruction form is stored under the key that ``_get_key`` returns for its
    name and operands; forms without an ``'operands'`` entry are keyed with ``None``
    as operands. If several forms yield the same key, the last one in the list wins.

    :param instruction_forms: instruction forms of the list DB
    :returns: `dict` mapping instruction form keys to instruction forms
    """
    return {
        # dict.get() yields None for a missing 'operands' entry
        self._get_key(form['name'], form.get('operands')): form
        for form in instruction_forms
    }
def _get_operand_hash(self, operand):
"""Get unique key for operand for dict DB"""
operand_string = ''
@@ -600,7 +581,11 @@ class MachineModel(object):
def _is_x86_reg_type(self, i_reg, reg, consider_masking=False):
"""Check if register type match."""
i_reg_name = i_reg if not consider_masking else i_reg['name']
i_reg_name = i_reg['name'] if i_reg and 'name' in i_reg else i_reg
if reg is None:
if i_reg is None:
return True
return False
# check for wildcards
if i_reg_name == self.WILDCARD or reg['name'] == self.WILDCARD:
return True

View File

@@ -165,8 +165,52 @@ class TestCLI(unittest.TestCase):
kernel_aarch64 = 'kernel_aarch64.s'
args = parser.parse_args([self._find_test_file(kernel_aarch64)])
osaca.run(args, output_file=output)
def test_user_warnings(self):
    """
    Check that each of two OSACA runs prints exactly one ``WARNING``.

    Both runs analyze the unmarked kernel ``triad_x86_unmarked.s`` with
    ``--ignore-unknown``:

    1. ``--arch csx`` without ``--lines``: the single warning is expected to be the
       one about the kernel length.
    2. ``--lines 100-199`` without ``--arch``: the single warning is expected to be
       the one about the architecture.
    """
    parser = osaca.create_parser()
    kernel = 'triad_x86_unmarked.s'

    # Run 1: architecture given, no line selection
    args = parser.parse_args(
        ['--arch', 'csx', '--ignore-unknown', self._find_test_file(kernel)]
    )
    output = StringIO()
    osaca.run(args, output_file=output)
    # WARNING for length
    # assertEqual reports the actual number of warnings if the check fails
    self.assertEqual(output.getvalue().count('WARNING'), 1)

    # Run 2: line selection given, no architecture
    args = parser.parse_args(
        ['--lines', '100-199', '--ignore-unknown', self._find_test_file(kernel)]
    )
    output = StringIO()
    osaca.run(args, output_file=output)
    # WARNING for arch
    self.assertEqual(output.getvalue().count('WARNING'), 1)
def test_lines_arg(self):
    """
    Run OSACA with the ``--lines`` option.

    The reference is the analysis of ``triad_x86_iaca.s`` with ``--arch csx`` and no
    ``--lines``. Three spellings of the line range 146-154 (with ``-``, with ``:``
    and as a comma-separated mix of both) must each produce the same output as the
    reference, ignoring the first 8 lines of the report.
    """
    parser = osaca.create_parser()
    kernel_x86 = 'triad_x86_iaca.s'

    # Reference run without --lines
    reference_args = parser.parse_args(['--arch', 'csx', self._find_test_file(kernel_x86)])
    reference_stream = StringIO()
    osaca.run(reference_args, output_file=reference_stream)
    # The first 8 output lines are excluded from the comparison
    # (presumably a run-specific header -- TODO confirm)
    output_base = reference_stream.getvalue().split('\n')[8:]

    # All argument sets are parsed up front, before any --lines run starts
    args = [
        parser.parse_args(
            ['--lines', line_spec, '--arch', 'csx', self._find_test_file(kernel_x86)]
        )
        for line_spec in ('146-154', '146:154', '146,147:148,149-154')
    ]
    for a in args:
        with self.subTest(params=a):
            stream = StringIO()
            osaca.run(a, output_file=stream)
            self.assertEqual(stream.getvalue().split('\n')[8:], output_base)
##################
# Helper functions
##################

View File

@@ -0,0 +1,345 @@
.file "triad.c"
.section .rodata.str1.8,"aMS",@progbits,1
.align 8
.LC9:
.string "%12.1f | %9.8f | %9.3f | %7.1f | %7.1f | %7d | %4d \n"
.text
.p2align 4,,15
.globl triad
.type triad, @function
triad:
.LFB24:
.cfi_startproc
pushq %r13
.cfi_def_cfa_offset 16
.cfi_offset 13, -16
movslq %edi, %rax
movl $64, %edi
leaq 16(%rsp), %r13
.cfi_def_cfa 13, 0
andq $-32, %rsp
pushq -8(%r13)
pushq %rbp
.cfi_escape 0x10,0x6,0x2,0x76,0
movq %rsp, %rbp
pushq %r15
.cfi_escape 0x10,0xf,0x2,0x76,0x78
leaq 0(,%rax,8), %r15
pushq %r14
movq %r15, %rsi
pushq %r13
.cfi_escape 0xf,0x3,0x76,0x68,0x6
.cfi_escape 0x10,0xe,0x2,0x76,0x70
pushq %r12
pushq %rbx
.cfi_escape 0x10,0xc,0x2,0x76,0x60
.cfi_escape 0x10,0x3,0x2,0x76,0x58
movq %rax, %rbx
subq $72, %rsp
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r14
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r12
call aligned_alloc
movq %r15, %rsi
movl $64, %edi
movq %rax, %r13
call aligned_alloc
movq %rax, %r15
leal -1(%rbx), %eax
movl %eax, -96(%rbp)
testl %ebx, %ebx
jle .L2
cmpl $2, %eax
jbe .L14
movl %ebx, %esi
vmovapd .LC0(%rip), %ymm0
xorl %eax, %eax
xorl %ecx, %ecx
shrl $2, %esi
.p2align 4,,10
.p2align 3
.L4:
addl $1, %ecx
vmovapd %ymm0, (%r15,%rax)
vmovapd %ymm0, 0(%r13,%rax)
vmovapd %ymm0, (%r12,%rax)
vmovapd %ymm0, (%r14,%rax)
addq $32, %rax
cmpl %ecx, %esi
ja .L4
movl %ebx, %eax
andl $-4, %eax
cmpl %eax, %ebx
je .L26
vzeroupper
.L3:
vmovsd .LC1(%rip), %xmm0
movslq %eax, %rcx
vmovsd %xmm0, (%r15,%rcx,8)
vmovsd %xmm0, 0(%r13,%rcx,8)
vmovsd %xmm0, (%r12,%rcx,8)
vmovsd %xmm0, (%r14,%rcx,8)
leal 1(%rax), %ecx
cmpl %ecx, %ebx
jle .L2
movslq %ecx, %rcx
addl $2, %eax
vmovsd %xmm0, (%r15,%rcx,8)
vmovsd %xmm0, 0(%r13,%rcx,8)
vmovsd %xmm0, (%r12,%rcx,8)
vmovsd %xmm0, (%r14,%rcx,8)
cmpl %eax, %ebx
jle .L2
cltq
vmovsd %xmm0, (%r15,%rax,8)
vmovsd %xmm0, 0(%r13,%rax,8)
vmovsd %xmm0, (%r12,%rax,8)
vmovsd %xmm0, (%r14,%rax,8)
.L2:
movl %ebx, %eax
movl $1, -84(%rbp)
movl %ebx, %r10d
andl $-4, %eax
shrl $2, %r10d
movl %eax, -100(%rbp)
.p2align 4,,10
.p2align 3
.L13:
leaq -56(%rbp), %rsi
leaq -72(%rbp), %rdi
movl %r10d, -88(%rbp)
call timing
movl -88(%rbp), %r10d
xorl %r11d, %r11d
.p2align 4,,10
.p2align 3
.L12:
vmovsd (%r14), %xmm0
vxorpd %xmm7, %xmm7, %xmm7
vucomisd %xmm7, %xmm0
jbe .L6
movq %r14, %rdi
movl %r11d, -92(%rbp)
movl %r10d, -88(%rbp)
vzeroupper
call dummy
movl -92(%rbp), %r11d
movl -88(%rbp), %r10d
.L6:
testl %ebx, %ebx
jle .L8
cmpl $2, -96(%rbp)
jbe .L15
xorl %eax, %eax
xorl %ecx, %ecx
.p2align 4,,10
.p2align 3
.L10:
vmovapd (%r15,%rax), %ymm0
vmovapd (%r12,%rax), %ymm3
addl $1, %ecx
vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0
vmovapd %ymm0, (%r14,%rax)
addq $32, %rax
cmpl %ecx, %r10d
ja .L10
movl -100(%rbp), %eax
cmpl %ebx, %eax
je .L8
.L9:
movslq %eax, %rcx
vmovsd 0(%r13,%rcx,8), %xmm0
vmovsd (%r12,%rcx,8), %xmm5
vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0
vmovsd %xmm0, (%r14,%rcx,8)
leal 1(%rax), %ecx
cmpl %ebx, %ecx
jge .L8
movslq %ecx, %rcx
addl $2, %eax
vmovsd 0(%r13,%rcx,8), %xmm0
vmovsd (%r12,%rcx,8), %xmm6
vfmadd132sd (%r15,%rcx,8), %xmm6, %xmm0
vmovsd %xmm0, (%r14,%rcx,8)
cmpl %eax, %ebx
jle .L8
cltq
vmovsd (%r15,%rax,8), %xmm0
vmovsd (%r12,%rax,8), %xmm4
vfmadd132sd 0(%r13,%rax,8), %xmm4, %xmm0
vmovsd %xmm0, (%r14,%rax,8)
.L8:
addl $1, %r11d
cmpl -84(%rbp), %r11d
jne .L12
leaq -56(%rbp), %rsi
leaq -64(%rbp), %rdi
movl %r11d, -84(%rbp)
movl %r10d, -88(%rbp)
vzeroupper
call timing
vmovsd -64(%rbp), %xmm1
vsubsd -72(%rbp), %xmm1, %xmm1
vmovsd .LC3(%rip), %xmm2
movl -84(%rbp), %r11d
movl -88(%rbp), %r10d
vucomisd %xmm1, %xmm2
leal (%r11,%r11), %eax
movl %eax, -84(%rbp)
ja .L13
movl %eax, %esi
vxorpd %xmm6, %xmm6, %xmm6
vxorpd %xmm0, %xmm0, %xmm0
movl %ebx, %edx
sarl %esi
vcvtsi2sd %ebx, %xmm0, %xmm0
movl $.LC9, %edi
movl $5, %eax
vcvtsi2sd %esi, %xmm6, %xmm6
vmulsd .LC5(%rip), %xmm6, %xmm2
vmovsd .LC4(%rip), %xmm5
vmovsd .LC6(%rip), %xmm7
vmulsd %xmm0, %xmm6, %xmm4
vmulsd %xmm0, %xmm2, %xmm2
vdivsd %xmm1, %xmm4, %xmm4
vdivsd %xmm1, %xmm2, %xmm2
vdivsd %xmm5, %xmm4, %xmm4
vmulsd %xmm7, %xmm2, %xmm3
vaddsd %xmm0, %xmm0, %xmm2
vmulsd .LC8(%rip), %xmm0, %xmm0
vmulsd %xmm6, %xmm2, %xmm2
vmulsd .LC7(%rip), %xmm2, %xmm2
vmulsd %xmm7, %xmm3, %xmm3
vdivsd %xmm5, %xmm0, %xmm0
vdivsd %xmm5, %xmm4, %xmm4
vdivsd %xmm1, %xmm2, %xmm2
call printf
movq %r14, %rdi
call free
movq %r12, %rdi
call free
movq %r13, %rdi
call free
addq $72, %rsp
movq %r15, %rdi
popq %rbx
popq %r12
popq %r13
.cfi_remember_state
.cfi_def_cfa 13, 0
popq %r14
popq %r15
popq %rbp
leaq -16(%r13), %rsp
.cfi_def_cfa 7, 16
popq %r13
.cfi_def_cfa_offset 8
jmp free
.p2align 4,,10
.p2align 3
.L15:
.cfi_restore_state
xorl %eax, %eax
jmp .L9
.L26:
vzeroupper
jmp .L2
.L14:
xorl %eax, %eax
jmp .L3
.cfi_endproc
.LFE24:
.size triad, .-triad
.section .rodata.str1.8
.align 8
.LC10:
.string "TRIAD a[i] = b[i]+c[i]*d[i], 32 byte/it, 2 Flop/it"
.align 8
.LC11:
.string "Size (KByte) | runtime | MFlop/s | MB/s | MLUP/s | repeat | size"
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB25:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movl $.LC10, %edi
movl $20, %ebx
call puts
movl $.LC11, %edi
call puts
.p2align 4,,10
.p2align 3
.L28:
vxorpd %xmm1, %xmm1, %xmm1
movq .LC12(%rip), %rax
vcvtsi2sd %ebx, %xmm1, %xmm1
addl $1, %ebx
vmovq %rax, %xmm0
call pow
vcvttsd2si %xmm0, %edi
call triad
cmpl $36, %ebx
jne .L28
xorl %eax, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE25:
.size main, .-main
.section .rodata.cst32,"aM",@progbits,32
.align 32
.LC0:
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.long 1907715710
.long 1048610426
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC1:
.long 1907715710
.long 1048610426
.align 8
.LC3:
.long 2576980378
.long 1070176665
.align 8
.LC4:
.long 0
.long 1083129856
.align 8
.LC5:
.long 0
.long 1077936128
.align 8
.LC6:
.long 0
.long 1062207488
.align 8
.LC7:
.long 2696277389
.long 1051772663
.align 8
.LC8:
.long 0
.long 1075838976
.align 8
.LC12:
.long 3435973837
.long 1073007820
.ident "GCC: (GNU) 7.2.0"
.section .note.GNU-stack,"",@progbits

View File

@@ -63,7 +63,7 @@ class TestKerncraftAPI(unittest.TestCase):
('0DV', 0.0),
('1', 34.0),
('1DV', 0.0),
('2', 2.0),
('2', 3.0),
('3', 64.0),
('4', 64.0),
('5', 32.0),

View File

@@ -83,28 +83,21 @@ class TestSemanticTools(unittest.TestCase):
self.assertIsNone(test_mm_x86.get_instruction(None, []))
self.assertIsNone(test_mm_arm.get_instruction(None, []))
# test dict DB creation
test_mm_x86._data['instruction_dict'] = test_mm_x86._convert_to_dict(
test_mm_x86._data['instruction_forms']
)
test_mm_arm._data['instruction_dict'] = test_mm_arm._convert_to_dict(
test_mm_arm._data['instruction_forms']
)
# test get_instruction from dict DB
self.assertIsNone(test_mm_x86.get_instruction_from_dict(None, []))
self.assertIsNone(test_mm_arm.get_instruction_from_dict(None, []))
self.assertIsNone(test_mm_x86.get_instruction_from_dict('NOT_IN_DB', []))
self.assertIsNone(test_mm_arm.get_instruction_from_dict('NOT_IN_DB', []))
# test get_instruction from DB
self.assertIsNone(test_mm_x86.get_instruction(None, []))
self.assertIsNone(test_mm_arm.get_instruction(None, []))
self.assertIsNone(test_mm_x86.get_instruction('NOT_IN_DB', []))
self.assertIsNone(test_mm_arm.get_instruction('NOT_IN_DB', []))
name_x86_1 = 'vaddpd'
operands_x86_1 = [
{'class': 'register', 'name': 'xmm'},
{'class': 'register', 'name': 'xmm'},
{'class': 'register', 'name': 'xmm'},
]
instr_form_x86_1 = test_mm_x86.get_instruction_from_dict(name_x86_1, operands_x86_1)
instr_form_x86_1 = test_mm_x86.get_instruction(name_x86_1, operands_x86_1)
self.assertEqual(instr_form_x86_1, test_mm_x86.get_instruction(name_x86_1, operands_x86_1))
self.assertEqual(
test_mm_x86.get_instruction_from_dict('jg', [{'class': 'identifier'}]),
test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
)
name_arm_1 = 'fadd'
@@ -113,10 +106,10 @@ class TestSemanticTools(unittest.TestCase):
{'class': 'register', 'prefix': 'v', 'shape': 's'},
{'class': 'register', 'prefix': 'v', 'shape': 's'},
]
instr_form_arm_1 = test_mm_arm.get_instruction_from_dict(name_arm_1, operands_arm_1)
instr_form_arm_1 = test_mm_arm.get_instruction(name_arm_1, operands_arm_1)
self.assertEqual(instr_form_arm_1, test_mm_arm.get_instruction(name_arm_1, operands_arm_1))
self.assertEqual(
test_mm_arm.get_instruction_from_dict('b.ne', [{'class': 'identifier'}]),
test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
)