mirror of
https://github.com/RRZE-HPC/asmbench.git
synced 2026-01-08 13:30:06 +01:00
Merge branch 'master' of github.com:RRZE-HPC/asmbench
This commit is contained in:
6
.idea/other.xml
generated
6
.idea/other.xml
generated
@@ -1,6 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="PySciProjectComponent">
|
||||
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
|
||||
</component>
|
||||
</project>
|
||||
7
.idea/vcs.xml
generated
7
.idea/vcs.xml
generated
@@ -1,7 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
<mapping directory="$PROJECT_DIR$/doc/asmbench-SC18SRC-poster" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
22
README.rst
22
README.rst
@@ -8,10 +8,28 @@ Usage
|
||||
|
||||
To benchmark the latency and throughput of a 64bit integer add, use the following command:
|
||||
|
||||
``python -m asmbench 'add {src:i64:r}, {srcdst:i64:r}'``
|
||||
``asmbench 'add {src:i64:r}, {srcdst:i64:r}'``
|
||||
|
||||
To benchmark two instructions interleaved use this:
|
||||
|
||||
``python -m asmbench 'add {src:i64:r}, {srcdst:i64:r}' 'sub {src:i64:r}, {srcdst:i64:r}'``
|
||||
``asmbench 'add {src:i64:r}, {srcdst:i64:r}' 'sub {src:i64:r}, {srcdst:i64:r}'``
|
||||
|
||||
To find out more, add ``-h`` for help and ``-v`` for verbose mode.
|
||||
|
||||
Operand Templates
|
||||
=================
|
||||
Operands always follow this form: ``{direction:data_type:pass_type}``.
|
||||
|
||||
Direction may be ``src``, ``dst`` or ``srcdst``. This will allow asmbench to serialize the code (wherever possible). ``src`` operands are read, but not modified by the instruction. ``dst`` operands are written to, but not read. ``srcdst`` operands will be read and modified by the instruction.
|
||||
|
||||
Data and Pass Types:
|
||||
|
||||
* ``i64:r`` -> 64bit general purpose register (gpr) (e.g., ``%rax``)
|
||||
* ``i32:r`` -> 32bit gpr (e.g., ``%ecx``)
|
||||
* ``<2 x double>:x`` -> 128bit SSE register with two double precision floating-point numbers (e.g., ``%xmm1``)
|
||||
* ``<4 x float>:x`` -> 128bit SSE register with four single precision floating-point numbers (e.g., ``%xmm1``)
|
||||
* ``<4 x double>:x`` -> 256bit AVX register with four double precision floating-point numbers (e.g., ``%ymm1``)
|
||||
* ``<8 x float>:x`` -> 256bit AVX register with eight single precision floating-point numbers (e.g., ``%ymm1``)
|
||||
* ``<8 x double>:x`` -> 512bit AVX512 register with eight double precision floating-point numbers (e.g., ``%zmm1``)
|
||||
* ``<16 x float>:x`` -> 512bit AVX512 register with sixteen single precision floating-point numbers (e.g., ``%zmm1``)
|
||||
* ``i8:23`` -> 8bit immediate with the value 23 (i.e., ``$23``)
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
__version__ = '0.1.4'
|
||||
@@ -41,10 +41,9 @@ def main():
|
||||
verbosity=args.verbose,
|
||||
iaca_comparison=args.iaca,
|
||||
frequency=args.frequency)
|
||||
if lat:
|
||||
print("Latency: {:.2f} cycle\nThroughput: {:.2f} cycle\n".format(lat, tp))
|
||||
else:
|
||||
print("Throughput: {:.2f} cycle\n".format(tp))
|
||||
if lat is not None:
|
||||
print("Latency: {:.2f} cycle".format(lat))
|
||||
print("Throughput: {:.2f} cycle\n".format(tp))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -12,9 +12,9 @@ import sys
|
||||
import llvmlite.binding as llvm
|
||||
import psutil
|
||||
try:
|
||||
from kerncraft import iaca
|
||||
from kerncraft import incode_model
|
||||
except ImportError:
|
||||
iaca = None
|
||||
incode_model = None
|
||||
|
||||
from . import op
|
||||
|
||||
@@ -34,7 +34,7 @@ def uniquify(l):
|
||||
|
||||
class Benchmark:
|
||||
def __init__(self, frequency=None):
|
||||
self.frequency = frequency or psutil.cpu_freq().current * 1e6
|
||||
self.frequency = frequency or psutil.cpu_freq().max * 1e6
|
||||
|
||||
def __repr__(self):
|
||||
return '{}({})'.format(
|
||||
@@ -87,13 +87,13 @@ class Benchmark:
|
||||
|
||||
def get_iaca_analysis(self, arch):
|
||||
"""Compile and return IACA analysis."""
|
||||
if iaca is None:
|
||||
if incode_model is None:
|
||||
raise ValueError("kerncraft not installed. IACA analysis is not supported.")
|
||||
tm = self.get_target_machine()
|
||||
tmpf = tempfile.NamedTemporaryFile("wb")
|
||||
tmpf.write(tm.emit_object(self.get_llvm_module(iaca_marker=True)))
|
||||
tmpf.flush()
|
||||
return iaca.iaca_analyse_instrumented_binary(tmpf.name, arch)
|
||||
return incode_model.iaca_analyse_instrumented_binary(tmpf.name, arch)
|
||||
|
||||
def build_and_execute(self, repeat=10, min_elapsed=0.1, max_elapsed=0.3):
|
||||
# Compile the module to machine code using MCJIT
|
||||
@@ -191,7 +191,7 @@ class LoopBenchmark(Benchmark):
|
||||
if src_idx == last_match_idx:
|
||||
break
|
||||
if not matched:
|
||||
raise ValueError("Unable to match source to any destination.")
|
||||
pass #raise ValueError("Unable to match source to any destination.")
|
||||
|
||||
code = ''
|
||||
for dst_reg, dst_name, init_value, src_reg, src_name in lcd:
|
||||
@@ -307,6 +307,7 @@ def bench_instructions(instructions, serial_factor=8, parallel_factor=4, through
|
||||
except op.NotSerializableError as e:
|
||||
print("Latency measurement not possible:", e)
|
||||
not_serializable = True
|
||||
lat = None
|
||||
|
||||
if not_serializable:
|
||||
lat = None
|
||||
|
||||
82
asmbench/streams.py
Executable file
82
asmbench/streams.py
Executable file
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import collections
|
||||
import itertools
|
||||
import socket
|
||||
import textwrap
|
||||
|
||||
import numpy
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib as mpl
|
||||
|
||||
from asmbench import op, bench
|
||||
from asmbench import oldjit
|
||||
|
||||
|
||||
type_size = {
|
||||
'i32': 4,
|
||||
'i64': 8,
|
||||
'f32': 4,
|
||||
'float': 4,
|
||||
'f64': 8,
|
||||
'double': 8,
|
||||
}
|
||||
|
||||
|
||||
class StreamsBenchmark(bench.Benchmark):
|
||||
def __init__(self,
|
||||
read_streams=0, read_write_streams=0, write_streams=0,
|
||||
stream_byte_length=0,
|
||||
element_type='i64'):
|
||||
super().__init__()
|
||||
self.read_streams = read_streams
|
||||
self.read_write_streams = read_write_streams
|
||||
self.write_streams = write_streams
|
||||
self.stream_byte_length = stream_byte_length
|
||||
self.element_type = element_type
|
||||
|
||||
def build_ir(self, iaca_marker=False):
|
||||
if iaca_marker:
|
||||
iaca_start_marker = textwrap.dedent('''\
|
||||
call void asm "movl $$111,%ebx", ""()
|
||||
call void asm ".byte 100,103,144", ""()''')
|
||||
iaca_stop_marker = textwrap.dedent('''\
|
||||
call void asm "movl $$222,%ebx", ""()
|
||||
call void asm ".byte 100,103,144", ""()''')
|
||||
else:
|
||||
iaca_start_marker = ''
|
||||
iaca_stop_marker = ''
|
||||
|
||||
ir = textwrap.dedent('''\
|
||||
define i64 @"test"(i64 %"N"{pointer_arguments})
|
||||
{{
|
||||
entry:
|
||||
%"loop_cond" = icmp slt i64 0, %"N"
|
||||
br i1 %"loop_cond", label %"loop", label %"end"
|
||||
|
||||
loop:
|
||||
%"loop_counter" = phi i64 [0, %"entry"], [%"loop_counter.1", %"loop"]
|
||||
{iaca_start_marker}
|
||||
{loop_body}
|
||||
%"loop_counter.1" = add i64 %"loop_counter", 1
|
||||
%"loop_cond.1" = icmp slt i64 %"loop_counter.1", %"N"
|
||||
br i1 %"loop_cond.1", label %"loop", label %"end"
|
||||
|
||||
end:
|
||||
%"ret" = phi i64 [0, %"entry"], [%"loop_counter", %"loop"]
|
||||
{iaca_stop_marker}
|
||||
ret i64 %"ret"
|
||||
}}
|
||||
''').format(
|
||||
pointer_arguments='',
|
||||
loop_body='',
|
||||
iaca_start_marker=iaca_start_marker,
|
||||
iaca_stop_marker=iaca_stop_marker)
|
||||
|
||||
return ir
|
||||
|
||||
if __name__ == '__main__':
|
||||
bench.setup_llvm()
|
||||
sb = StreamsBenchmark()
|
||||
print(sb.build_and_execute())
|
||||
|
||||
31
setup.py
31
setup.py
@@ -1,12 +1,39 @@
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
from codecs import open # To use a consistent encoding
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
here = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
with open('README.rst') as f:
|
||||
|
||||
# Stolen from pip
|
||||
def read(*names, **kwargs):
    """Return the text content of a file located relative to this script.

    The positional ``names`` are path components joined onto the directory
    that contains this file. An optional ``encoding`` keyword argument
    selects the text encoding and defaults to ``"utf8"``.
    """
    base_dir = os.path.dirname(__file__)
    target = os.path.join(base_dir, *names)
    text_encoding = kwargs.get("encoding", "utf8")
    with io.open(target, encoding=text_encoding) as handle:
        contents = handle.read()
    return contents
|
||||
|
||||
|
||||
# Stolen from pip
|
||||
def find_version(*file_paths):
    """Return the version string assigned to ``__version__`` in a file.

    The file is located from ``file_paths`` via ``read``. Its content is
    searched, in multi-line mode, for a line beginning with
    ``__version__ = '<version>'`` (single or double quotes).

    Raises:
        RuntimeError: if no such assignment is found.
    """
    pattern = r"^__version__ = ['\"]([^'\"]*)['\"]"
    found = re.search(pattern, read(*file_paths), re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
|
||||
|
||||
|
||||
# Get the long description from the relevant file
|
||||
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
|
||||
long_description = f.read()
|
||||
|
||||
setup(
|
||||
name='asmbench',
|
||||
version='0.1.3',
|
||||
version=find_version('asmbench', '__init__.py'),
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
|
||||
url='https://github.com/RRZE-HPC/asmbench',
|
||||
license='AGPLv3',
|
||||
|
||||
Reference in New Issue
Block a user