Merge branch 'master' of github.com:RRZE-HPC/asmbench

This commit is contained in:
JanLJL
2020-06-08 16:08:14 +02:00
8 changed files with 142 additions and 27 deletions

6
.idea/other.xml generated
View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>

7
.idea/vcs.xml generated
View File

@@ -1,7 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/doc/asmbench-SC18SRC-poster" vcs="Git" />
</component>
</project>

View File

@@ -8,10 +8,28 @@ Usage
To benchmark latency and throughput of a 64bit integer add use the following command:
``python -m asmbench 'add {src:i64:r}, {srcdst:i64:r}'``
``asmbench 'add {src:i64:r}, {srcdst:i64:r}'``
To benchmark two instructions interleaved use this:
``python -m asmbench 'add {src:i64:r}, {srcdst:i64:r}' 'sub {src:i64:r}, {srcdst:i64:r}'``
``asmbench 'add {src:i64:r}, {srcdst:i64:r}' 'sub {src:i64:r}, {srcdst:i64:r}'``
To find out more add `-h` for help and `-v` for verbose mode.
Operand Templates
=================
Operands always follow this form: ``{direction:data_type:pass_type}``.
Direction may be ``src``, ``dst`` or ``srcdst``. This allows asmbench to serialize the code (wherever possible). ``src`` operands are read, but not modified by the instruction. ``dst`` operands are written, but not read. ``srcdst`` operands are both read and modified by the instruction.
Data and Pass Types:
* ``i64:r`` -> 64bit general purpose register (gpr) (e.g., ``%rax``)
* ``i32:r`` -> 32bit gpr (e.g., ``%ecx``)
* ``<2 x double>:x`` -> 128bit SSE register with two double precision floating-point numbers (e.g., ``%xmm1``)
* ``<4 x float>:x`` -> 128bit SSE register with four single precision floating-point numbers (e.g., ``%xmm1``)
* ``<4 x double>:x`` -> 256bit AVX register with four double precision floating-point numbers (e.g., ``%ymm1``)
* ``<8 x float>:x`` -> 256bit AVX register with eight single precision floating-point numbers (e.g., ``%ymm1``)
* ``<8 x double>:x`` -> 512bit AVX512 register with eight double precision floating-point numbers (e.g., ``%zmm1``)
* ``<16 x float>:x`` -> 512bit AVX512 register with sixteen single precision floating-point numbers (e.g., ``%zmm1``)
* ``i8:23`` -> 8bit immediate with the value 23 (i.e., ``$23``)

View File

@@ -0,0 +1 @@
__version__ = '0.1.4'

View File

@@ -41,10 +41,9 @@ def main():
verbosity=args.verbose,
iaca_comparison=args.iaca,
frequency=args.frequency)
if lat:
print("Latency: {:.2f} cycle\nThroughput: {:.2f} cycle\n".format(lat, tp))
else:
print("Throughput: {:.2f} cycle\n".format(tp))
if lat is not None:
print("Latency: {:.2f} cycle".format(lat))
print("Throughput: {:.2f} cycle\n".format(tp))
if __name__ == "__main__":

View File

@@ -12,9 +12,9 @@ import sys
import llvmlite.binding as llvm
import psutil
try:
from kerncraft import iaca
from kerncraft import incode_model
except ImportError:
iaca = None
incode_model = None
from . import op
@@ -34,7 +34,7 @@ def uniquify(l):
class Benchmark:
def __init__(self, frequency=None):
self.frequency = frequency or psutil.cpu_freq().current * 1e6
self.frequency = frequency or psutil.cpu_freq().max * 1e6
def __repr__(self):
return '{}({})'.format(
@@ -87,13 +87,13 @@ class Benchmark:
def get_iaca_analysis(self, arch):
"""Compile and return IACA analysis."""
if iaca is None:
if incode_model is None:
raise ValueError("kerncraft not installed. IACA analysis is not supported.")
tm = self.get_target_machine()
tmpf = tempfile.NamedTemporaryFile("wb")
tmpf.write(tm.emit_object(self.get_llvm_module(iaca_marker=True)))
tmpf.flush()
return iaca.iaca_analyse_instrumented_binary(tmpf.name, arch)
return incode_model.iaca_analyse_instrumented_binary(tmpf.name, arch)
def build_and_execute(self, repeat=10, min_elapsed=0.1, max_elapsed=0.3):
# Compile the module to machine code using MCJIT
@@ -191,7 +191,7 @@ class LoopBenchmark(Benchmark):
if src_idx == last_match_idx:
break
if not matched:
raise ValueError("Unable to match source to any destination.")
pass #raise ValueError("Unable to match source to any destination.")
code = ''
for dst_reg, dst_name, init_value, src_reg, src_name in lcd:
@@ -307,6 +307,7 @@ def bench_instructions(instructions, serial_factor=8, parallel_factor=4, through
except op.NotSerializableError as e:
print("Latency measurement not possible:", e)
not_serializable = True
lat = None
if not_serializable:
lat = None

82
asmbench/streams.py Executable file
View File

@@ -0,0 +1,82 @@
#!/usr/bin/env python3
import collections
import itertools
import socket
import textwrap
import numpy
import matplotlib.pyplot as plt
import matplotlib as mpl
from asmbench import op, bench
from asmbench import oldjit
# Lookup table from element type name to its size. The values look like byte
# counts (e.g. 'i64' -> 8, 'float' -> 4) -- TODO confirm the unit against the
# code that consumes this table.
type_size = {
    'i32': 4,
    'i64': 8,
    'f32': 4,
    'float': 4,
    'f64': 8,
    'double': 8,
}
class StreamsBenchmark(bench.Benchmark):
    """Benchmark whose IR is a counted loop, configured by stream parameters.

    The stream configuration is only stored on the instance; the IR generated
    by :meth:`build_ir` currently uses an empty loop body and no pointer
    arguments.
    """

    def __init__(self,
                 read_streams=0, read_write_streams=0, write_streams=0,
                 stream_byte_length=0,
                 element_type='i64'):
        """Record the stream configuration after default base initialisation."""
        super().__init__()
        self.read_streams = read_streams
        self.read_write_streams = read_write_streams
        self.write_streams = write_streams
        self.stream_byte_length = stream_byte_length
        self.element_type = element_type

    def build_ir(self, iaca_marker=False):
        """Return the LLVM IR source of the ``test`` function as a string.

        The function takes an ``i64`` named ``N`` and runs a loop whose counter
        starts at 0 and is incremented by 1 while it is less than ``N``.

        :param iaca_marker: when true, inline-asm marker sequences are inserted
            at the top of the loop block and in the end block; otherwise both
            insertion points receive an empty string.
        """
        # Both markers default to empty and are only filled in on request.
        start_marker = ''
        stop_marker = ''
        if iaca_marker:
            start_marker = textwrap.dedent('''\
                call void asm "movl $$111,%ebx", ""()
                call void asm ".byte 100,103,144", ""()''')
            stop_marker = textwrap.dedent('''\
                call void asm "movl $$222,%ebx", ""()
                call void asm ".byte 100,103,144", ""()''')

        # Doubled braces are literal braces once str.format has run.
        template = textwrap.dedent('''\
            define i64 @"test"(i64 %"N"{pointer_arguments})
            {{
            entry:
            %"loop_cond" = icmp slt i64 0, %"N"
            br i1 %"loop_cond", label %"loop", label %"end"
            loop:
            %"loop_counter" = phi i64 [0, %"entry"], [%"loop_counter.1", %"loop"]
            {iaca_start_marker}
            {loop_body}
            %"loop_counter.1" = add i64 %"loop_counter", 1
            %"loop_cond.1" = icmp slt i64 %"loop_counter.1", %"N"
            br i1 %"loop_cond.1", label %"loop", label %"end"
            end:
            %"ret" = phi i64 [0, %"entry"], [%"loop_counter", %"loop"]
            {iaca_stop_marker}
            ret i64 %"ret"
            }}
            ''')
        # Pointer arguments and loop body are not generated yet.
        return template.format(
            pointer_arguments='',
            loop_body='',
            iaca_start_marker=start_marker,
            iaca_stop_marker=stop_marker)
if __name__ == '__main__':
    # Script entry point: call bench.setup_llvm() first, then create a
    # StreamsBenchmark with its default arguments and print whatever
    # build_and_execute() returns.
    bench.setup_llvm()
    sb = StreamsBenchmark()
    print(sb.build_and_execute())

View File

@@ -1,12 +1,39 @@
import io
import os
import re
from codecs import open # To use a consistent encoding
from setuptools import setup, find_packages
here = os.path.abspath(os.path.dirname(__file__))
with open('README.rst') as f:
# Stolen from pip
def read(*names, **kwargs):
    """Return the decoded text of a file located relative to this script.

    :param names: path components joined onto the directory of this file.
    :param kwargs: only ``encoding`` is consulted; it defaults to ``utf8``.
    :return: the complete file contents as a string.
    """
    target = os.path.join(os.path.dirname(__file__), *names)
    encoding = kwargs.get("encoding", "utf8")
    # io.open is used explicitly, independent of any rebinding of ``open``.
    with io.open(target, encoding=encoding) as handle:
        return handle.read()
# Stolen from pip
def find_version(*file_paths):
    """Extract the ``__version__`` string from the file at *file_paths*.

    :param file_paths: path components forwarded unchanged to ``read``.
    :return: the text between the quotes of the first line of the form
        ``__version__ = '...'`` (single or double quotes).
    :raises RuntimeError: if no such line exists in the file.
    """
    contents = read(*file_paths)
    # re.M lets ``^`` match at the start of every line, not only the first.
    found = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                      contents, re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
# Get the long description from the relevant file
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()
setup(
name='asmbench',
version='0.1.3',
version=find_version('asmbench', '__init__.py'),
packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
url='https://github.com/RRZE-HPC/asmbench',
license='AGPLv3',