Merge branch 'master' of github.com:RRZE-HPC/asmbench

2026-01-08 13:30:06 +01:00 · 2020-06-08 16:08:14 +02:00
parent 50d585c758 ba91c9ff02
commit 9143ac609e
8 changed files with 142 additions and 27 deletions
--- a/.idea/other.xml
+++ b/.idea/other.xml
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="PySciProjectComponent">
-    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
-  </component>
-</project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/doc/asmbench-SC18SRC-poster" vcs="Git" />
-  </component>
-</project>
--- a/README.rst
+++ b/README.rst
@@ -8,10 +8,28 @@ Usage

 To benchmark latency and throughput of a 64bit integer add use the following command:

-``python -m asmbench 'add {src:i64:r}, {srcdst:i64:r}'``
+``asmbench 'add {src:i64:r}, {srcdst:i64:r}'``

 To benchmark two instructions interleaved use this:

-``python -m asmbench 'add {src:i64:r}, {srcdst:i64:r}' 'sub {src:i64:r}, {srcdst:i64:r}'``
+``asmbench 'add {src:i64:r}, {srcdst:i64:r}' 'sub {src:i64:r}, {srcdst:i64:r}'``

 To find out more add `-h` for help and `-v` for verbose mode.
+
+Operand Templates
+=================
+Operands always follow this form: ``{direction:data_type:pass_type}``.
+
+Direction may be ``src``, ``dst`` or ``srcdst``. This will allow asmbench to serialize the code (wherever possible). ``src`` operands are read, but not modified by the instruction. ``dst`` operands are written to, but not read. ``srcdst`` operands will be read and modified by the instruction.
+
+Data and Pass Types:
+
+* ``i64:r`` -> 64bit general purpose register (gpr) (e.g., ``%rax``)
+* ``i32:r`` -> 32bit gpr (e.g., ``%ecx``)
+* ``<2 x double>:x`` -> 128bit SSE register with two double precision floating-point numbers (e.g., ``%xmm1``)
+* ``<4 x float>:x`` -> 128bit SSE register with four single precision floating-point numbers (e.g., ``%xmm1``)
+* ``<4 x double>:x`` -> 256bit AVX register with four double precision floating-point numbers (e.g., ``%ymm1``)
+* ``<8 x float>:x`` -> 256bit AVX register with eight single precision floating-point numbers (e.g., ``%ymm1``)
+* ``<8 x double>:x`` -> 512bit AVX512 register with eight double precision floating-point numbers (e.g., ``%zmm1``)
+* ``<16 x float>:x`` -> 512bit AVX512 register with sixteen single precision floating-point numbers (e.g., ``%zmm1``)
+* ``i8:23`` -> 8bit immediate with the value 23 (i.e., ``$23``)
--- a/asmbench/__init__.py
+++ b/asmbench/__init__.py
@@ -0,0 +1 @@
+__version__ = '0.1.4'
--- a/asmbench/__main__.py
+++ b/asmbench/__main__.py
@@ -41,10 +41,9 @@ def main():
                                       verbosity=args.verbose,
                                       iaca_comparison=args.iaca,
                                       frequency=args.frequency)
-    if lat:
-        print("Latency: {:.2f} cycle\nThroughput: {:.2f} cycle\n".format(lat, tp))
-    else:
-        print("Throughput: {:.2f} cycle\n".format(tp))
+    if lat is not None:
+        print("Latency: {:.2f} cycle".format(lat))
+    print("Throughput: {:.2f} cycle\n".format(tp))


 if __name__ == "__main__":
--- a/asmbench/bench.py
+++ b/asmbench/bench.py
@@ -12,9 +12,9 @@ import sys
 import llvmlite.binding as llvm
 import psutil
 try:
-    from kerncraft import iaca
+    from kerncraft import incode_model
 except ImportError:
-    iaca = None
+    incode_model = None

 from . import op

@@ -34,7 +34,7 @@ def uniquify(l):

 class Benchmark:
    def __init__(self, frequency=None):
-        self.frequency = frequency or psutil.cpu_freq().current * 1e6
+        self.frequency = frequency or psutil.cpu_freq().max * 1e6

    def __repr__(self):
        return '{}({})'.format(
@@ -87,13 +87,13 @@ class Benchmark:

    def get_iaca_analysis(self, arch):
        """Compile and return IACA analysis."""
-        if iaca is None:
+        if incode_model is None:
            raise ValueError("kerncraft not installed. IACA analysis is not supported.")
        tm = self.get_target_machine()
        tmpf = tempfile.NamedTemporaryFile("wb")
        tmpf.write(tm.emit_object(self.get_llvm_module(iaca_marker=True)))
        tmpf.flush()
-        return iaca.iaca_analyse_instrumented_binary(tmpf.name, arch)
+        return incode_model.iaca_analyse_instrumented_binary(tmpf.name, arch)

    def build_and_execute(self, repeat=10, min_elapsed=0.1, max_elapsed=0.3):
        # Compile the module to machine code using MCJIT
@@ -191,7 +191,7 @@ class LoopBenchmark(Benchmark):
                if src_idx == last_match_idx:
                    break
        if not matched:
-            raise ValueError("Unable to match source to any destination.")
+            pass #raise ValueError("Unable to match source to any destination.")

        code = ''
        for dst_reg, dst_name, init_value, src_reg, src_name in lcd:
@@ -307,6 +307,7 @@ def bench_instructions(instructions, serial_factor=8, parallel_factor=4, through
    except op.NotSerializableError as e:
        print("Latency measurement not possible:", e)
        not_serializable = True
+        lat = None

    if not_serializable:
        lat = None
--- a/asmbench/streams.py
+++ b/asmbench/streams.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+import collections
+import itertools
+import socket
+import textwrap
+
+import numpy
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+
+from asmbench import op, bench
+from asmbench import oldjit
+
+
+type_size = {
+    'i32': 4,
+    'i64': 8,
+    'f32': 4,
+    'float': 4,
+    'f64': 8,
+    'double': 8,
+}
+
+
+class StreamsBenchmark(bench.Benchmark):
+    """
+    Benchmark skeleton parameterized by a number of read, read-write and write streams.
+
+    The stream configuration is only stored on the instance so far; build_ir() does not
+    yet use it and emits an empty counting loop.
+    """
+
+    def __init__(self,
+                 read_streams=0, read_write_streams=0, write_streams=0,
+                 stream_byte_length=0,
+                 element_type='i64'):
+        """
+        Store the stream configuration.
+
+        :param read_streams: number of read streams (default 0)
+        :param read_write_streams: number of read-write streams (default 0)
+        :param write_streams: number of write streams (default 0)
+        :param stream_byte_length: stream length in bytes (default 0)
+        :param element_type: element type name (default 'i64')
+        """
+        # No frequency is passed on, so the base class picks its own default.
+        super().__init__()
+        self.read_streams = read_streams
+        self.read_write_streams = read_write_streams
+        self.write_streams = write_streams
+        self.stream_byte_length = stream_byte_length
+        self.element_type = element_type
+
+    def build_ir(self, iaca_marker=False):
+        """
+        Return the LLVM IR text of the benchmark function ``test``.
+
+        The generated function takes an i64 ``N`` and counts a loop from 0 up to ``N``. It
+        returns 0 if the loop is never entered, otherwise the last value of the loop counter.
+
+        :param iaca_marker: if True, inline-assembly marker sequences are placed at the top of
+                            the loop body and in the ``end`` block
+        """
+        # Marker sequences are emitted as inline assembly; they are left empty when no IACA
+        # analysis is requested. ``$$`` is LLVM's inline-asm escape for a literal ``$``.
+        if iaca_marker:
+            iaca_start_marker = textwrap.dedent('''\
+                call void asm "movl    $$111,%ebx", ""()
+                call void asm ".byte   100,103,144", ""()''')
+            iaca_stop_marker = textwrap.dedent('''\
+                call void asm "movl    $$222,%ebx", ""()
+                call void asm ".byte   100,103,144", ""()''')
+        else:
+            iaca_start_marker = ''
+            iaca_stop_marker = ''
+
+        # The template is filled via str.format, hence the doubled braces for the literal
+        # function body braces. pointer_arguments and loop_body are currently empty strings:
+        # no stream pointers are passed in and the loop performs no work besides counting.
+        ir = textwrap.dedent('''\
+            define i64 @"test"(i64 %"N"{pointer_arguments})
+            {{
+            entry:
+              %"loop_cond" = icmp slt i64 0, %"N"
+              br i1 %"loop_cond", label %"loop", label %"end"
+
+            loop:
+              %"loop_counter" = phi i64 [0, %"entry"], [%"loop_counter.1", %"loop"]
+            {iaca_start_marker}
+            {loop_body}
+              %"loop_counter.1" = add i64 %"loop_counter", 1
+              %"loop_cond.1" = icmp slt i64 %"loop_counter.1", %"N"
+              br i1 %"loop_cond.1", label %"loop", label %"end"
+
+            end:
+              %"ret" = phi i64 [0, %"entry"], [%"loop_counter", %"loop"]
+            {iaca_stop_marker}
+              ret i64 %"ret"
+            }}
+            ''').format(
+            pointer_arguments='',
+            loop_body='',
+            iaca_start_marker=iaca_start_marker,
+            iaca_stop_marker=iaca_stop_marker)
+
+        return ir
+
+if __name__ == '__main__':
+    bench.setup_llvm()
+    sb = StreamsBenchmark()
+    print(sb.build_and_execute())
+
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,39 @@
+import io
+import os
+import re
+from codecs import open  # To use a consistent encoding
+
 from setuptools import setup, find_packages

+here = os.path.abspath(os.path.dirname(__file__))

-with open('README.rst') as f:
+
+# Stolen from pip
+def read(*names, **kwargs):
+    """
+    Return the text content of a file located relative to this setup script.
+
+    :param names: path components, joined onto the directory containing this file
+    :param kwargs: only ``encoding`` is looked at; it defaults to "utf8"
+    """
+    with io.open(
+            os.path.join(os.path.dirname(__file__), *names),
+            encoding=kwargs.get("encoding", "utf8")
+    ) as fp:
+        return fp.read()
+
+
+# Stolen from pip
+def find_version(*file_paths):
+    """
+    Extract the version string from a ``__version__ = '...'`` assignment in a file.
+
+    :param file_paths: path components of the file to scan, passed on to read()
+    :return: the text between the quotes of the first matching assignment
+    :raises RuntimeError: if no such assignment is found
+    """
+    version_file = read(*file_paths)
+    # re.M lets ``^`` match at the start of any line, not just the start of the file.
+    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
+                              version_file, re.M)
+    if version_match:
+        return version_match.group(1)
+    raise RuntimeError("Unable to find version string.")
+
+
+# Get the long description from the relevant file
+with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
    long_description = f.read()

 setup(
    name='asmbench',
-    version='0.1.3',
+    version=find_version('asmbench', '__init__.py'),
    packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
    url='https://github.com/RRZE-HPC/asmbench',
    license='AGPLv3',