mirror of
https://github.com/RRZE-HPC/asmbench.git
synced 2026-01-08 21:40:12 +01:00
finalizing CLI, started with mem support
This commit is contained in:
@@ -26,7 +26,10 @@ def main():
|
||||
parallel_factor=args.parallel,
|
||||
throughput_serial_factor=args.throughput_serial,
|
||||
verbosity=args.verbose)
|
||||
print("Latency: {}\nThroughput: {}\n".format(lat, tp))
|
||||
print("Latency: {:.2f} cycle\nThroughput: {:.2f} cycle\n".format(lat, tp))
|
||||
|
||||
b = bench.IntegerLoopBenchmark(args.instructions[0])
|
||||
b.get_iaca_analysis()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -5,6 +5,8 @@ import textwrap
|
||||
import itertools
|
||||
import re
|
||||
from pprint import pprint
|
||||
import tempfile
|
||||
import subprocess
|
||||
|
||||
import llvmlite.binding as llvm
|
||||
import psutil
|
||||
@@ -80,6 +82,17 @@ class Benchmark:
|
||||
def get_function_ctype(self):
    """Return a ctypes function prototype: one int64 argument, int64 result."""
    int64 = ctypes.c_int64
    # CFUNCTYPE takes the result type first, followed by the argument types
    return ctypes.CFUNCTYPE(int64, int64)
|
||||
|
||||
def get_iaca_analysis(self):
    """
    Compile the benchmark to an object file and return its disassembly.

    The module returned by ``get_llvm_module()`` is emitted as object code by
    the target machine, written to a temporary file and handed to
    ``objdump -d``.

    NOTE(review): despite the method name no IACA tool is invoked here; an
    earlier comment assumed an "iaca.sh" script to be available -- TODO confirm
    how the IACA step is supposed to be wired in.

    :return: raw output of ``objdump`` as bytes
    :raises subprocess.CalledProcessError: if ``objdump`` exits with a non-zero status
    """
    tm = self.get_target_machine()
    # The context manager closes the temporary file even if objdump fails,
    # instead of leaving the cleanup to garbage collection.
    with tempfile.NamedTemporaryFile("wb") as tmpf:
        tmpf.write(tm.emit_object(self.get_llvm_module()))
        # Flush so that objdump sees the complete object file on disk
        tmpf.flush()

        # objdump refuses to run without an action option; -d disassembles the
        # executable sections. The output is returned rather than discarded.
        return subprocess.check_output(['objdump', '-d', tmpf.name])
|
||||
|
||||
|
||||
def build_and_execute(self, repeat=10, min_elapsed=0.1, max_elapsed=0.3):
|
||||
# Compile the module to machine code using MCJIT
|
||||
tm = self.get_target_machine()
|
||||
@@ -130,7 +143,7 @@ class LoopBenchmark(Benchmark):
|
||||
def __init__(self, root_synth, init_values=None):
|
||||
super().__init__()
|
||||
self.root_synth = root_synth
|
||||
self.init_values = init_values or []
|
||||
self.init_values = init_values or root_synth.get_default_init_values()
|
||||
|
||||
if len(root_synth.get_source_registers()) != len(self.init_values):
|
||||
raise ValueError("Number of init values and source registers do not match.")
|
||||
@@ -142,7 +155,7 @@ class LoopBenchmark(Benchmark):
|
||||
return ['%out.{}'.format(i) for i in
|
||||
range(len(self.root_synth.get_destination_registers()))]
|
||||
|
||||
def get_phi_code(self):
|
||||
def get_phi_code(self, latency=True):
|
||||
# Compile loop carried dependencies
|
||||
lcd = []
|
||||
# Change in naming (src <-> dst) is on purpose!
|
||||
@@ -180,7 +193,7 @@ class LoopBenchmark(Benchmark):
|
||||
init_value=init_value,
|
||||
src_name=src_name)
|
||||
|
||||
# Add extra phi for constant values. Assuming LLVM will optimiz them "away"
|
||||
# Add extra phi for constant values. Assuming LLVM will optimize them "away"
|
||||
for dst_idx, dst in enumerate(dsts):
|
||||
if dst not in [d for d, dn, i, s, sn in lcd]:
|
||||
code += ('{dst_reg} = phi {llvm_type} [{init_value}, %"entry"], '
|
||||
@@ -225,28 +238,36 @@ class IntegerLoopBenchmark(LoopBenchmark):
|
||||
|
||||
def bench_instructions(instructions, serial_factor=8, parallel_factor=4, throughput_serial_factor=8,
|
||||
verbosity=0):
|
||||
# Latency Benchmark
|
||||
if verbosity > 0:
|
||||
print('## Latency Benchmark')
|
||||
p_instrs = []
|
||||
for i in instructions:
|
||||
p_instrs.append(op.Serialized([i] * serial_factor))
|
||||
p = op.Parallelized(p_instrs)
|
||||
init_values = [op.init_value_by_llvm_type[reg.llvm_type] for reg in p.get_source_registers()]
|
||||
b = IntegerLoopBenchmark(p, init_values)
|
||||
if verbosity >= 3:
|
||||
print('### LLVM IR')
|
||||
print(b.build_ir())
|
||||
if verbosity >= 2:
|
||||
print('### Assembly')
|
||||
print(b.get_assembly())
|
||||
result = b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2)
|
||||
lat = min(*[(t / serial_factor) * result['frequency'] / result['iterations']
|
||||
for t in result['runtimes']])
|
||||
if verbosity > 0:
|
||||
print('### Detailed Results')
|
||||
pprint(result)
|
||||
print()
|
||||
not_serializable = False
|
||||
try:
|
||||
# Latency Benchmark
|
||||
if verbosity > 0:
|
||||
print('## Latency Benchmark')
|
||||
p_instrs = []
|
||||
for i in instructions:
|
||||
p_instrs.append(op.Serialized([i] * serial_factor))
|
||||
p = op.Parallelized(p_instrs)
|
||||
b = IntegerLoopBenchmark(p)
|
||||
if verbosity >= 3:
|
||||
print('### LLVM IR')
|
||||
print(b.build_ir())
|
||||
if verbosity >= 2:
|
||||
print('### Assembly')
|
||||
print(b.get_assembly())
|
||||
result = b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2)
|
||||
lat = min(*[(t / serial_factor) * result['frequency'] / result['iterations']
|
||||
for t in result['runtimes']])
|
||||
if verbosity > 0:
|
||||
print('### Detailed Results')
|
||||
pprint(result)
|
||||
print()
|
||||
except op.NotSerializableError as e:
|
||||
print("Latency measurement not possible:", e)
|
||||
not_serializable = True
|
||||
|
||||
if not_serializable:
|
||||
throughput_serial_factor = 1
|
||||
print("WARNING: throughput_serial_factor has be set to 1.")
|
||||
|
||||
# Throughput Benchmark
|
||||
if verbosity > 0:
|
||||
@@ -255,9 +276,7 @@ def bench_instructions(instructions, serial_factor=8, parallel_factor=4, through
|
||||
for i in instructions:
|
||||
p_instrs.append(op.Serialized([i] * throughput_serial_factor))
|
||||
p = op.Parallelized(p_instrs * parallel_factor)
|
||||
init_values = [op.init_value_by_llvm_type[reg.llvm_type] for reg in
|
||||
p.get_source_registers()]
|
||||
b = IntegerLoopBenchmark(p, init_values)
|
||||
b = IntegerLoopBenchmark(p)
|
||||
if verbosity >= 3:
|
||||
print('### LLVM IR')
|
||||
print(b.build_ir())
|
||||
|
||||
101
asmjit/op.py
101
asmjit/op.py
@@ -15,6 +15,9 @@ init_value_by_llvm_type.update(
|
||||
for vec in [2, 4, 8, 16, 32, 64]})
|
||||
|
||||
|
||||
class NotSerializableError(Exception):
    """Signal that a sequence of operations can not be serialized."""
|
||||
|
||||
class Operand:
|
||||
def __init__(self, llvm_type):
|
||||
self.llvm_type = llvm_type
|
||||
@@ -28,6 +31,16 @@ class Operand:
|
||||
', '.join(['{}={!r}'.format(k, v) for k, v in self.__dict__.items()
|
||||
if not k.startswith('_')]))
|
||||
|
||||
@staticmethod
def from_string(s):
    """
    Create an operand from string by trying each known operand type in turn.

    Register, Immediate and MemoryReference are tried in that order; the
    first one whose ``from_string`` accepts *s* provides the result.

    :param s: operand description understood by one of the operand types
    :raises ValueError: if none of the operand types accepts *s*
    """
    for operand_type in (Register, Immediate, MemoryReference):
        try:
            return operand_type.from_string(s)
        except ValueError:
            # Not a valid description for this operand type, try the next one
            pass
    raise ValueError("No matching operand type found for '{}'.".format(s))
|
||||
|
||||
|
||||
class Immediate(Operand):
|
||||
def __init__(self, llvm_type, value):
|
||||
@@ -37,6 +50,19 @@ class Immediate(Operand):
|
||||
def get_constraint_char(self):
    """Return the character representing an immediate in a constraint string."""
    constraint_char = 'i'
    return constraint_char
|
||||
|
||||
@classmethod
def from_string(cls, s):
    """
    Create Immediate object from string.

    :param s: must have the form: "llvm_type:value", where value is either a
              hexadecimal number ("0x...") or an unsigned decimal number with
              an optional fractional part
    :return: new object; the value is passed on as the string found in *s*
    :raises ValueError: if *s* contains no ':' or the value is malformed
    """
    # Unpacking raises ValueError by itself if the ':' separator is missing
    llvm_type, value = s.split(':', 1)
    value_regex = r'(0x[0-9a-fA-F]+|[0-9]+(\.[0-9]+)?)'
    # fullmatch rather than match: the whole value has to be a number, a
    # numeric prefix followed by garbage (e.g. "12abc") must be rejected.
    if not re.fullmatch(value_regex, value):
        raise ValueError("Invalid immediate value, must match {!r}".format(value_regex))
    return cls(llvm_type, value)
|
||||
|
||||
|
||||
class MemoryReference(Operand):
|
||||
"""
|
||||
@@ -78,6 +104,38 @@ class MemoryReference(Operand):
|
||||
def get_constraint_char(self):
    """Return the character representing a memory reference in a constraint string."""
    constraint_char = 'm'
    return constraint_char
|
||||
|
||||
def get_registers(self):
    """Yield the base and then the index of this reference, skipping unset ones."""
    for candidate in (self.base, self.index):
        if candidate:
            yield candidate
|
||||
|
||||
@classmethod
def from_string(cls, s):
    """
    Create MemoryReference from string.

    :param s: must have the form: "*llvm_type:features", where features is a
              non-empty combination of the characters "o" (offset),
              "b" (base), "i" (index) and "w" (width)
    :return: new object in which every requested feature is filled with a
             fixed placeholder operand and all others are None
    :raises ValueError: if *s* does not have the required form
    """
    # fullmatch rather than match: trailing characters that are not valid
    # feature flags must be rejected instead of silently dropped.
    m = re.fullmatch(r"\*([^:]+):([obiw]+)", s)
    if not m:
        raise ValueError("Invalid format, must match '*llvm_type:[obiw]+'.")
    llvm_type, features = m.groups()

    # Requested features get fixed placeholder operands
    offset = Immediate('i32', 8) if 'o' in features else None
    base = Register('i64', 'r') if 'b' in features else None
    index = Register('i64', 'r') if 'i' in features else None
    width = Immediate('i32', 8) if 'w' in features else None
    return cls(llvm_type, offset=offset, base=base, index=index, width=width)
|
||||
|
||||
|
||||
class Register(Operand):
|
||||
def __init__(self, llvm_type, constraint_char='r'):
|
||||
@@ -94,7 +152,11 @@ class Register(Operand):
|
||||
|
||||
:param s: must have the form: "llvm_type:constraint_char"
|
||||
"""
|
||||
return cls(*s.split(':', 1))
|
||||
llvm_type, constraint_char = s.split(':', 1)
|
||||
valid_cc = 'rx'
|
||||
if constraint_char not in valid_cc:
|
||||
raise ValueError("Invalid constraint character, must be one of {!r}".format(valid_cc))
|
||||
return cls(llvm_type, constraint_char)
|
||||
|
||||
|
||||
class Synthable:
|
||||
@@ -120,6 +182,9 @@ class Synthable:
|
||||
used_registers.add(name)
|
||||
return name
|
||||
|
||||
def get_default_init_values(self):
    """Return one init value per source register, looked up by its LLVM type."""
    defaults = []
    for register in self.get_source_registers():
        defaults.append(init_value_by_llvm_type[register.llvm_type])
    return defaults
|
||||
|
||||
def __repr__(self):
|
||||
return '{}({})'.format(
|
||||
self.__class__.__name__,
|
||||
@@ -146,7 +211,9 @@ class Instruction(Operation):
|
||||
self.source_operands = source_operands
|
||||
|
||||
def get_source_registers(self):
|
||||
return [sop for sop in self.source_operands if isinstance(sop, Register)]
|
||||
return [sop for sop in self.source_operands if isinstance(sop, Register)] + \
|
||||
[r for mop in self.source_operands if isinstance(mop, MemoryReference)
|
||||
for r in mop.get_registers()]
|
||||
|
||||
def get_destination_registers(self):
|
||||
if isinstance(self.destination_operand, Register):
|
||||
@@ -154,10 +221,13 @@ class Instruction(Operation):
|
||||
else:
|
||||
return []
|
||||
|
||||
def build_ir(self, dst_reg_names, src_reg_names, used_registers):
|
||||
def build_ir(self, dst_reg_names, src_reg_names, used_registers=None):
|
||||
"""
|
||||
Build IR string based on in and out operand names and types.
|
||||
"""
|
||||
if used_registers is None:
|
||||
used_registers = set(dst_reg_names + src_reg_names)
|
||||
|
||||
# Build constraint string from operands
|
||||
constraints = ','.join(
|
||||
['=' + self.destination_operand.get_constraint_char()] +
|
||||
@@ -176,6 +246,11 @@ class Instruction(Operation):
|
||||
type=sop.llvm_type,
|
||||
repr=src_reg_names[i]))
|
||||
i += 1
|
||||
elif isinstance(sop, MemoryReference):
|
||||
operands.append('{type} {repr}'.format(
|
||||
type=sop.llvm_type,
|
||||
repr=src_reg_names[i]))
|
||||
i += 1
|
||||
else:
|
||||
raise NotImplementedError("Only register and immediate operands are supported.")
|
||||
args = ', '.join(operands)
|
||||
@@ -201,26 +276,30 @@ class Instruction(Operation):
|
||||
# It is important that the match objects are in reverse order, to allow string replacements
|
||||
# based on original match group locations
|
||||
operands = list(reversed(list(re.finditer(r"\{((?:src|dst)+):([^\}]+)\}", s))))
|
||||
# Destination indices start at 0, source indices at "number of destination operands"
|
||||
dst_index, src_index = 0, ['dst' in o.group(1) for o in operands].count(True)
|
||||
# Destination indices start at 0
|
||||
dst_index = 0
|
||||
# Source indices at "number of destination operands"
|
||||
src_index = ['dst' in o.group(1) for o in operands].count(True)
|
||||
|
||||
dst_ops = []
|
||||
src_ops = []
|
||||
for m in operands:
|
||||
direction, register_string = m.group(1, 2)
|
||||
register = Register.from_string(register_string)
|
||||
direction, operand_string = m.group(1, 2)
|
||||
operand = Operand.from_string(operand_string)
|
||||
if 'src' in direction and not 'dst' in direction:
|
||||
src_ops.append(register)
|
||||
src_ops.append(operand)
|
||||
# replace with index string
|
||||
instruction = (instruction[:m.start()] + "${}".format(src_index)
|
||||
+ instruction[m.end():])
|
||||
src_index += 1
|
||||
if 'dst' in direction:
|
||||
dst_ops.append(register)
|
||||
dst_ops.append(operand)
|
||||
# replace with index string
|
||||
instruction = (instruction[:m.start()] + "${}".format(dst_index)
|
||||
+ instruction[m.end():])
|
||||
if 'src' in direction:
|
||||
src_ops.append(Register(register_string.split(':', 1)[0], str(dst_index)))
|
||||
src_ops.append(Register(operand_string.split(':', 1)[0], str(dst_index)))
|
||||
src_index += 1
|
||||
dst_index += 1
|
||||
|
||||
if len(dst_ops) != 1:
|
||||
@@ -310,7 +389,7 @@ class Serialized(Synthable):
|
||||
if not src_match:
|
||||
src_naming.append(init_value_by_llvm_type[src.llvm_type])
|
||||
if not match:
|
||||
raise ValueError("Unable to find match.")
|
||||
raise NotSerializableError("Unable to find match.")
|
||||
|
||||
if i == len(self.synths) - 1:
|
||||
# last destination is passed in from outside
|
||||
|
||||
20
tablegen.py
20
tablegen.py
@@ -473,7 +473,9 @@ def main():
|
||||
instructions_ret_type[instr_op.get_destination_registers()[0].llvm_type][
|
||||
instr_name] = (instr_name, instr_op)
|
||||
# Constructing random benchmarks, one for each return type
|
||||
for t in instructions_ret_type:
|
||||
random.seed(42)
|
||||
parallel_factor = 8
|
||||
for t in sorted(instructions_ret_type):
|
||||
valid = False
|
||||
while not valid:
|
||||
selected_names, selected_instrs = zip(
|
||||
@@ -485,14 +487,22 @@ def main():
|
||||
valid = True
|
||||
|
||||
serial = op.Serialized(selected_instrs)
|
||||
p = op.Parallelized([serial] * 10)
|
||||
p = op.Parallelized([serial] * parallel_factor)
|
||||
|
||||
init_values = [op.init_value_by_llvm_type[reg.llvm_type] for reg in
|
||||
p.get_source_registers()]
|
||||
b = bench.IntegerLoopBenchmark(p, init_values)
|
||||
print(selected_names)
|
||||
pprint(selected_instrs)
|
||||
print(b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2))
|
||||
print('## Selected Instructions')
|
||||
print(', '.join(selected_names))
|
||||
print('## Generated Assembly ({}x parallel)'.format(parallel_factor))
|
||||
print(b.get_assembly())
|
||||
#pprint(selected_instrs)
|
||||
r = b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2)
|
||||
r['parallel_factor'] = parallel_factor
|
||||
print('## Detailed Results')
|
||||
pprint(r)
|
||||
print("minimal throughput: {:.2f} cy".format(
|
||||
min(r['runtimes'])/r['iterations']*r['frequency']/parallel_factor))
|
||||
|
||||
|
||||
def can_serialize(instr):
|
||||
|
||||
Reference in New Issue
Block a user