finalizing CLI, started with mem support

This commit is contained in:
Julian Hammer
2018-07-04 16:53:06 +02:00
parent 7a6d2e91ce
commit 654e2cd62f
4 changed files with 156 additions and 45 deletions

View File

@@ -26,7 +26,10 @@ def main():
parallel_factor=args.parallel,
throughput_serial_factor=args.throughput_serial,
verbosity=args.verbose)
print("Latency: {}\nThroughput: {}\n".format(lat, tp))
print("Latency: {:.2f} cycle\nThroughput: {:.2f} cycle\n".format(lat, tp))
b = bench.IntegerLoopBenchmark(args.instructions[0])
b.get_iaca_analysis()
if __name__ == "__main__":

View File

@@ -5,6 +5,8 @@ import textwrap
import itertools
import re
from pprint import pprint
import tempfile
import subprocess
import llvmlite.binding as llvm
import psutil
@@ -80,6 +82,17 @@ class Benchmark:
def get_function_ctype(self):
    """Return the ctypes function prototype: int64 result, one int64 argument."""
    int64 = ctypes.c_int64
    # CFUNCTYPE takes the result type first, followed by the argument types.
    return ctypes.CFUNCTYPE(int64, int64)
def get_iaca_analysis(self):
    """
    Compile the benchmark to an object file and return the analysis tool output.

    The object code is written to a named temporary file, which is closed (and
    thereby removed) as soon as the external tool has finished.

    :return: raw output (bytes) captured from the external tool
    :raises subprocess.CalledProcessError: if the tool exits with a non-zero status
    """
    tm = self.get_target_machine()
    # The context manager closes and deletes the temporary file deterministically,
    # also when the subprocess call raises.
    with tempfile.NamedTemporaryFile("wb") as tmpf:
        tmpf.write(tm.emit_object(self.get_llvm_module()))
        # Flush so the external process sees the complete object file on disk.
        tmpf.flush()
        # assuming "iaca.sh" to be available
        # NOTE(review): objdump is what is actually invoked here, not iaca.sh -- confirm.
        return subprocess.check_output(['objdump', tmpf.name])
def build_and_execute(self, repeat=10, min_elapsed=0.1, max_elapsed=0.3):
# Compile the module to machine code using MCJIT
tm = self.get_target_machine()
@@ -130,7 +143,7 @@ class LoopBenchmark(Benchmark):
def __init__(self, root_synth, init_values=None):
super().__init__()
self.root_synth = root_synth
self.init_values = init_values or []
self.init_values = init_values or root_synth.get_default_init_values()
if len(root_synth.get_source_registers()) != len(self.init_values):
raise ValueError("Number of init values and source registers do not match.")
@@ -142,7 +155,7 @@ class LoopBenchmark(Benchmark):
return ['%out.{}'.format(i) for i in
range(len(self.root_synth.get_destination_registers()))]
def get_phi_code(self):
def get_phi_code(self, latency=True):
# Compile loop carried dependencies
lcd = []
# Change in naming (src <-> dst) is on purpose!
@@ -180,7 +193,7 @@ class LoopBenchmark(Benchmark):
init_value=init_value,
src_name=src_name)
# Add extra phi for constant values. Assuming LLVM will optimiz them "away"
# Add extra phi for constant values. Assuming LLVM will optimize them "away"
for dst_idx, dst in enumerate(dsts):
if dst not in [d for d, dn, i, s, sn in lcd]:
code += ('{dst_reg} = phi {llvm_type} [{init_value}, %"entry"], '
@@ -225,28 +238,36 @@ class IntegerLoopBenchmark(LoopBenchmark):
def bench_instructions(instructions, serial_factor=8, parallel_factor=4, throughput_serial_factor=8,
verbosity=0):
# Latency Benchmark
if verbosity > 0:
print('## Latency Benchmark')
p_instrs = []
for i in instructions:
p_instrs.append(op.Serialized([i] * serial_factor))
p = op.Parallelized(p_instrs)
init_values = [op.init_value_by_llvm_type[reg.llvm_type] for reg in p.get_source_registers()]
b = IntegerLoopBenchmark(p, init_values)
if verbosity >= 3:
print('### LLVM IR')
print(b.build_ir())
if verbosity >= 2:
print('### Assembly')
print(b.get_assembly())
result = b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2)
lat = min(*[(t / serial_factor) * result['frequency'] / result['iterations']
for t in result['runtimes']])
if verbosity > 0:
print('### Detailed Results')
pprint(result)
print()
not_serializable = False
try:
# Latency Benchmark
if verbosity > 0:
print('## Latency Benchmark')
p_instrs = []
for i in instructions:
p_instrs.append(op.Serialized([i] * serial_factor))
p = op.Parallelized(p_instrs)
b = IntegerLoopBenchmark(p)
if verbosity >= 3:
print('### LLVM IR')
print(b.build_ir())
if verbosity >= 2:
print('### Assembly')
print(b.get_assembly())
result = b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2)
lat = min(*[(t / serial_factor) * result['frequency'] / result['iterations']
for t in result['runtimes']])
if verbosity > 0:
print('### Detailed Results')
pprint(result)
print()
except op.NotSerializableError as e:
print("Latency measurement not possible:", e)
not_serializable = True
if not_serializable:
throughput_serial_factor = 1
print("WARNING: throughput_serial_factor has be set to 1.")
# Throughput Benchmark
if verbosity > 0:
@@ -255,9 +276,7 @@ def bench_instructions(instructions, serial_factor=8, parallel_factor=4, through
for i in instructions:
p_instrs.append(op.Serialized([i] * throughput_serial_factor))
p = op.Parallelized(p_instrs * parallel_factor)
init_values = [op.init_value_by_llvm_type[reg.llvm_type] for reg in
p.get_source_registers()]
b = IntegerLoopBenchmark(p, init_values)
b = IntegerLoopBenchmark(p)
if verbosity >= 3:
print('### LLVM IR')
print(b.build_ir())

View File

@@ -15,6 +15,9 @@ init_value_by_llvm_type.update(
for vec in [2, 4, 8, 16, 32, 64]})
class NotSerializableError(Exception):
    """Error raised when a serialized chain cannot be built (no match found)."""
class Operand:
def __init__(self, llvm_type):
self.llvm_type = llvm_type
@@ -28,6 +31,16 @@ class Operand:
', '.join(['{}={!r}'.format(k, v) for k, v in self.__dict__.items()
if not k.startswith('_')]))
@staticmethod
def from_string(s):
    """
    Create an operand object from its string description.

    The Register, Immediate and MemoryReference parsers are tried in that order and
    the first one that accepts the string provides the result.

    :param s: operand description string
    :raises ValueError: if none of the parsers accepts the string
    """
    parsers = (Register.from_string, Immediate.from_string, MemoryReference.from_string)
    for parse in parsers:
        try:
            operand = parse(s)
        except ValueError:
            # Not this kind of operand, fall through to the next parser.
            pass
        else:
            return operand
    raise ValueError("No matching operand type found for '{}'.".format(s))
class Immediate(Operand):
def __init__(self, llvm_type, value):
@@ -37,6 +50,19 @@ class Immediate(Operand):
def get_constraint_char(self):
return 'i'
@classmethod
def from_string(cls, s):
    """
    Create Immediate object from string.

    :param s: must have the form: "llvm_type:value", where value is either a
              hexadecimal literal (0x...) or a decimal number with an optional
              fractional part
    :raises ValueError: if the ':' separator is missing or value is not a valid literal
    """
    llvm_type, value = s.split(':', 1)
    value_regex = r'(0x[0-9a-fA-F]+|[0-9]+(\.[0-9]+)?)'
    # fullmatch is required: match() only anchors at the start and would accept
    # values with trailing garbage such as "12abc".
    if not re.fullmatch(value_regex, value):
        raise ValueError("Invalid immediate value, must match {!r}".format(value_regex))
    return cls(llvm_type, value)
class MemoryReference(Operand):
"""
@@ -78,6 +104,38 @@ class MemoryReference(Operand):
def get_constraint_char(self):
return 'm'
def get_registers(self):
    """Yield the registers of this memory reference: base first, then index.

    Parts that are unset (falsy) are skipped.
    """
    for part in ('base', 'index'):
        register = getattr(self, part)
        if register:
            yield register
@classmethod
def from_string(cls, s):
    """
    Create MemoryReference from string.

    :param s: must have the form "*llvm_type:features", where features is a
              non-empty combination of the letters o, b, i and w:
              o adds an offset (i32 immediate 8), b a base register (i64, 'r'),
              i an index register (i64, 'r') and w a width (i32 immediate 8)
    :raises ValueError: if s does not have this form
    """
    # fullmatch is required: match() only anchors at the start and would accept
    # strings with unknown trailing feature letters such as "*i64:bz".
    m = re.fullmatch(r"\*([^:]+):([obiw]+)", s)
    if not m:
        raise ValueError("Invalid format, must match '*llvm_type:[obiw]+'.")
    llvm_type, features = m.groups()
    offset = Immediate('i32', 8) if 'o' in features else None
    base = Register('i64', 'r') if 'b' in features else None
    index = Register('i64', 'r') if 'i' in features else None
    width = Immediate('i32', 8) if 'w' in features else None
    return cls(llvm_type, offset=offset, base=base, index=index, width=width)
class Register(Operand):
def __init__(self, llvm_type, constraint_char='r'):
@@ -94,7 +152,11 @@ class Register(Operand):
:param s: must have the form: "llvm_type:constraint_char"
"""
return cls(*s.split(':', 1))
llvm_type, constraint_char = s.split(':', 1)
valid_cc = 'rx'
if constraint_char not in valid_cc:
raise ValueError("Invalid constraint character, must be one of {!r}".format(valid_cc))
return cls(llvm_type, constraint_char)
class Synthable:
@@ -120,6 +182,9 @@ class Synthable:
used_registers.add(name)
return name
def get_default_init_values(self):
    """Return one default init value per source register, looked up by LLVM type."""
    llvm_types = (register.llvm_type for register in self.get_source_registers())
    return [init_value_by_llvm_type[llvm_type] for llvm_type in llvm_types]
def __repr__(self):
return '{}({})'.format(
self.__class__.__name__,
@@ -146,7 +211,9 @@ class Instruction(Operation):
self.source_operands = source_operands
def get_source_registers(self):
return [sop for sop in self.source_operands if isinstance(sop, Register)]
return [sop for sop in self.source_operands if isinstance(sop, Register)] + \
[r for mop in self.source_operands if isinstance(mop, MemoryReference)
for r in mop.get_registers()]
def get_destination_registers(self):
if isinstance(self.destination_operand, Register):
@@ -154,10 +221,13 @@ class Instruction(Operation):
else:
return []
def build_ir(self, dst_reg_names, src_reg_names, used_registers):
def build_ir(self, dst_reg_names, src_reg_names, used_registers=None):
"""
Build IR string based on in and out operand names and types.
"""
if used_registers is None:
used_registers = set(dst_reg_names + src_reg_names)
# Build constraint string from operands
constraints = ','.join(
['=' + self.destination_operand.get_constraint_char()] +
@@ -176,6 +246,11 @@ class Instruction(Operation):
type=sop.llvm_type,
repr=src_reg_names[i]))
i += 1
elif isinstance(sop, MemoryReference):
operands.append('{type} {repr}'.format(
type=sop.llvm_type,
repr=src_reg_names[i]))
i += 1
else:
raise NotImplementedError("Only register and immediate operands are supported.")
args = ', '.join(operands)
@@ -201,26 +276,30 @@ class Instruction(Operation):
# It is important that the match objects are in reverse order, to allow string replacements
# based on original match group locations
operands = list(reversed(list(re.finditer(r"\{((?:src|dst)+):([^\}]+)\}", s))))
# Destination indices start at 0, source indices at "number of destination operands"
dst_index, src_index = 0, ['dst' in o.group(1) for o in operands].count(True)
# Destination indices start at 0
dst_index = 0
# Source indices at "number of destination operands"
src_index = ['dst' in o.group(1) for o in operands].count(True)
dst_ops = []
src_ops = []
for m in operands:
direction, register_string = m.group(1, 2)
register = Register.from_string(register_string)
direction, operand_string = m.group(1, 2)
operand = Operand.from_string(operand_string)
if 'src' in direction and not 'dst' in direction:
src_ops.append(register)
src_ops.append(operand)
# replace with index string
instruction = (instruction[:m.start()] + "${}".format(src_index)
+ instruction[m.end():])
src_index += 1
if 'dst' in direction:
dst_ops.append(register)
dst_ops.append(operand)
# replace with index string
instruction = (instruction[:m.start()] + "${}".format(dst_index)
+ instruction[m.end():])
if 'src' in direction:
src_ops.append(Register(register_string.split(':', 1)[0], str(dst_index)))
src_ops.append(Register(operand_string.split(':', 1)[0], str(dst_index)))
src_index += 1
dst_index += 1
if len(dst_ops) != 1:
@@ -310,7 +389,7 @@ class Serialized(Synthable):
if not src_match:
src_naming.append(init_value_by_llvm_type[src.llvm_type])
if not match:
raise ValueError("Unable to find match.")
raise NotSerializableError("Unable to find match.")
if i == len(self.synths) - 1:
# last destination is passed in from outside

View File

@@ -473,7 +473,9 @@ def main():
instructions_ret_type[instr_op.get_destination_registers()[0].llvm_type][
instr_name] = (instr_name, instr_op)
# Constructing random benchmarks, one for each return type
for t in instructions_ret_type:
random.seed(42)
parallel_factor = 8
for t in sorted(instructions_ret_type):
valid = False
while not valid:
selected_names, selected_instrs = zip(
@@ -485,14 +487,22 @@ def main():
valid = True
serial = op.Serialized(selected_instrs)
p = op.Parallelized([serial] * 10)
p = op.Parallelized([serial] * parallel_factor)
init_values = [op.init_value_by_llvm_type[reg.llvm_type] for reg in
p.get_source_registers()]
b = bench.IntegerLoopBenchmark(p, init_values)
print(selected_names)
pprint(selected_instrs)
print(b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2))
print('## Selected Instructions')
print(', '.join(selected_names))
print('## Generated Assembly ({}x parallel)'.format(parallel_factor))
print(b.get_assembly())
#pprint(selected_instrs)
r = b.build_and_execute(repeat=4, min_elapsed=0.1, max_elapsed=0.2)
r['parallel_factor'] = parallel_factor
print('## Detailed Results')
pprint(r)
print("minimal throughput: {:.2f} cy".format(
min(r['runtimes'])/r['iterations']*r['frequency']/parallel_factor))
def can_serialize(instr):