Moved db_interface and integrated ibench into it

This commit is contained in:
JanLJL
2019-10-10 17:00:27 +02:00
parent 9656718d7d
commit 5bc201ae3e
7 changed files with 177 additions and 477 deletions

View File

@@ -1,9 +1,8 @@
"""
APIs for handling interfaces to kerncraft, ibench, etc.
APIs for handling interfaces to kerncraft, etc.
Only the classes below will be exported, so please add new semantic tools to __all__.
"""
from .kerncraft_interface import KerncraftAPI
from .db_interface import add_entry_to_db, add_entries_to_db, sanity_check
__all__ = ['KerncraftAPI', 'add_entry_to_db', 'add_entries_to_db', 'sanity_check']
__all__ = ['KerncraftAPI']

View File

@@ -1,29 +0,0 @@
#!/usr/bin/env python3
import os
class AsmBenchAPI(object):
    """Placeholder interface to the asmbench micro-benchmark tool.

    None of the benchmark generators is implemented yet; the methods only
    dispatch on the ISA and raise ``NotImplementedError``.
    """

    def __init__(self, isa):
        """Store the lower-cased ISA name.

        :param isa: ISA name, e.g. ``'aarch64'`` or ``'x86'`` (case-insensitive)
        """
        # TODO
        self.isa = isa.lower()

    def create_ubenchmark(self):
        """Dispatch to the ISA-specific benchmark generator.

        Any ISA other than ``'aarch64'`` and ``'x86'`` is silently ignored.

        :raises NotImplementedError: always, for the two handled ISAs
        """
        # TODO
        if self.isa == 'aarch64':
            # the generator is named create_ubench_aarch (no '64' suffix)
            self.create_ubench_aarch()
        elif self.isa == 'x86':
            self.create_ubench_x86()

    def import_asmbench_output(self, filepath):
        """Import an asmbench output file (not implemented yet).

        :param filepath: path to the asmbench output file; must exist
        :raises NotImplementedError: always
        """
        # TODO
        assert os.path.exists(filepath)
        raise NotImplementedError

    def create_ubench_aarch(self):
        """Create an AArch64 micro-benchmark (not implemented yet)."""
        # TODO
        raise NotImplementedError

    def create_ubench_x86(self):
        """Create an x86 micro-benchmark (not implemented yet)."""
        # TODO
        raise NotImplementedError

View File

@@ -1,426 +0,0 @@
#!/usr/bin/env python3
import copy
import os
from jinja2 import Template
from osaca.parser import ParserAArch64v81, ParserX86ATT
BENCHMARK_DIR = os.path.expanduser('~/.osaca/benchmarks')
class IbenchAPI(object):
    """Create ibench micro-benchmark files for single instruction forms.

    For each instruction form a throughput (``-TP.S``) and a latency (``-LT.S``)
    assembly file is rendered from the ISA-specific template and written to
    ``BENCHMARK_DIR/ibench/<isa>``. Instruction forms with a memory operand are
    skipped.
    """

    # Number of instructions emitted into every latency kernel.
    _NUM_INSTRUCTIONS_LT = 6
    _UNSUPPORTED_ISA_MSG = 'Currently only AArch64 and x86 architectures are supported.'

    def __init__(self, isa):
        """Store the lower-cased ISA name and make sure the output directory exists.

        :param isa: ISA name, ``'aarch64'`` or ``'x86'`` (case-insensitive)
        """
        self.isa = isa.lower()
        self.ibench_dir = os.path.join(BENCHMARK_DIR, 'ibench', self.isa)
        # exist_ok avoids the check-then-create race of a separate isdir() test
        os.makedirs(self.ibench_dir, exist_ok=True)

    def create_ubenchmark(self, instruction_form):
        """Render and write the TP and LT benchmark files for ``instruction_form``.

        Nothing is written if the instruction form cannot be benchmarked
        (the ISA-specific generator returned ``None``).

        :param instruction_form: parsed instruction form with the keys
            ``'instruction'`` and ``'operands'``
        :raises NotImplementedError: if the ISA is neither AArch64 nor x86
        """
        if self.isa == 'aarch64':
            self.parser = ParserAArch64v81()
            tp_bench, lt_bench = self._create_ubench_aarch(instruction_form)
        elif self.isa == 'x86':
            self.parser = ParserX86ATT()
            tp_bench, lt_bench = self._create_ubench_x86(instruction_form)
        else:
            # without this branch the check below failed on unbound locals
            raise NotImplementedError(self._UNSUPPORTED_ISA_MSG)
        if tp_bench is None or lt_bench is None:
            return
        bench_name = self._get_ibench_name(instruction_form)
        self._write_benchmark(bench_name + '-TP.S', tp_bench)
        self._write_benchmark(bench_name + '-LT.S', lt_bench)

    def import_ibench_output(self, filepath):
        """Import an ibench output file (not implemented yet).

        :param filepath: path to the ibench output file; must exist
        :raises NotImplementedError: always
        """
        # TODO
        assert os.path.exists(filepath)
        raise NotImplementedError

    ##########################################
    # Helpers
    ##########################################

    @staticmethod
    def _get_operand_list(instruction_form):
        """Return the operand list, unwrapping an ``'operand_list'`` entry if present."""
        operands = instruction_form['operands']
        return operands['operand_list'] if 'operand_list' in operands else operands

    def _write_benchmark(self, filename, content):
        """Write ``content`` to ``filename`` inside the ibench output directory."""
        with open(os.path.join(self.ibench_dir, filename), 'w') as f:
            f.write(content)

    def _get_ibench_name(self, instruction_form):
        """Build the benchmark base name ``<mnemonic>-<op>_<op>_...``.

        Each operand is encoded by a short code: on AArch64 the register prefix
        plus optional shape, on x86 ``'r'`` for general-purpose registers or the
        first letter of the register name, and ``'i'`` for immediates.

        :raises NotImplementedError: for an unsupported ISA, or for an AArch64
            operand that is neither a register nor an immediate
        """
        mnemonic = instruction_form['instruction'].lower()
        operands = self._get_operand_list(instruction_form)
        codes = []
        if self.isa == 'aarch64':
            for op in operands:
                if 'register' in op:
                    reg = op['register']
                    codes.append(reg['prefix'] + (reg['shape'] if 'shape' in reg else ''))
                elif 'immediate' in op:
                    codes.append('i')
                else:
                    raise NotImplementedError
        elif self.isa == 'x86':
            for op in operands:
                code = ''
                if 'register' in op:
                    reg = op['register']
                    code = 'r' if self.parser.is_gpr(reg) else reg['name'][0]
                elif 'immediate' in op:
                    code = 'i'
                codes.append(code)
        else:
            raise NotImplementedError(self._UNSUPPORTED_ISA_MSG)
        # an instruction without operands is named by its mnemonic only
        return mnemonic + '-' + '_'.join(codes) if codes else mnemonic

    def _create_latency_kernel(self, operands, operand_strs):
        """Return the latency kernel: the instruction repeated with its register
        operands reversed after every line."""
        lines = []
        for _ in range(self._NUM_INSTRUCTIONS_LT):
            lines.append('\tINSTR {}\n'.format(', '.join(operand_strs)))
            operand_strs = self._invert_regs(operands, operand_strs)
        return ''.join(lines)

    def _create_ubench_aarch(self, instruction_form, num_instructions=8):
        """Render the AArch64 throughput and latency benchmarks.

        :param instruction_form: parsed instruction form
        :param num_instructions: number of instructions in the throughput kernel
        :returns: tuple ``(tp_benchmark, lt_benchmark)`` of rendered sources, or
            ``(None, None)`` if the instruction form has a memory operand
        """
        vector_regs = False
        gp_regs = False
        mnemonic = instruction_form['instruction']
        operands = self._get_operand_list(instruction_form)
        for op in operands:
            if 'register' in op:
                if self.parser.is_gpr(op['register']):
                    gp_regs = True
                elif self.parser.is_vector_register(op['register']):
                    vector_regs = True
            elif 'memory' in op:
                return None, None
        num_regs = len([x for x in operands if 'register' in x])

        # throughput benchmark: cycle through the register pool so that
        # consecutive instructions use different registers
        possible_regs_tp = list(range(5, 16)) + list(range(19, 29))
        reg_counter = 0
        tp_lines = []
        # iterate over instructions (not registers) so that forms without any
        # register operand still yield num_instructions lines
        for _ in range(num_instructions):
            ops = []
            for op in operands:
                name = possible_regs_tp[reg_counter % len(possible_regs_tp)]
                ops.append(self._get_aarch_op(op, name))
                if 'register' in op:
                    reg_counter += 1
            tp_lines.append('\tINSTR {}\n'.format(', '.join(ops)))
        loop_kernel_tp = ''.join(tp_lines)

        # latency benchmark: one register per register operand, handed out
        # from the highest number downwards
        possible_regs_lt = list(range(5, 5 + num_regs))
        operands_lt = [
            self._get_aarch_op(op, possible_regs_lt.pop() if 'register' in op else 0)
            for op in operands
        ]
        loop_kernel_lt = self._create_latency_kernel(operands, operands_lt)

        args_tp = {
            'instr': mnemonic,
            'ninst': num_instructions,
            'vector_regs': vector_regs,
            'gp_regs': gp_regs,
            'loop_kernel': loop_kernel_tp,
        }
        args_lt = {
            'instr': mnemonic,
            'ninst': self._NUM_INSTRUCTIONS_LT,
            'vector_regs': vector_regs,
            'gp_regs': gp_regs,
            'loop_kernel': loop_kernel_lt,
        }
        return template_aarch64.render(**args_tp), template_aarch64.render(**args_lt)

    def _create_ubench_x86(self, instruction_form, num_instructions=8):
        """Render the x86 throughput and latency benchmarks.

        :param instruction_form: parsed instruction form
        :param num_instructions: number of instructions in the throughput kernel
        :returns: tuple ``(tp_benchmark, lt_benchmark)`` of rendered sources, or
            ``(None, None)`` if the instruction form has a memory operand
        """
        gp_regs = False
        AVX = False
        AVX512 = False
        mnemonic = instruction_form['instruction']
        # operand order is reversed -- presumably because the form is parsed by
        # the AT&T parser while the template is assembled in Intel syntax
        operands = list(reversed(copy.deepcopy(self._get_operand_list(instruction_form))))
        for op in operands:
            if 'register' in op:
                reg = op['register']
                if self.parser.is_gpr(reg):
                    gp_regs = True
                elif reg['name'][0].lower() == 'y':
                    AVX = True
                elif reg['name'][0].lower() == 'z':
                    AVX512 = True
            elif 'memory' in op:
                return None, None
        num_regs = len([x for x in operands if 'register' in x])

        # throughput benchmark: cycle independently through the GPR and the
        # vector register pool
        possible_regs_tp = {
            'gpr': ['ax', 'bx', 'cx', 'dx'] + list(range(9, 16)),
            'vector': list(range(0, 16)),
        }
        gpr_i = 0
        vector_i = 0
        tp_lines = []
        for _ in range(num_instructions):
            ops = []
            for op in operands:
                name = 0
                if 'register' in op:
                    if self.parser.is_gpr(op['register']):
                        pool = possible_regs_tp['gpr']
                        name = pool[gpr_i % len(pool)]
                        gpr_i += 1
                    else:
                        pool = possible_regs_tp['vector']
                        name = pool[vector_i % len(pool)]
                        vector_i += 1
                ops.append(self._get_x86_op(op, name))
            tp_lines.append('\tINSTR {}\n'.format(', '.join(ops)))
        loop_kernel_tp = ''.join(tp_lines)

        # latency benchmark: one register per register operand, handed out
        # from the highest number downwards
        possible_regs_lt = list(range(9, 9 + num_regs))
        operands_lt = [
            self._get_x86_op(op, possible_regs_lt.pop() if 'register' in op else 0)
            for op in operands
        ]
        loop_kernel_lt = self._create_latency_kernel(operands, operands_lt)

        args_tp = {
            'instr': mnemonic,
            'ninst': num_instructions,
            'gp_regs': gp_regs,
            'AVX': AVX,
            'AVX512': AVX512,
            'loop_kernel': loop_kernel_tp.rstrip(),
        }
        args_lt = {
            'instr': mnemonic,
            # must match the number of lines in the latency kernel, not the
            # length of the throughput kernel
            'ninst': self._NUM_INSTRUCTIONS_LT,
            'gp_regs': gp_regs,
            'AVX': AVX,
            'AVX512': AVX512,
            'loop_kernel': loop_kernel_lt.rstrip(),
        }
        return template_x86.render(**args_tp), template_x86.render(**args_lt)

    def _get_aarch_op(self, operand, name):
        """Return the AArch64 assembly string for ``operand``.

        Registers get ``name`` as their register name; immediates are always
        rendered as ``#192``.

        :raises NotImplementedError: for any other operand type
        """
        operand = copy.deepcopy(operand)
        if 'register' in operand:
            operand['register']['name'] = name
            return self.parser.get_full_reg_name(operand['register'])
        elif 'immediate' in operand:
            return '#192'
        else:
            raise NotImplementedError('Only immediates and register in benchmark allowed')

    def _get_x86_op(self, operand, name):
        """Return the x86 assembly string for ``operand``.

        Registers are named ``<register type><name>``, where for types starting
        with ``'gp'`` only the last character of the type is used; immediates
        are always rendered as ``192``.

        :raises NotImplementedError: for any other operand type
        """
        operand = copy.deepcopy(operand)
        if 'register' in operand:
            reg_type = self.parser.get_reg_type(operand['register'])
            reg_type = reg_type[-1] if reg_type.startswith('gp') else reg_type
            operand['register']['name'] = reg_type + str(name)
            return self.parser.get_full_reg_name(operand['register'])
        elif 'immediate' in operand:
            return '192'
        else:
            raise NotImplementedError('Only immediates and register in benchmark allowed')

    def _invert_regs(self, operands, operand_str_list):
        """Return ``operand_str_list`` with the register operands in reversed order.

        Non-register operands (e.g. immediates) keep their position.

        :param operands: operand dicts, used to find the register positions
        :param operand_str_list: assembly strings of the operands, same order
        :returns: new list; the input list is not modified
        """
        reg_indices = [i for i, op in enumerate(operands) if 'register' in op]
        inverted = list(operand_str_list)
        # pair every register slot with its mirror slot among the register
        # positions; indexing by operand position breaks once a non-register
        # operand is present
        for src, dst in zip(reg_indices, reversed(reg_indices)):
            inverted[dst] = operand_str_list[src]
        return inverted
# Jinja2 template of the AArch64 ibench benchmark function.
# Template variables: instr (mnemonic substituted for INSTR), ninst (exported
# as the `ninst` symbol), vector_regs / gp_regs (emit save/restore code for
# v8-v15 / x19-x28) and loop_kernel (the INSTR lines of the loop body).
# The loop counter x4 is initialised unconditionally, the general-purpose
# callee-saved registers are saved with stp/ldp (A64 has no push/pop), and the
# epilogue restores in the reverse order of the prologue.
# NOTE(review): the `fmov ..., #imm` constants and `.align 32` look unlikely to
# assemble as intended on AArch64 -- confirm against the target assembler.
template_aarch64 = Template(
'''#define INSTR {{ instr }}
#define NINST {{ ninst }}
#define N x0
.globl ninst
.data
ninst:
.long NINST
.text
.globl benchmark
.type benchmark, @function
.align 32
benchmark:
mov x4, N
{% if vector_regs %}
# push callee-save registers onto stack
sub sp, sp, #64
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
sub sp, sp, #64
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]
fmov v0.2d, #1.20000000
fmov v1.2d, #1.23000000
fmov v2.2d, #1.23400000
fmov v3.2d, #1.23410000
fmov v4.2d, #1.23412000
fmov v5.2d, #1.23412300
fmov v6.2d, #1.23412340
fmov v7.2d, #1.23412341
fmov v8.2d, #2.34123412
fmov v9.2d, #2.34123410
fmov v10.2d, #2.34123400
fmov v11.2d, #2.34123000
fmov v12.2d, #2.34120000
fmov v13.2d, #2.34100000
fmov v14.2d, #2.34000000
fmov v15.2d, #2.30000000
{% endif %}
{% if gp_regs %}
# push callee-save register onto stack
stp x19, x20, [sp, #-80]!
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
{% endif %}
loop:
subs x4, x4, #1
{{ loop_kernel }}
bne loop
done:
{% if gp_regs %}
# pop callee-save register from stack
ldp x21, x22, [sp, #16]
ldp x23, x24, [sp, #32]
ldp x25, x26, [sp, #48]
ldp x27, x28, [sp, #64]
ldp x19, x20, [sp], #80
{% endif %}
{% if vector_regs %}
# pop callee-save registers from stack
ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]
add sp, sp, #64
ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
add sp, sp, #64
{% endif %}
ret
.size benchmark, .-benchmark
'''
)
# Jinja2 template of the x86 ibench benchmark function (Intel syntax).
# Template variables: instr (mnemonic substituted for INSTR), ninst (exported
# as the `ninst` symbol), gp_regs (save, initialise and restore the
# general-purpose registers), AVX / AVX512 (widen the vector constants to
# ymm / zmm) and loop_kernel (the INSTR lines of the loop body).
# The registers are pushed BEFORE the early exit for N <= 0, because the `done`
# label always pops them again when gp_regs is set.
template_x86 = Template(
'''#define INSTR {{ instr }}
#define NINST {{ ninst }}
#define N edi
#define i r8d
.intel_syntax noprefix
.globl ninst
.data
ninst:
.long NINST
.text
.globl benchmark
.type benchmark, @function
.align 32
benchmark:
push rbp
mov rbp, rsp
{% if gp_regs %}
push rax
push rbx
push rcx
push rdx
push r9
push r10
push r11
push r12
push r13
push r14
push r15
{% endif %}
xor i, i
test N, N
jle done
{% if gp_regs %}
mov rax, 1
mov rbx, 2
mov rcx, 3
mov rdx, 4
mov r9, 5
mov r10, 6
mov r11, 7
mov r12, 8
mov r13, 9
mov r14, 10
mov r15, 11
{% endif %}
# create SP 1.0
vpcmpeqw xmm0, xmm0, xmm0 # all ones
vpslld xmm0, xmm0, 25 # logical left shift: 11111110..0 (25 = 32 - (8 - 1))
vpsrld xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is 0
{% if AVX or AVX512 %}
# expand from SSE to AVX
vinsertf128 ymm0, ymm0, xmm0, 0x1
{% endif %}
{% if AVX512 %}
# expand from AVX to AVX-512
vinsertf64x4 zmm0, zmm0, ymm0, 0x1
{% endif %}
{% if not AVX and not AVX512 %}
# create SP 2.0
vaddps xmm1, xmm0, xmm0
# create SP 0.5
vdivps xmm2, xmm0, xmm1
{% endif %}
{% if AVX and not AVX512 %}
# create SP 2.0
vaddps ymm1, ymm0, ymm0
# create SP 0.5
vdivps ymm2, ymm0, ymm1
{% endif %}
{% if AVX512 %}
# create AVX-512 DP 2.0
vaddps zmm1, zmm0, zmm0
# create AVX-512 DP 0.5
vdivps zmm2, zmm0, zmm1
{% endif %}
loop:
inc i
{{ loop_kernel }}
cmp i, N
jl loop
done:
{% if gp_regs %}
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop rdx
pop rcx
pop rbx
pop rax
{% endif %}
mov rsp, rbp
pop rbp
ret
.size benchmark, .-benchmark
'''
)

View File

@@ -8,7 +8,7 @@ from itertools import groupby, product
from ruamel import yaml
from osaca.api import add_entries_to_db
from osaca.db_interface import add_entries_to_db
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.semantics import MachineModel
@@ -136,9 +136,7 @@ def extract_paramters(instruction_tag, arch):
parameters.append(parameter)
elif p_type == 'reg':
parameter['class'] = 'register'
possible_regs = [
parser.parse_register('%' + r) for r in parameter_tag.text.split(',')
]
possible_regs = [parser.parse_register('%' + r) for r in parameter_tag.text.split(',')]
if possible_regs[0] is None:
raise ValueError(
'Unknown register type for {} with {}.'.format(
@@ -147,7 +145,9 @@ def extract_paramters(instruction_tag, arch):
)
if isa == 'x86':
if parser.is_vector_register(possible_regs[0]['register']):
possible_regs[0]['register']['name'] = possible_regs[0]['register']['name'].lower()[:3]
possible_regs[0]['register']['name'] = possible_regs[0]['register'][
'name'
].lower()[:3]
if 'mask' in possible_regs[0]['register']:
possible_regs[0]['register']['mask'] = True
else:

View File

@@ -1,10 +1,11 @@
#!/usr/bin/env python3
import math
import os
import sys
import warnings
from ruamel import yaml
import ruamel.yaml
from osaca.semantics import MachineModel
@@ -23,8 +24,9 @@ def add_entry_to_db(arch: str, entry):
arch = arch.lower()
filepath = os.path.join(os.path.expanduser('~/.osaca/data/' + arch + '.yml'))
assert os.path.exists(filepath)
yaml = _create_yaml_object()
with open(filepath, 'r') as f:
data = yaml.load(f, Loader=yaml.Loader)
data = yaml.load(f)
# check parameter of entry
if 'name' not in entry:
raise ValueError('No name for instruction specified. No import possible')
@@ -39,7 +41,9 @@ def add_entry_to_db(arch: str, entry):
if 'uops' not in entry:
entry['uops'] = None
data['instruction_forms'].append(entry)
__dump_data_to_yaml(filepath, data)
# __dump_data_to_yaml(filepath, data)
with open(filepath, 'w') as f:
yaml.dump(data)
def add_entries_to_db(arch: str, entries: list) -> None:
@@ -56,8 +60,9 @@ def add_entries_to_db(arch: str, entries: list) -> None:
arch = arch.lower()
filepath = os.path.join(os.path.expanduser('~/.osaca/data/' + arch + '.yml'))
assert os.path.exists(filepath)
yaml = _create_yaml_object()
with open(filepath, 'r') as f:
data = yaml.load(f, Loader=yaml.Loader)
data = yaml.load(f)
# check parameter of entry and append it to list
for entry in entries:
if 'name' not in entry:
@@ -79,7 +84,9 @@ def add_entries_to_db(arch: str, entries: list) -> None:
if 'uops' not in entry:
entry['uops'] = None
data['instruction_forms'].append(entry)
__dump_data_to_yaml(filepath, data)
# __dump_data_to_yaml(filepath, data)
with open(filepath, 'w') as f:
yaml.dump(data)
def sanity_check(arch: str, verbose=False):
@@ -115,6 +122,137 @@ def sanity_check(arch: str, verbose=False):
)
def import_benchmark_output(arch, bench_type, filepath):
    """Import the output file of a micro-benchmark run into the instruction DB.

    :param arch: micro-architecture code the measurements belong to
    :param bench_type: benchmark tool that produced the file; one of
        ``'ibench'`` and ``'asmbench'`` (only ``'ibench'`` is implemented)
    :param filepath: path to the benchmark output file; must exist
    :raises ValueError: if ``bench_type`` is not a known benchmark type
    :raises NotImplementedError: for ``'asmbench'`` output
    """
    supported_bench_outputs = ['ibench', 'asmbench']
    assert os.path.exists(filepath)
    if bench_type not in supported_bench_outputs:
        raise ValueError('Benchmark type is not supported.')
    with open(filepath, 'r') as f:
        input_data = f.readlines()
    if bench_type == 'asmbench':
        raise NotImplementedError
    # The operand codes in ibench names are ISA specific ('x' is an xmm
    # register on x86 but a register prefix on AArch64).
    # NOTE(review): assumes MachineModel provides get_isa_for_arch() -- confirm.
    isa = MachineModel.get_isa_for_arch(arch)
    db_entries = _get_ibench_output(input_data, isa)
    # write entries to DB
    add_entries_to_db(arch, list(db_entries.values()))


##################
# HELPERS IBENCH #
##################


def _get_ibench_output(input_data, isa):
    """Parse the lines of an ibench output file into DB entries.

    Every measurement line is expected to look like
    ``<mnemonic>-<op>_<op>...-<TP|LT>: <value> ...``. The throughput and the
    latency line of the same instruction form are merged into one entry.

    :param input_data: lines of the ibench output
    :param isa: ISA used to decode the operand codes, ``'aarch64'`` or ``'x86'``
    :returns: dict mapping ``<mnemonic>-<operands>`` to its DB entry
    """
    db_entries = {}
    for line in input_data:
        # lines from readlines() keep their newline, so test the stripped line
        if 'Using frequency' in line or not line.strip():
            continue
        instruction = line.split(':')[0]
        key = '-'.join(instruction.split('-')[:2])
        if key in db_entries:
            # add only TP/LT value
            entry = db_entries[key]
        else:
            mnemonic = instruction.split('-')[0]
            operand_codes = instruction.split('-')[1].split('_')
            entry = {
                'name': mnemonic,
                'operands': [_create_db_operand(op, isa) for op in operand_codes],
                'throughput': None,
                'latency': None,
                'port_pressure': None,
            }
        if 'TP' in instruction:
            entry['throughput'] = _validate_measurement(float(line.split()[1]), True)
            if entry['throughput'] is None:
                warnings.warn(
                    'Your THROUGHPUT measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        elif 'LT' in instruction:
            entry['latency'] = _validate_measurement(float(line.split()[1]), False)
            if entry['latency'] is None:
                warnings.warn(
                    'Your LATENCY measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        db_entries[key] = entry
    return db_entries


def _validate_measurement(measurement, is_tp):
    """Round a plausible measurement to its estimated value.

    A latency is accepted if it lies within 5% above its floor or within 5%
    below its ceiling and is rounded to the nearest integer. A throughput is
    accepted if it lies within 5% of ``1/n`` for ``n`` in 1..10 and is replaced
    by that reciprocal, rounded to 5 digits.

    :param measurement: measured value in cycles
    :param is_tp: ``True`` for a throughput, ``False`` for a latency
    :returns: the rounded value, or ``None`` if the measurement looks wrong
    """
    if not is_tp:
        if (
            math.floor(measurement) * 1.05 >= measurement
            or math.ceil(measurement) * 0.95 <= measurement
        ):
            # Value is probably correct, so round it to the estimated value
            return float(round(measurement))
    # Check reciprocal only if it is a throughput value
    else:
        for divisor in range(1, 11):
            reci = 1 / divisor
            if reci * 0.95 <= measurement <= reci * 1.05:
                # Value is probably correct, so round it to the estimated value
                return round(reci, 5)
    # No value close to an integer or its reciprocal found, we assume the
    # measurement is incorrect
    return None


def _create_db_operand(operand, isa):
    """Translate an ibench operand code into a DB operand for ``isa``.

    :raises ValueError: for an unsupported ISA or an invalid operand code
    """
    if isa == 'aarch64':
        return _create_db_operand_aarch64(operand)
    elif isa == 'x86':
        return _create_db_operand_x86(operand)
    raise ValueError('ISA {} is not supported'.format(isa))


def _create_db_operand_aarch64(operand):
    """Translate an AArch64 ibench operand code into a DB operand dict.

    :raises ValueError: if ``operand`` is not a valid operand code
    """
    if operand == 'i':
        return {'class': 'immediate', 'imd': 'int'}
    # compare against single codes; a substring test would also accept ''
    # and multi-character strings such as 'wx'
    elif operand in ('w', 'x', 'b', 'h', 's', 'd', 'q'):
        return {'class': 'register', 'prefix': operand}
    elif operand.startswith('v'):
        return {'class': 'register', 'prefix': 'v', 'shape': operand[1:2]}
    elif operand.startswith('m'):
        return {
            'class': 'memory',
            'base': 'gpr' if 'b' in operand else None,
            'offset': 'imd' if 'o' in operand else None,
            'index': 'gpr' if 'i' in operand else None,
            'scale': 8 if 's' in operand else 1,
            'pre-indexed': True if 'r' in operand else False,
            'post-indexed': True if 'p' in operand else False,
        }
    else:
        raise ValueError('Parameter {} is not a valid operand code'.format(operand))


def _create_db_operand_x86(operand):
    """Translate an x86 ibench operand code into a DB operand dict.

    :raises ValueError: if ``operand`` is not a valid operand code
    """
    if operand == 'r':
        return {'class': 'register', 'name': 'gpr'}
    # compare against single codes; a substring test would also accept ''
    # and multi-character strings such as 'xy'
    elif operand in ('x', 'y', 'z'):
        return {'class': 'register', 'name': operand + 'mm'}
    elif operand == 'i':
        return {'class': 'immediate', 'imd': 'int'}
    elif operand.startswith('m'):
        return {
            'class': 'memory',
            'base': 'gpr' if 'b' in operand else None,
            'offset': 'imd' if 'o' in operand else None,
            'index': 'gpr' if 'i' in operand else None,
            'scale': 8 if 's' in operand else 1,
        }
    else:
        raise ValueError('Parameter {} is not a valid operand code'.format(operand))
########################
# HELPERS SANITY CHECK #
########################
def _check_sanity_arch_db(arch_mm, isa_mm):
suspicious_prefixes_x86 = ['vfm', 'fm']
suspicious_prefixes_arm = ['fml', 'ldp', 'stp', 'str']
@@ -276,6 +414,11 @@ def _print_sanity_report_verbose(
print('{}{}{}'.format(CYAN, _get_full_instruction_name(instr_form), WHITE))
###################
# GENERIC HELPERS #
###################
def _get_full_instruction_name(instruction_form):
operands = []
for op in instruction_form['operands']:
@@ -287,20 +430,30 @@ def _get_full_instruction_name(instruction_form):
return '{} {}'.format(instruction_form['name'], ','.join(operands))
def __represent_none(self, data):
    """Represent ``None`` as the YAML null scalar ``~``.

    Used as a representer callback, so ``self`` is the representer the
    callback is registered on; ``data`` is ignored.
    """
    null_tag = u'tag:yaml.org,2002:null'
    null_value = u'~'
    return self.represent_scalar(null_tag, null_value)
def _create_yaml_object():
    """Return a ``ruamel.yaml.YAML`` object that represents ``None`` via
    ``__represent_none``."""
    none_type = type(None)
    yaml = ruamel.yaml.YAML()
    yaml.representer.add_representer(none_type, __represent_none)
    return yaml
def __dump_data_to_yaml(filepath, data):
# Write the machine-model dict `data` to the YAML file `filepath` in three
# parts: all remaining meta data (file opened with 'w'), then
# 'port_model_scheme' and finally 'instruction_forms' (both appended with 'a').
# NOTE(review): every dump call below appears twice, once as `yaml.dump` and
# once as `ruamel.yaml.dump` -- presumably the removed and the added line of
# the diff; confirm that only one of each pair is meant to remain.
# first add 'normal' meta data in the right order (no ordered dict yet)
# shallow copy, so deleting the two keys does not modify the caller's dict
meta_data = dict(data)
del meta_data['instruction_forms']
del meta_data['port_model_scheme']
with open(filepath, 'w') as f:
yaml.dump(meta_data, f, allow_unicode=True)
ruamel.yaml.dump(meta_data, f, allow_unicode=True)
with open(filepath, 'a') as f:
# now add port model scheme in |-scheme for better readability
yaml.dump(
ruamel.yaml.dump(
{'port_model_scheme': data['port_model_scheme']},
f,
allow_unicode=True,
default_style='|',
)
# finally, add instruction forms
yaml.dump({'instruction_forms': data['instruction_forms']}, f, allow_unicode=True)
ruamel.yaml.dump({'instruction_forms': data['instruction_forms']}, f, allow_unicode=True)

View File

@@ -8,7 +8,7 @@ import sys
from filecmp import dircmp
from subprocess import call
from osaca.api import sanity_check
from osaca.db_interface import sanity_check, import_benchmark_output
from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64v81, ParserX86ATT
from osaca.semantics import (KernelDG, MachineModel, SemanticsAppender,
@@ -138,8 +138,11 @@ def check_user_dir():
)
def import_data(benchmark_type, filepath):
# Placeholder: this two-argument variant does not import anything yet and
# always raises NotImplementedError.
raise NotImplementedError
def import_data(benchmark_type, arch, filepath):
    """Import a micro-benchmark output file into the DB of ``arch``.

    :param benchmark_type: name of the benchmark tool (case-insensitive); only
        ``'ibench'`` is handled
    :param arch: micro-architecture code passed on to the DB import
    :param filepath: path to the benchmark output file
    :raises NotImplementedError: for every other benchmark type
    """
    # guard clause: reject everything but ibench first
    if benchmark_type.lower() != 'ibench':
        raise NotImplementedError('This benchmark input variant is not implemented yet.')
    import_benchmark_output(arch, 'ibench', filepath)
def insert_byte_marker(args):
@@ -206,7 +209,7 @@ def run(args, output_file=sys.stdout):
sanity_check(args.arch, verbose=verbose)
if 'import_data' in args:
# Import microbench output file into DB
import_data(args.import_data, args.file)
import_data(args.import_data, args.arch, args.file)
if args.insert_marker:
# Try to add IACA marker
insert_byte_marker(args)

View File

@@ -316,14 +316,14 @@ class SemanticsAppender(object):
def _get_regular_source_x86ATT(self, instruction_form):
    """Return the source operands of an x86 AT&T instruction form as a new
    list: every operand except the last one."""
    operands = instruction_form['operands']
    # drop the final operand; list() copies the slice into a plain list
    return list(operands[:-1])
def _get_regular_source_AArch64(self, instruction_form):
    """Return the source operands of an AArch64 instruction form as a new
    list: every operand except the first one."""
    operands = instruction_form['operands']
    # drop the leading operand; list() copies the slice into a plain list
    return list(operands[1:])