more interfaces

Author:  JanLJL
Date:    2019-08-20 18:50:57 +02:00
Parent:  e468db4a0d
Commit:  de2ba87d6b

7 changed files with 507 additions and 43 deletions

osaca/api/asmbench_interface.py (new executable file, 29 lines added)

@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import os


class AsmBenchAPI(object):
    def __init__(self, isa):
        # TODO
        self.isa = isa.lower()

    def create_ubenchmark(self):
        # TODO
        if self.isa == 'aarch64':
            self.create_ubench_aarch64()
        elif self.isa == 'x86':
            self.create_ubench_x86()

    def import_asmbench_output(self, filepath):
        # TODO
        assert os.path.exists(filepath)
        raise NotImplementedError

    def create_ubench_aarch64(self):
        # TODO
        raise NotImplementedError

    def create_ubench_x86(self):
        # TODO
        raise NotImplementedError


@@ -40,6 +40,44 @@ def add_entry_to_db(arch: str, entry):
    __dump_data_to_yaml(filepath, data)


def add_entries_to_db(arch: str, entries: list) -> None:
    """Adds entries to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entries: :class:`list` of DB entries which will be added. Ideally each entry consists of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure', 'uops'.
    """
    # load yaml
    arch = arch.lower()
    filepath = os.path.join(os.path.expanduser('~/.osaca/data'), arch + '.yml')
    assert os.path.exists(filepath)
    with open(filepath, 'r') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameters of each entry and append it to the DB data
    # (iterate over a copy so that removing invalid entries does not skip elements)
    for entry in list(entries):
        if 'name' not in entry:
            print(
                'No name for instruction \n\t{}\nspecified. No import possible'.format(entry),
                file=sys.stderr,
            )
            # remove entry from list
            entries.remove(entry)
            continue
        if 'operands' not in entry:
            entry['operands'] = None
        if 'throughput' not in entry:
            entry['throughput'] = None
        if 'latency' not in entry:
            entry['latency'] = None
        if 'port_pressure' not in entry:
            entry['port_pressure'] = None
        data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)


def sanity_check(arch: str, verbose=False):
    # load arch machine model
    arch_mm = MachineModel(arch=arch)
@@ -245,44 +283,6 @@ def _get_full_instruction_name(instruction_form):
    return '{} {}'.format(instruction_form['name'], ','.join(operands))


def add_entries_to_db(arch: str, entries: list) -> None:
    """Adds entries to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entries: :class:`list` of DB entries which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure'.
    """
    # load yaml
    arch = arch.lower()
    filepath = os.path.join(os.path.expanduser('~/.osaca/data/', arch + '.yml'))
    assert os.path.exists(filepath)
    with open(filepath, 'r') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameter of entry and append it to list
    for entry in entries:
        if 'name' not in entry:
            print(
                'No name for instruction \n\t{}\nspecified. No import possible'.format(entry),
                file=sys.stderr,
            )
            # remove entry from list
            entries.remove(entry)
            continue
        if 'operands' not in entry:
            entry['operands'] = None
        if 'throughput' not in entry:
            entry['throughput'] = None
        if 'latency' not in entry:
            entry['latency'] = None
        if 'port_pressure' not in entry:
            entry['port_pressure'] = None
        data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)


def __dump_data_to_yaml(filepath, data):
    # first add 'normal' meta data in the right order (no ordered dict yet)
    meta_data = dict(data)

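For reference, a minimal usage sketch of the new add_entries_to_db (the 'csx' database is assumed to already exist under ~/.osaca/data, and the entry values are placeholders rather than measured numbers):

from osaca.api import add_entries_to_db  # import path as used by model_importer.py below

add_entries_to_db(
    'csx',
    [
        {
            'name': 'vaddpd',
            'operands': [{'class': 'register', 'name': 'zmm'} for _ in range(3)],
            'throughput': None,  # unknown fields may also be omitted; they default to None
            'latency': None,
            'port_pressure': None,
            'uops': None,
        }
    ],
)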
osaca/api/ibench_interface.py (new executable file, 101 lines added)

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
import os

from jinja2 import Template


class IbenchAPI(object):
    def __init__(self, isa):
        # TODO
        self.isa = isa.lower()

    def create_ubenchmark(self):
        # TODO
        if self.isa == 'aarch64':
            self.create_ubench_aarch64()
        elif self.isa == 'x86':
            self.create_ubench_x86()

    def import_ibench_output(self, filepath):
        # TODO
        assert os.path.exists(filepath)
        raise NotImplementedError

    def create_ubench_aarch64(self):
        # TODO
        raise NotImplementedError

    def create_ubench_x86(self):
        # TODO
        raise NotImplementedError
# TODO
# template_x86 = Template()
template_aarch64 = Template(
'''
#define INSTR {{ instr }}
#define NINST {{ ninst }}
#define N x0
.globl ninst
.data
ninst:
.long NINST
{% if imd %}
IMD:
.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9
{% endif %}
.text
.globl latency
.type latency, @function
.align 32
latency:
{% if vector_regs %}
# push callee-save registers onto stack
sub sp, sp, #64
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
sub sp, sp, #64
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]
mov x4, N
fmov v0.2d, #1.00000000
fmov v1.2d, #1.00000000
fmov v2.2d, #1.00000000
fmov v3.2d, #1.00000000
fmov v4.2d, #1.00000000
fmov v5.2d, #1.00000000
fmov v6.2d, #1.00000000
fmov v7.2d, #1.00000000
fmov v8.2d, #1.00000000
fmov v9.2d, #1.00000000
fmov v10.2d, #1.00000000
fmov v11.2d, #1.00000000
fmov v12.2d, #1.00000000
fmov v13.2d, #1.00000000
fmov v14.2d, #1.00000000
fmov v15.2d, #1.00000000
{% endif %}
{% if gp_regs %}
{% endif %}
loop:
{{ loop_kernel }}
subs x4, x4, #1
bne loop
done:
{% if vector_regs %}
# pop callee-save registers from stack
ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]
add sp, sp, #64
ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
add sp, sp, #64
{% endif %}
{% if gp_regs %}
{% endif %}
ret
.size latency, .-latency
'''
)

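Once the stubs are filled in, rendering the AArch64 latency template is a plain Jinja2 call. A minimal sketch, assuming the module-level template_aarch64 shown above; the kernel string and parameter values are made-up placeholders:

kernel = 'fadd v0.2d, v0.2d, v1.2d'  # placeholder dependency chain for the timing loop
asm = template_aarch64.render(
    instr='fadd',        # substituted into #define INSTR
    ninst=1,             # substituted into #define NINST
    imd=False,           # no immediate data section needed
    vector_regs=True,    # emit the v8-v15 save/restore and the fmov initialization
    gp_regs=False,
    loop_kernel=kernel,  # body of the benchmark loop
)
print(asm)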
osaca/api/kerncraft_interface.py (normal file → executable file, 2 lines changed)

@@ -2,7 +2,7 @@
import collections
from osaca import Frontend
from osaca.frontend import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, SemanticsAppender,
                             reduce_to_section)

osaca/data/model_importer.py (new executable file, 294 lines added)

@@ -0,0 +1,294 @@
#!/usr/bin/env python3
import argparse
import re
import sys
import xml.etree.ElementTree as ET
from distutils.version import StrictVersion
from itertools import groupby, product
from ruamel import yaml
from osaca.api import add_entries_to_db
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.semantics import MachineModel

ARCH_DICT = {
    'vulcan': 'aarch64',
    'snb': 'x86',
    'ivb': 'x86',
    'hsw': 'x86',
    'bdw': 'x86',
    'skl': 'x86',
    'skx': 'x86',
    'csx': 'x86',
}


def port_pressure_from_tag_attributes(attrib, arch, ports):
    # apply cycles for D ports
    data_port = re.compile(r'[0-9]D$')
    data_ports = [x[:-1] for x in filter(data_port.match, ports)]
    # format attributes
    cycles = attrib['ports'].split('+')
    cycles = [c.split('*') for c in cycles]
    for i, c in enumerate(cycles):
        cycles[i][0] = int(c[0])
        if str(c[1]).startswith('p'):
            cycles[i][1] = [p for p in c[1][1:]]
            if data_ports and data_ports == cycles[i][1]:
                # uops for data ports
                cycles.append([c[0], [x + 'D' for x in data_ports]])
        cycles[i][0] = [
            cycles[i][0] / num for num in range(1, len(cycles[i][1]) + 1) for _ in range(num)
        ]
    cycles = [list(product(c[0], c[1])) for c in cycles]
    all_options = []
    # iterate over all combinations of all uop options
    for cycles_combs in cycles:
        options = []
        tmp_opt = []
        total = cycles_combs[0][0]
        # iterate over all combinations of each uop option
        for comb in cycles_combs:
            # add options until they reach the total num of uops
            tmp_opt.append(comb)
            if sum([c[0] for c in tmp_opt]) == total:
                # copy this option as one of several to the cycle option list
                options.append(tmp_opt.copy())
                tmp_opt = []
        if len(tmp_opt) != 0:
            raise ValueError('Cannot compute port pressure')
        options = [x for x, _ in groupby(options)]
        all_options.append(options)
    all_options = list(product(*all_options))
    # find best scheduling
    port_pressure = {}
    for p in ports:
        port_pressure[p] = 0.0
    first = calculate_port_pressure(all_options[0])
    for key in first:
        port_pressure[key] = first[key]
    for option in all_options[1:]:
        tmp = calculate_port_pressure(option)
        if (max(list(tmp.values())) <= max(list(port_pressure.values()))) and (
            len(tmp) > len([x for x in port_pressure.values() if x != 0.0])
        ):
            for k in port_pressure:
                port_pressure[k] = tmp[k] if k in tmp else 0.0
    # check if calculation equals given throughput
    if abs(max(list(port_pressure.values())) - float(attrib['TP_ports'])) > 0.01:
        print('Contradicting TP value compared to port_pressure. Ignore port pressure.')
        for p in port_pressure:
            port_pressure[p] = 0.0
        return port_pressure
    # Also consider DIV pipeline
    if 'div_cycles' in attrib:
        div_port = re.compile(r'[0-9]DV$')
        div_ports = [x for x in filter(div_port.match, ports)]
        for dp in div_ports:
            port_pressure[dp] += int(attrib['div_cycles']) / len(div_ports)
    return port_pressure
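# Illustrative decomposition (the values are made up, not taken from instructions.xml):
# attrib['ports'] == '2*p0156+1*p23' is split into [[2, ['0', '1', '5', '6']], [1, ['2', '3']]];
# the uops of each term are then distributed over every feasible subset of its ports, the
# scheduling with the lowest maximum per-port load that covers the most ports is kept, and the
# result is cross-checked against attrib['TP_ports'].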


def calculate_port_pressure(pp_option):
    ports = {}
    for option in pp_option:
        for port in option:
            if port[1] in ports:
                ports[port[1]] += port[0]
            else:
                ports[port[1]] = port[0]
    return ports


def extract_paramters(instruction_tag, arch):
    isa = ARCH_DICT[arch.lower()]
    parser = ParserX86ATT()
    if isa == 'aarch64':
        parser = ParserAArch64v81()
    elif isa == 'x86':
        parser = ParserX86ATT()
    # Extract parameter components
    parameters = []  # used to store operand representations
    parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib['idx']))
    for parameter_tag in parameter_tags:
        parameter = {}
        # Ignore parameters with suppressed=1
        if int(parameter_tag.attrib.get('suppressed', '0')):
            continue
        p_type = parameter_tag.attrib['type']
        if p_type == 'imm':
            parameter['class'] = 'immediate'
            parameter['imd'] = 'int'
            parameters.append(parameter)
        elif p_type == 'mem':
            parameter['class'] = 'memory'
            parameter['base'] = 'gpr'
            parameter['offset'] = None
            parameter['index'] = None
            parameter['scale'] = 1
            parameters.append(parameter)
        elif p_type == 'reg':
            parameter['class'] = 'register'
            possible_regs = [
                parser.parse_register('%' + r) for r in parameter_tag.text.split(',')
            ]
            if possible_regs[0] is None:
                raise ValueError(
                    'Unknown register type for {} with {}.'.format(
                        parameter_tag.attrib, parameter_tag.text
                    )
                )
            if isa == 'x86':
                if parser.is_vector_register(possible_regs[0]['register']):
                    possible_regs[0]['register']['name'] = (
                        possible_regs[0]['register']['name'].lower()[:3]
                    )
                    if 'mask' in possible_regs[0]['register']:
                        possible_regs[0]['register']['mask'] = True
                else:
                    possible_regs[0]['register']['name'] = 'gpr'
            elif isa == 'aarch64':
                del possible_regs[0]['register']['name']
            for key in possible_regs[0]['register']:
                parameter[key] = possible_regs[0]['register'][key]
            parameters.append(parameter)
        elif p_type == 'relbr':
            parameter['class'] = 'identifier'
            parameters.append(parameter)
        elif p_type == 'agen':
            # FIXME actually only address generation
            parameter['class'] = 'memory'
            parameter['base'] = 'gpr'
            parameter['offset'] = None
            parameter['index'] = None
            parameter['scale'] = 1
            parameters.append(parameter)
        else:
            raise ValueError("Unknown parameter type {}".format(parameter_tag.attrib))
    return parameters
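# Illustrative operand mapping (the XML snippets are assumptions about instructions.xml, not quotes):
#   <operand idx="1" type="reg">RAX,RBX,...</operand>  ->  {'class': 'register', 'name': 'gpr'}
#   <operand idx="2" type="imm"/>                       ->  {'class': 'immediate', 'imd': 'int'}
#   <operand idx="3" type="mem"/>                       ->  {'class': 'memory', 'base': 'gpr', ...}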


def extract_model(tree, arch):
    mm = MachineModel(arch.lower())
    ports = mm._data['ports']
    model_data = []
    for instruction_tag in tree.findall('.//instruction'):
        ignore = False
        mnemonic = instruction_tag.attrib['asm']
        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, arch)
            if ARCH_DICT[arch.lower()] == 'x86':
                parameters.reverse()
        except ValueError as e:
            print(e, file=sys.stderr)
        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
        if arch_tag is None:
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter('measurement'):
            if 'TP_ports' in measurement_tag.attrib:
                throughput = measurement_tag.attrib['TP_ports']
            else:
                throughput = (
                    measurement_tag.attrib['TP'] if 'TP' in measurement_tag.attrib else None
                )
            uops = (
                int(measurement_tag.attrib['uops']) if 'uops' in measurement_tag.attrib else None
            )
            if 'ports' in measurement_tag.attrib:
                port_pressure.append(
                    port_pressure_from_tag_attributes(measurement_tag.attrib, arch, ports)
                )
            latencies = [
                int(l_tag.attrib['cycles'])
                for l_tag in measurement_tag.iter('latency')
                if 'cycles' in l_tag.attrib
            ]
            if len(latencies) == 0:
                latencies = [
                    int(l_tag.attrib['max_cycles'])
                    for l_tag in measurement_tag.iter('latency')
                    if 'max_cycles' in l_tag.attrib
                ]
            if latencies[1:] != latencies[:-1]:
                print("Contradicting latencies found:", mnemonic, file=sys.stderr)
                ignore = True
            elif latencies:
                latency = latencies[0]
        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter('IACA'), key=lambda i: StrictVersion(i.attrib['version'])
        ):
            if 'ports' in iaca_tag.attrib:
                port_pressure.append(
                    port_pressure_from_tag_attributes(iaca_tag.attrib, arch, ports)
                )
        if ignore:
            continue
        # Check if all are equal
        if port_pressure:
            if port_pressure[1:] != port_pressure[:-1]:
                print(
                    "Contradicting port occupancies, using latest IACA:", mnemonic, file=sys.stderr
                )
            port_pressure = port_pressure[-1]
            throughput = max(list(port_pressure.values()) + [0.0])
        else:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue
        # ---------------------------------------------
        model_data.append(
            {
                'name': mnemonic,
                'operands': parameters,
                'uops': uops,
                'throughput': throughput,
                'latency': latency,
                'port_pressure': port_pressure,
            }
        )
    return model_data


def architectures(tree):
    return set([a.attrib['name'] for a in tree.findall('.//architecture')])


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('xml', help='path of instructions.xml from http://uops.info')
    parser.add_argument(
        'arch',
        nargs='?',
        help='architecture to extract, use IACA abbreviations (e.g., SNB). '
        'If not given, all architectures are extracted and added to the user database.',
    )
    args = parser.parse_args()

    tree = ET.parse(args.xml)
    if args.arch:
        model_data = extract_model(tree, args.arch)
        print(yaml.dump(model_data, allow_unicode=True))
    else:
        for arch in architectures(tree):
            model_data = extract_model(tree, arch)
            add_entries_to_db(arch, model_data)


if __name__ == '__main__':
    main()
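Usage sketch based on the argument parser above: python osaca/data/model_importer.py instructions.xml SKL dumps the extracted SKL model as YAML to stdout, while omitting the architecture iterates over all architectures found in the XML and passes each model to add_entries_to_db.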


@@ -391,6 +391,15 @@ class ParserAArch64v81(BaseParser):
            exponent *= -1
        return float(ieee_val['mantissa']) * (10 ** exponent)

    def parse_register(self, register_string):
        raise NotImplementedError

    def is_gpr(self, register):
        raise NotImplementedError

    def is_vector_register(self, register):
        raise NotImplementedError

    def is_reg_dependend_of(self, reg_a, reg_b):
        prefixes_gpr = 'wx'
        prefixes_vec = 'bhsdqv'


@@ -97,6 +97,24 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(self.comment)
)
def parse_register(self, register_string):
register = pp.Group(
pp.Literal('%')
+ pp.Word(pp.alphanums).setResultsName('name')
+ pp.Optional(
pp.Literal('{')
+ pp.Literal('%')
+ pp.Word(pp.alphanums).setResultsName('mask')
+ pp.Literal('}')
)
).setResultsName(self.REGISTER_ID)
try:
return self.process_operand(
register.parseString(register_string, parseAll=True).asDict()
)
except pp.ParseException:
return None
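    # Illustrative results (sketch; the exact operand dict depends on process_operand):
    #   parse_register('%rax')        -> register operand named 'rax'
    #   parse_register('%zmm0{%k1}')  -> register operand named 'zmm0' with mask 'k1'
    #   parse_register('rax')         -> None, since the leading '%' is required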

    def parse_line(self, line, line_number=None):
        """
        Parse line and return instruction form.

@@ -129,9 +147,7 @@
        # 2. Parse label
        if result is None:
            try:
                result = self.process_operand(
                    self.label.parseString(line, parseAll=True).asDict()
                )
                result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
                result = AttrDict.convert_dict(result)
                instruction_form[self.LABEL_ID] = result[self.LABEL_ID]['name']
                if self.COMMENT_ID in result[self.LABEL_ID]:
@@ -297,7 +313,22 @@ class ParserX86ATT(BaseParser):
                return False
        return True

    def is_gpr(self, register):
        gpr_parser = (
            pp.CaselessLiteral('R')
            + pp.Word(pp.nums).setResultsName('id')
            + pp.Optional(pp.Word('dwbDWB', exact=1))
        )
        if self.is_basic_gpr(register):
            return True
        else:
            try:
                gpr_parser.parseString(register['name'], parseAll=True)
                return True
            except pp.ParseException:
                return False

    def is_vector_register(self, register):
        if len(register['name']) > 2 and register['name'][1:3] == 'mm':
        if len(register['name']) > 2 and register['name'][1:3].lower() == 'mm':
            return True
        return False
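    # Illustrative behaviour (sketch; register dicts as produced by parse_register above):
    #   is_gpr({'name': 'r12d'})             -> True, matches the R<id>[dwb] pattern
    #   is_vector_register({'name': 'ymm3'}) -> True, second and third character are 'mm'
    #   is_vector_register({'name': 'rax'})  -> False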