finished DB sanity check and cleaned DBs

This commit is contained in:
JanLJL
2019-08-13 18:13:41 +02:00
parent daf2242b7c
commit a18a122aa2
9 changed files with 473 additions and 108 deletions

9
osaca/api/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
"""
APIs for handling interfaces to kerncraft, ibench, etc.
Only the classes below will be exported, so please add new semantic tools to __all__.
"""
from .kerncraft_interface import KerncraftAPI
from .db_interface import add_entry_to_db, add_entries_to_db, sanity_check
__all__ = ['KerncraftAPI', 'add_entry_to_db', 'add_entries_to_db', 'sanity_check']

302
osaca/api/db_interface.py Executable file
View File

@@ -0,0 +1,302 @@
#!/usr/bin/env python3
import os
import sys
import warnings
from ruamel import yaml
from osaca.semantics import MachineModel
def add_entry_to_db(arch: str, entry):
    """Adds entry to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entry: DB entry which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure'.
            Missing optional keys are filled in with ``None`` (the dict is modified in place).

    Raises:
        AssertionError: if no database file exists for ``arch``.
        ValueError: if ``entry`` does not contain a 'name'.
    """
    # load yaml
    arch = arch.lower()
    # expanduser() takes exactly one path; the file name has to be joined onto its result
    filepath = os.path.join(os.path.expanduser('~/.osaca/data/'), arch + '.yml')
    assert os.path.exists(filepath), 'No database found at {}'.format(filepath)
    with open(filepath, 'r') as f:
        # NOTE(review): yaml.Loader is not the safe loader -- presumably fine because the
        # file is the user's own database; confirm before loading foreign files.
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameter of entry
    if 'name' not in entry:
        raise ValueError('No name for instruction specified. No import possible')
    for optional_key in ('operands', 'throughput', 'latency', 'port_pressure'):
        entry.setdefault(optional_key, None)
    data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)
def sanity_check(arch: str, verbose=False):
    """Check the database of ``arch`` against its ISA database and print a report.

    Args:
        arch: architecture identifier handed to :class:`MachineModel`.
        verbose: if True, the report also lists every affected instruction form.
    """
    # micro-architecture model and the number of instruction forms it holds
    uarch_model = MachineModel(arch=arch)
    form_count = len(uarch_model['instruction_forms'])
    # matching ISA model
    isa_model = MachineModel(arch='isa/{}'.format(uarch_model.get_ISA()))
    # findings of the uarch DB: (no throughput, no latency, no port pressure,
    # suspicious, duplicates)
    no_tp, no_lat, no_pp, suspicious, uarch_duplicates = _check_sanity_arch_db(
        uarch_model, isa_model
    )
    # findings of the ISA DB: (duplicates, forms unknown to the uarch DB)
    isa_duplicates, isa_only = _check_sanity_isa_db(uarch_model, isa_model)
    _print_sanity_report(
        form_count,
        no_tp,
        no_lat,
        no_pp,
        suspicious,
        uarch_duplicates,
        isa_duplicates,
        isa_only,
        verbose=verbose,
    )
def _check_sanity_arch_db(arch_mm, isa_mm):
    """Collect incomplete, suspicious and duplicate entries of the uarch DB.

    Args:
        arch_mm: machine model of the micro-architecture to check.
        isa_mm: machine model of the corresponding ISA, used for cross-checking.

    Returns:
        Tuple ``(missing_throughput, missing_latency, missing_port_pressure,
        suspicious_instructions, duplicate_instr_arch)``, each a list of instruction forms.
    """
    # Mnemonic prefixes per ISA; forms starting with one of them are reported as
    # suspicious when the ISA DB has no entry for them.
    suspicious_prefixes_by_isa = {
        'x86': ('vfm', 'fm'),
        'aarch64': ('fml', 'ldp', 'stp', 'str'),
    }
    # Compare case-insensitively (as MachineModel does) and fall back to "no prefixes"
    # for any other ISA instead of leaving the variable unbound.
    isa = str(arch_mm.get_ISA()).lower()
    suspicious_prefixes = suspicious_prefixes_by_isa.get(isa, ())
    port_num = len(arch_mm['ports'])
    # returned lists
    missing_throughput = []
    missing_latency = []
    missing_port_pressure = []
    suspicious_instructions = []
    duplicate_instr_arch = []
    for instr_form in arch_mm['instruction_forms']:
        # check value in DB entry
        if instr_form['throughput'] is None:
            missing_throughput.append(instr_form)
        if instr_form['latency'] is None:
            missing_latency.append(instr_form)
        if instr_form['port_pressure'] is None:
            missing_port_pressure.append(instr_form)
        elif len(instr_form['port_pressure']) != port_num:
            warnings.warn(
                'Invalid number of ports:\n     {}'.format(_get_full_instruction_name(instr_form))
            )
        # check entry against ISA DB; startswith() with a tuple reports a form at most once
        if instr_form['name'].startswith(suspicious_prefixes):
            # check if instruction in ISA DB
            if isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None:
                # if not, mark them as suspicious and print it on the screen
                suspicious_instructions.append(instr_form)
        # check for duplicates in DB
        if arch_mm._check_for_duplicate(instr_form['name'], instr_form['operands']):
            duplicate_instr_arch.append(instr_form)
    # every copy of a duplicated form was collected above --> keep one representative each
    unique_duplicates = []
    while duplicate_instr_arch:
        candidate = duplicate_instr_arch.pop()
        if candidate not in duplicate_instr_arch:
            unique_duplicates.append(candidate)
    return (
        missing_throughput,
        missing_latency,
        missing_port_pressure,
        suspicious_instructions,
        unique_duplicates,
    )
def _check_sanity_isa_db(arch_mm, isa_mm):
    """Collect duplicate ISA DB entries and ISA entries unknown to the uarch DB.

    Args:
        arch_mm: machine model of the micro-architecture.
        isa_mm: machine model of the corresponding ISA.

    Returns:
        Tuple ``(duplicate_instr_isa, only_in_isa)`` of instruction form lists.
    """
    only_in_isa = []
    collected_duplicates = []
    for form in isa_mm['instruction_forms']:
        form_name = form['name']
        form_operands = form['operands']
        # ISA form without a counterpart in the uarch DB
        if arch_mm.get_instruction(form_name, form_operands) is None:
            only_in_isa.append(form)
        # ISA form that matches more than one ISA DB entry
        if isa_mm._check_for_duplicate(form_name, form_operands):
            collected_duplicates.append(form)
    # every copy of a duplicate was collected, so reduce to one representative each
    duplicate_instr_isa = []
    while collected_duplicates:
        candidate = collected_duplicates.pop()
        if candidate not in collected_duplicates:
            duplicate_instr_isa.append(candidate)
    return duplicate_instr_isa, only_in_isa
def _print_sanity_report(
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False
):
# non-verbose summary
print('SUMMARY\n----------------------')
print(
'{}% ({}/{}) of instruction forms have no throughput value.'.format(
round(100 * len(m_tp) / total), len(m_tp), total
)
)
print(
'{}% ({}/{}) of instruction forms have no latency value.'.format(
round(100 * len(m_l) / total), len(m_l), total
)
)
print(
'{}% ({}/{}) of instruction forms have no port pressure assignment.'.format(
round(100 * len(m_pp) / total), len(m_pp), total
)
)
print(
'{}% ({}/{}) of instruction forms might miss an ISA DB entry.'.format(
round(100 * len(suspic_instr) / total), len(suspic_instr), total
)
)
print('{} duplicate instruction forms in uarch DB.'.format(len(dup_arch)))
print('{} duplicate instruction forms in ISA DB.'.format(len(dup_isa)))
print(
'{} instruction forms in ISA DB are not referenced by instruction '.format(len(only_isa))
+ 'forms in uarch DB.'
)
print('----------------------\n')
# verbose version
if verbose:
_print_sanity_report_verbose(
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
)
def _print_sanity_report_verbose(
    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
):
    """Print every instruction form found by the sanity check, grouped by finding.

    Each non-empty group is preceded by a headline and its instruction forms are printed
    in a group-specific ANSI colour. ``total`` is accepted but not used here.
    """
    reset = '\033[0m'
    # (headline, affected instruction forms, ANSI colour) in output order
    groups = (
        ('Instruction forms without throughput value:\n', m_tp, '\033[1;34;1m'),
        ('Instruction forms without latency value:\n', m_l, '\033[1;31;1m'),
        ('Instruction forms without port pressure assignment:\n', m_pp, '\033[1;35;1m'),
        ('Instruction forms which might miss an ISA DB entry:\n', suspic_instr, '\033[1;36;1m'),
        ('Duplicate instruction forms in uarch DB:\n', dup_arch, '\033[33m'),
        ('Duplicate instruction forms in ISA DB:\n', dup_isa, '\033[1;33;1m'),
        ('Instruction forms existing in ISA DB but not in uarch DB:\n', only_isa, '\033[36m'),
    )
    for headline, forms, colour in groups:
        # the headline already ends with a newline and is omitted for empty groups
        if len(forms) != 0:
            print(headline, end='')
        for form in forms:
            print('{}{}{}'.format(colour, _get_full_instruction_name(form), reset))
def _get_full_instruction_name(instruction_form):
operands = []
for op in instruction_form['operands']:
op_attrs = [
y + ':' + str(op[y])
for y in list(filter(lambda x: True if x != 'class' else False, op))
]
operands.append('{}({})'.format(op['class'], ','.join(op_attrs)))
return '{} {}'.format(instruction_form['name'], ','.join(operands))
def add_entries_to_db(arch: str, entries: list) -> None:
    """Adds entries to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entries: :class:`list` of DB entries which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure'.
            Entries without a 'name' are reported on stderr and removed from this list;
            missing optional keys of the others are filled in with ``None``.

    Raises:
        AssertionError: if no database file exists for ``arch``.
    """
    # load yaml
    arch = arch.lower()
    # expanduser() takes exactly one path; the file name has to be joined onto its result
    filepath = os.path.join(os.path.expanduser('~/.osaca/data/'), arch + '.yml')
    assert os.path.exists(filepath), 'No database found at {}'.format(filepath)
    with open(filepath, 'r') as f:
        # NOTE(review): yaml.Loader is not the safe loader -- presumably fine because the
        # file is the user's own database; confirm before loading foreign files.
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameter of entry and append it to list
    # Iterate over a snapshot: removing from the list being iterated would skip the
    # element that follows every removed one.
    for entry in list(entries):
        if 'name' not in entry:
            print(
                'No name for instruction \n\t{}\nspecified. No import possible'.format(entry),
                file=sys.stderr,
            )
            # remove entry from list
            entries.remove(entry)
            continue
        for optional_key in ('operands', 'throughput', 'latency', 'port_pressure'):
            entry.setdefault(optional_key, None)
        data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)
def __dump_data_to_yaml(filepath, data):
    """Overwrite ``filepath`` with ``data``, dumped in three consecutive parts.

    The remaining meta data comes first, followed by 'port_model_scheme' and finally
    'instruction_forms'. ``data`` itself is left untouched.
    """
    # split the two trailing sections off a shallow copy (no ordered dict yet)
    meta_data = dict(data)
    instruction_forms = meta_data.pop('instruction_forms')
    port_model_scheme = meta_data.pop('port_model_scheme')
    with open(filepath, 'w') as out:
        # first the 'normal' meta data
        yaml.dump(meta_data, out, allow_unicode=True)
        # now the port model scheme in |-scheme for better readability
        yaml.dump(
            {'port_model_scheme': port_model_scheme},
            out,
            allow_unicode=True,
            default_style='|',
        )
        # finally, the instruction forms
        yaml.dump({'instruction_forms': instruction_forms}, out, allow_unicode=True)

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3
import collections
from osaca import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, SemanticsAppender,
reduce_to_section)
class KerncraftAPI(object):
    """Interface bundling machine model, parser and semantics for use by kerncraft."""

    def __init__(self, arch):
        """Load the machine model of ``arch`` and select the parser for its ISA.

        Raises:
            ValueError: if the ISA of the machine model is neither AArch64 nor x86.
        """
        self.machine_model = MachineModel(arch=arch)
        self.semantics = SemanticsAppender(self.machine_model)
        isa = self.machine_model.get_ISA()
        # compare case-insensitively, like MachineModel does for its own ISA checks
        normalized_isa = str(isa).lower()
        if normalized_isa == 'aarch64':
            self.parser = ParserAArch64v81()
        elif normalized_isa == 'x86':
            self.parser = ParserX86ATT()
        else:
            # fail here instead of with an AttributeError on the first use of self.parser
            raise ValueError('Unsupported ISA: {!r}'.format(isa))

    def analyze_code(self, code):
        """Parse ``code``, reduce it to the kernel section and annotate every instruction.

        Returns:
            The kernel with source/destination and throughput/latency information assigned.
        """
        parsed_code = self.parser.parse_file(code)
        kernel = reduce_to_section(parsed_code, self.machine_model.get_ISA())
        for instruction_form in kernel:
            self.semantics.assign_src_dst(instruction_form)
            self.semantics.assign_tp_lt(instruction_form)
        return kernel

    def create_output(self, kernel, show_lineno=False):
        """Print the throughput analysis and the latency analysis of ``kernel``."""
        kernel_graph = KernelDG(kernel, self.parser, self.machine_model)
        frontend = Frontend(arch=self.machine_model.get_arch())
        frontend.print_throughput_analysis(kernel, show_lineno=show_lineno)
        frontend.print_latency_analysis(kernel_graph.get_critical_path())

    def get_unmatched_instruction_ratio(self, kernel):
        """Return the fraction of instructions with unknown throughput and latency.

        An empty kernel has no unmatched instructions, so its ratio is 0.0.
        """
        if not kernel:
            return 0.0
        unmatched_counter = 0
        for instruction in kernel:
            if (
                INSTR_FLAGS.TP_UNKWN in instruction['flags']
                and INSTR_FLAGS.LT_UNKWN in instruction['flags']
            ):
                unmatched_counter += 1
        return unmatched_counter / len(kernel)

    def get_port_occupation_cycles(self, kernel):
        """Return an ordered mapping of the model's ports to the kernel's throughput sums."""
        throughput_values = self.semantics.get_throughput_sum(kernel)
        port_names = self.machine_model['ports']
        return collections.OrderedDict(list(zip(port_names, throughput_values)))

    def get_total_throughput(self, kernel):
        """Return the largest value of the kernel's throughput sums."""
        return max(self.semantics.get_throughput_sum(kernel))

    def get_latency(self, kernel):
        """Return the summed latency along the critical path of ``kernel``.

        Instruction forms without a latency value count as 0.
        """
        kernel_graph = KernelDG(kernel, self.parser, self.machine_model)
        # Sum the latency *values* (not the instruction forms themselves) over the same
        # critical path that create_output() hands to the latency report.
        return sum(
            instruction_form['latency'] if instruction_form['latency'] is not None else 0
            for instruction_form in kernel_graph.get_critical_path()
        )

View File

@@ -1,94 +0,0 @@
#!/usr/bin/env python3
import os
import sys
from ruamel import yaml
def add_entry_to_db(arch: str, entry):
    """Adds entry to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entry: DB entry which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure'.
            Missing optional keys are filled in with ``None`` (the dict is modified in place).

    Raises:
        AssertionError: if no database file exists for ``arch``.
        ValueError: if ``entry`` does not contain a 'name'.
    """
    # load yaml
    arch = arch.lower()
    # expanduser() takes exactly one path; the file name has to be joined onto its result
    filepath = os.path.join(os.path.expanduser('~/.osaca/data/'), arch + '.yml')
    assert os.path.exists(filepath), 'No database found at {}'.format(filepath)
    with open(filepath, 'r') as f:
        # NOTE(review): yaml.Loader is not the safe loader -- presumably fine because the
        # file is the user's own database; confirm before loading foreign files.
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameter of entry
    if 'name' not in entry:
        raise ValueError('No name for instruction specified. No import possible')
    for optional_key in ('operands', 'throughput', 'latency', 'port_pressure'):
        entry.setdefault(optional_key, None)
    data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)
def add_entries_to_db(arch: str, entries: list) -> None:
    """Adds entries to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entries: :class:`list` of DB entries which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure'.
            Entries without a 'name' are reported on stderr and removed from this list;
            missing optional keys of the others are filled in with ``None``.

    Raises:
        AssertionError: if no database file exists for ``arch``.
    """
    # load yaml
    arch = arch.lower()
    # expanduser() takes exactly one path; the file name has to be joined onto its result
    filepath = os.path.join(os.path.expanduser('~/.osaca/data/'), arch + '.yml')
    assert os.path.exists(filepath), 'No database found at {}'.format(filepath)
    with open(filepath, 'r') as f:
        # NOTE(review): yaml.Loader is not the safe loader -- presumably fine because the
        # file is the user's own database; confirm before loading foreign files.
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameter of entry and append it to list
    # Iterate over a snapshot: removing from the list being iterated would skip the
    # element that follows every removed one.
    for entry in list(entries):
        if 'name' not in entry:
            print(
                'No name for instruction \n\t{}\nspecified. No import possible'.format(entry),
                file=sys.stderr,
            )
            # remove entry from list
            entries.remove(entry)
            continue
        for optional_key in ('operands', 'throughput', 'latency', 'port_pressure'):
            entry.setdefault(optional_key, None)
        data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)
def __dump_data_to_yaml(filepath, data):
    """Overwrite ``filepath`` with ``data``, dumped in three consecutive parts.

    The remaining meta data comes first, followed by 'port_model_scheme' and finally
    'instruction_forms'. ``data`` itself is left untouched.
    """
    # split the two trailing sections off a shallow copy (no ordered dict yet)
    meta_data = dict(data)
    instruction_forms = meta_data.pop('instruction_forms')
    port_model_scheme = meta_data.pop('port_model_scheme')
    with open(filepath, 'w') as out:
        # first the 'normal' meta data
        yaml.dump(meta_data, out, allow_unicode=True)
        # now the port model scheme in |-scheme for better readability
        yaml.dump(
            {'port_model_scheme': port_model_scheme},
            out,
            allow_unicode=True,
            default_style='|',
        )
        # finally, the instruction forms
        yaml.dump({'instruction_forms': instruction_forms}, out, allow_unicode=True)

View File

@@ -1,5 +1,6 @@
osaca_version: 0.3.0
micro_architecture: "Cascade Lake SP"
arch_code: "CSX"
isa: "x86"
port_model_scheme: |
┌------------------------------------------------------------------------┐

View File

@@ -57,6 +57,25 @@ instruction_forms:
pre-indexed: false
post-indexed: false
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "d"
source: false
destination: true
- class: "register"
prefix: "d"
source: false
destination: true
- class: "memory"
base: "x"
offset: "imd"
index: ~
scale: 1
pre-indexed: false
post-indexed: true
source: true
destination: false
- name: "ldp"
operands:
@@ -115,6 +134,25 @@ instruction_forms:
post-indexed: true
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "x"
offset: ~
index: ~
scale: 1
pre-indexed: false
post-indexed: false
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
@@ -191,6 +229,25 @@ instruction_forms:
post-indexed: false
source: false
destination: true
- name: "stp"
operands:
- class: "register"
prefix: "q"
source: true
destination: false
- class: "register"
prefix: "q"
source: true
destination: false
- class: "memory"
base: "x"
offset: ~
index: ~
scale: 1
pre-indexed: false
post-indexed: True
source: false
destination: true
- name: "stp"
operands:
- class: "register"
@@ -252,7 +309,7 @@ instruction_forms:
index: ~
scale: 1
pre-indexed: false
post-indexed: false
post-indexed: true
source: false
destination: true
- name: "str"
@@ -264,7 +321,7 @@ instruction_forms:
- class: "memory"
base: "x"
offset: ~
index: "x"
index: ~
scale: 1
pre-indexed: false
post-indexed: true

View File

@@ -83,16 +83,6 @@ instruction_forms:
name: "xmm"
source: true
destination: false
- name: vaddsd
operands:
- class: "register"
name: "xmm"
source: true
destination: true
- class: "register"
name: "xmm"
source: true
destination: false
- name: vfmadd132pd
operands:
- class: "memory"

View File

@@ -293,6 +293,22 @@ instruction_forms:
throughput: 1.0
latency: ~ # 0 0DV 1 1DV 2 3 4 5
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0]
- name: "ldp"
operands:
- class: "register"
prefix: "d"
- class: "register"
prefix: "d"
- class: "memory"
base: "x"
offset: ~
index: ~
scale: 1
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: ~ # 0 0DV 1 1DV 2 3 4 5
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0]
- name: "ldr"
operands:
- class: "register"

View File

@@ -71,6 +71,17 @@ class MachineModel(object):
######################################################
def _check_for_duplicate(self, name, operands):
matches = [
instruction_form
for instruction_form in self._data['instruction_forms']
if instruction_form['name'] == name
and self._match_operands(instruction_form['operands'], operands)
]
if len(matches) > 1:
return True
return False
def _match_operands(self, i_operands, operands):
if isinstance(operands, dict):
operands = operands['operand_list']
@@ -86,12 +97,15 @@ class MachineModel(object):
return False
def _check_operands(self, i_operands, operands):
if self._data['isa'] == 'AArch64':
if self._data['isa'].lower() == 'aarch64':
return self._check_AArch64_operands(i_operands, operands)
if self._data['isa'] == 'x86':
if self._data['isa'].lower() == 'x86':
return self._check_x86_operands(i_operands, operands)
def _check_AArch64_operands(self, i_operand, operand):
if 'class' in operand:
# compare two DB entries
return self._compare_db_entries(i_operand, operand)
# register
if 'register' in operand:
if i_operand['class'] != 'register':
@@ -120,6 +134,9 @@ class MachineModel(object):
return False
def _check_x86_operands(self, i_operand, operand):
if 'class' in operand:
# compare two DB entries
return self._compare_db_entries(i_operand, operand)
# register
if 'register' in operand:
if i_operand['class'] != 'register':
@@ -137,6 +154,18 @@ class MachineModel(object):
if 'identifier' in operand:
return i_operand['class'] == 'identifier'
def _compare_db_entries(self, operand_1, operand_2):
operand_attributes = list(
filter(lambda x: True if x != 'source' and x != 'destination' else False, operand_1)
)
for key in operand_attributes:
try:
if operand_1[key] != operand_2[key]:
return False
except KeyError:
return False
return True
def _is_AArch64_reg_type(self, i_reg, reg):
if reg['prefix'] != i_reg['prefix']:
return False