diff --git a/osaca/api/__init__.py b/osaca/api/__init__.py new file mode 100644 index 0000000..8b4f510 --- /dev/null +++ b/osaca/api/__init__.py @@ -0,0 +1,9 @@ +""" +APIs for handling interfaces to kerncraft, ibench, etc. + +Only the classes below will be exported, so please add new semantic tools to __all__. +""" +from .kerncraft_interface import KerncraftAPI +from .db_interface import add_entry_to_db, add_entries_to_db, sanity_check + +__all__ = ['KerncraftAPI', 'add_entry_to_db', 'add_entries_to_db', 'sanity_check'] diff --git a/osaca/api/db_interface.py b/osaca/api/db_interface.py new file mode 100755 index 0000000..9ee31fb --- /dev/null +++ b/osaca/api/db_interface.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python3 + +import os +import sys +import warnings + +from ruamel import yaml + +from osaca.semantics import MachineModel + + +def add_entry_to_db(arch: str, entry): + """Adds entry to the user database in ~/.osaca/data + + Args: + arch: string representation of the architecture as abbreviation. + Database for this architecture must already exist. + entry: DB entry which will be added. Should consist at best out of + 'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...), + 'throughput', 'latency', 'port_pressure'. + """ + # load yaml + arch = arch.lower() + filepath = os.path.join(os.path.expanduser('~/.osaca/data/', arch + '.yml')) + assert os.path.exists(filepath) + with open(filepath, 'r') as f: + data = yaml.load(f, Loader=yaml.Loader) + # check parameter of entry + if 'name' not in entry: + raise ValueError('No name for instruction specified. No import possible') + if 'operands' not in entry: + entry['operands'] = None + if 'throughput' not in entry: + entry['throughput'] = None + if 'latency' not in entry: + entry['latency'] = None + if 'port_pressure' not in entry: + entry['port_pressure'] = None + data['instruction_forms'].append(entry) + __dump_data_to_yaml(filepath, data) + + +def sanity_check(arch: str, verbose=False): + # load arch machine model + arch_mm = MachineModel(arch=arch) + data = arch_mm['instruction_forms'] + # load isa machine model + isa = arch_mm.get_ISA() + isa_mm = MachineModel(arch='isa/{}'.format(isa)) + num_of_instr = len(data) + + # check arch DB entries + ( + missing_throughput, + missing_latency, + missing_port_pressure, + suspicious_instructions, + duplicate_instr_arch, + ) = _check_sanity_arch_db(arch_mm, isa_mm) + # check ISA DB entries + duplicate_instr_isa, only_in_isa = _check_sanity_isa_db(arch_mm, isa_mm) + + _print_sanity_report( + num_of_instr, + missing_throughput, + missing_latency, + missing_port_pressure, + suspicious_instructions, + duplicate_instr_arch, + duplicate_instr_isa, + only_in_isa, + verbose=verbose, + ) + + +def _check_sanity_arch_db(arch_mm, isa_mm): + suspicious_prefixes_x86 = ['vfm', 'fm'] + suspicious_prefixes_arm = ['fml', 'ldp', 'stp', 'str'] + if arch_mm.get_ISA() == 'AArch64': + suspicious_prefixes = suspicious_prefixes_arm + if arch_mm.get_ISA() == 'x86': + suspicious_prefixes = suspicious_prefixes_x86 + port_num = len(arch_mm['ports']) + + # returned lists + missing_throughput = [] + missing_latency = [] + missing_port_pressure = [] + suspicious_instructions = [] + duplicate_instr_arch = [] + + for instr_form in arch_mm['instruction_forms']: + # check value in DB entry + if instr_form['throughput'] is None: + missing_throughput.append(instr_form) + if instr_form['latency'] is None: + missing_latency.append(instr_form) + if instr_form['port_pressure'] is None: + missing_port_pressure.append(instr_form) + elif len(instr_form['port_pressure']) != port_num: + warnings.warn( + 'Invalid number of ports:\n {}'.format(_get_full_instruction_name(instr_form)) + ) + # check entry against ISA DB + for prefix in suspicious_prefixes: + if instr_form['name'].startswith(prefix): + # check if instruction in ISA DB + if isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None: + # if not, mark them as suspicious and print it on the screen + suspicious_instructions.append(instr_form) + # check for duplicates in DB + if arch_mm._check_for_duplicate(instr_form['name'], instr_form['operands']): + duplicate_instr_arch.append(instr_form) + # every entry exists twice --> uniquify + tmp_list = [] + for i in range(0, len(duplicate_instr_arch)): + tmp = duplicate_instr_arch.pop() + if tmp not in duplicate_instr_arch: + tmp_list.append(tmp) + duplicate_instr_arch = tmp_list + return ( + missing_throughput, + missing_latency, + missing_port_pressure, + suspicious_instructions, + duplicate_instr_arch, + ) + + +def _check_sanity_isa_db(arch_mm, isa_mm): + # returned lists + duplicate_instr_isa = [] + only_in_isa = [] + + for instr_form in isa_mm['instruction_forms']: + # check if instr is missing in arch DB + if arch_mm.get_instruction(instr_form['name'], instr_form['operands']) is None: + only_in_isa.append(instr_form) + # check for duplicates + if isa_mm._check_for_duplicate(instr_form['name'], instr_form['operands']): + duplicate_instr_isa.append(instr_form) + # every entry exists twice --> uniquify + tmp_list = [] + for i in range(0, len(duplicate_instr_isa)): + tmp = duplicate_instr_isa.pop() + if tmp not in duplicate_instr_isa: + tmp_list.append(tmp) + duplicate_instr_isa = tmp_list + + return duplicate_instr_isa, only_in_isa + + +def _print_sanity_report( + total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False +): + # non-verbose summary + print('SUMMARY\n----------------------') + print( + '{}% ({}/{}) of instruction forms have no throughput value.'.format( + round(100 * len(m_tp) / total), len(m_tp), total + ) + ) + print( + '{}% ({}/{}) of instruction forms have no latency value.'.format( + round(100 * len(m_l) / total), len(m_l), total + ) + ) + print( + '{}% ({}/{}) of instruction forms have no port pressure assignment.'.format( + round(100 * len(m_pp) / total), len(m_pp), total + ) + ) + print( + '{}% ({}/{}) of instruction forms might miss an ISA DB entry.'.format( + round(100 * len(suspic_instr) / total), len(suspic_instr), total + ) + ) + print('{} duplicate instruction forms in uarch DB.'.format(len(dup_arch))) + print('{} duplicate instruction forms in ISA DB.'.format(len(dup_isa))) + print( + '{} instruction forms in ISA DB are not referenced by instruction '.format(len(only_isa)) + + 'forms in uarch DB.' + ) + print('----------------------\n') + # verbose version + if verbose: + _print_sanity_report_verbose( + total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa + ) + + +def _print_sanity_report_verbose( + total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa +): + BRIGHT_CYAN = '\033[1;36;1m' + BRIGHT_BLUE = '\033[1;34;1m' + BRIGHT_RED = '\033[1;31;1m' + BRIGHT_MAGENTA = '\033[1;35;1m' + BRIGHT_YELLOW = '\033[1;33;1m' + CYAN = '\033[36m' + YELLOW = '\033[33m' + WHITE = '\033[0m' + + print('Instruction forms without throughput value:\n' if len(m_tp) != 0 else '', end='') + for instr_form in m_tp: + print('{}{}{}'.format(BRIGHT_BLUE, _get_full_instruction_name(instr_form), WHITE)) + print('Instruction forms without latency value:\n' if len(m_l) != 0 else '', end='') + for instr_form in m_l: + print('{}{}{}'.format(BRIGHT_RED, _get_full_instruction_name(instr_form), WHITE)) + print( + 'Instruction forms without port pressure assignment:\n' if len(m_pp) != 0 else '', end='' + ) + for instr_form in m_pp: + print('{}{}{}'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE)) + print( + 'Instruction forms which might miss an ISA DB entry:\n' if len(suspic_instr) != 0 else '', + end='', + ) + for instr_form in suspic_instr: + print('{}{}{}'.format(BRIGHT_CYAN, _get_full_instruction_name(instr_form), WHITE)) + print('Duplicate instruction forms in uarch DB:\n' if len(dup_arch) != 0 else '', end='') + for instr_form in dup_arch: + print('{}{}{}'.format(YELLOW, _get_full_instruction_name(instr_form), WHITE)) + print('Duplicate instruction forms in ISA DB:\n' if len(dup_isa) != 0 else '', end='') + for instr_form in dup_isa: + print('{}{}{}'.format(BRIGHT_YELLOW, _get_full_instruction_name(instr_form), WHITE)) + print( + 'Instruction forms existing in ISA DB but not in uarch DB:\n' + if len(only_isa) != 0 + else '', + end='', + ) + for instr_form in only_isa: + print('{}{}{}'.format(CYAN, _get_full_instruction_name(instr_form), WHITE)) + + +def _get_full_instruction_name(instruction_form): + operands = [] + for op in instruction_form['operands']: + op_attrs = [ + y + ':' + str(op[y]) + for y in list(filter(lambda x: True if x != 'class' else False, op)) + ] + operands.append('{}({})'.format(op['class'], ','.join(op_attrs))) + return '{} {}'.format(instruction_form['name'], ','.join(operands)) + + +def add_entries_to_db(arch: str, entries: list) -> None: + """Adds entries to the user database in ~/.osaca/data + + Args: + arch: string representation of the architecture as abbreviation. + Database for this architecture must already exist. + entries: :class:`list` of DB entries which will be added. Should consist at best out of + 'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...), + 'throughput', 'latency', 'port_pressure'. + """ + # load yaml + arch = arch.lower() + filepath = os.path.join(os.path.expanduser('~/.osaca/data/', arch + '.yml')) + assert os.path.exists(filepath) + with open(filepath, 'r') as f: + data = yaml.load(f, Loader=yaml.Loader) + # check parameter of entry and append it to list + for entry in entries: + if 'name' not in entry: + print( + 'No name for instruction \n\t{}\nspecified. No import possible'.format(entry), + file=sys.stderr, + ) + # remove entry from list + entries.remove(entry) + continue + if 'operands' not in entry: + entry['operands'] = None + if 'throughput' not in entry: + entry['throughput'] = None + if 'latency' not in entry: + entry['latency'] = None + if 'port_pressure' not in entry: + entry['port_pressure'] = None + data['instruction_forms'].append(entry) + __dump_data_to_yaml(filepath, data) + + +def __dump_data_to_yaml(filepath, data): + # first add 'normal' meta data in the right order (no ordered dict yet) + meta_data = dict(data) + del meta_data['instruction_forms'] + del meta_data['port_model_scheme'] + with open(filepath, 'w') as f: + yaml.dump(meta_data, f, allow_unicode=True) + with open(filepath, 'a') as f: + # now add port model scheme in |-scheme for better readability + yaml.dump( + {'port_model_scheme': data['port_model_scheme']}, + f, + allow_unicode=True, + default_style='|', + ) + # finally, add instruction forms + yaml.dump({'instruction_forms': data['instruction_forms']}, f, allow_unicode=True) diff --git a/osaca/api/kerncraft_interface.py b/osaca/api/kerncraft_interface.py new file mode 100644 index 0000000..6debdf0 --- /dev/null +++ b/osaca/api/kerncraft_interface.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +import collections + +from osaca import Frontend +from osaca.parser import ParserAArch64v81, ParserX86ATT +from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, SemanticsAppender, + reduce_to_section) + + +class KerncraftAPI(object): + def __init__(self, arch): + self.machine_model = MachineModel(arch=arch) + self.semantics = SemanticsAppender(self.machine_model) + isa = self.machine_model.get_ISA() + if isa == 'AArch64': + self.parser = ParserAArch64v81() + elif isa == 'x86': + self.parser = ParserX86ATT() + + def analyze_code(self, code): + parsed_code = self.parser.parse_file(code) + kernel = reduce_to_section(parsed_code, self.machine_model.get_ISA()) + for i in range(len(kernel)): + self.semantics.assign_src_dst(kernel[i]) + self.semantics.assign_tp_lt(kernel[i]) + return kernel + + def create_output(self, kernel, show_lineno=False): + kernel_graph = KernelDG(kernel, self.parser, self.machine_model) + frontend = Frontend(arch=self.machine_model.get_arch()) + frontend.print_throughput_analysis(kernel, show_lineno=show_lineno) + frontend.print_latency_analysis(kernel_graph.get_critical_path()) + + def get_unmatched_instruction_ratio(self, kernel): + unmatched_counter = 0 + for instruction in kernel: + if ( + INSTR_FLAGS.TP_UNKWN in instruction['flags'] + and INSTR_FLAGS.LT_UNKWN in instruction['flags'] + ): + unmatched_counter += 1 + return unmatched_counter / len(kernel) + + def get_port_occupation_cycles(self, kernel): + throughput_values = self.semantics.get_throughput_sum(kernel) + port_names = self.machine_model['ports'] + return collections.OrderedDict(list(zip(port_names, throughput_values))) + + def get_total_throughput(self, kernel): + return max(self.semantics.get_throughput_sum(kernel)) + + def get_latency(self, kernel): + kernel_graph = KernelDG(kernel, self.parser, self.machine_model) + return sum([x if x['latency'] is not None else 0 for x in kernel_graph]) diff --git a/osaca/create_db_entry.py b/osaca/create_db_entry.py deleted file mode 100755 index 956b07e..0000000 --- a/osaca/create_db_entry.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys - -from ruamel import yaml - - -def add_entry_to_db(arch: str, entry): - """Adds entry to the user database in ~/.osaca/data - - Args: - arch: string representation of the architecture as abbreviation. - Database for this architecture must already exist. - entry: DB entry which will be added. Should consist at best out of - 'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...), - 'throughput', 'latency', 'port_pressure'. - """ - # load yaml - arch = arch.lower() - filepath = os.path.join(os.path.expanduser('~/.osaca/data/', arch + '.yml')) - assert os.path.exists(filepath) - with open(filepath, 'r') as f: - data = yaml.load(f, Loader=yaml.Loader) - # check parameter of entry - if 'name' not in entry: - raise ValueError('No name for instruction specified. No import possible') - if 'operands' not in entry: - entry['operands'] = None - if 'throughput' not in entry: - entry['throughput'] = None - if 'latency' not in entry: - entry['latency'] = None - if 'port_pressure' not in entry: - entry['port_pressure'] = None - data['instruction_forms'].append(entry) - __dump_data_to_yaml(filepath, data) - - -def add_entries_to_db(arch: str, entries: list) -> None: - """Adds entries to the user database in ~/.osaca/data - - Args: - arch: string representation of the architecture as abbreviation. - Database for this architecture must already exist. - entries: :class:`list` of DB entries which will be added. Should consist at best out of - 'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...), - 'throughput', 'latency', 'port_pressure'. - """ - # load yaml - arch = arch.lower() - filepath = os.path.join(os.path.expanduser('~/.osaca/data/', arch + '.yml')) - assert os.path.exists(filepath) - with open(filepath, 'r') as f: - data = yaml.load(f, Loader=yaml.Loader) - # check parameter of entry and append it to list - for entry in entries: - if 'name' not in entry: - print( - 'No name for instruction \n\t{}\nspecified. No import possible'.format(entry), - file=sys.stderr, - ) - # remove entry from list - entries.remove(entry) - continue - if 'operands' not in entry: - entry['operands'] = None - if 'throughput' not in entry: - entry['throughput'] = None - if 'latency' not in entry: - entry['latency'] = None - if 'port_pressure' not in entry: - entry['port_pressure'] = None - data['instruction_forms'].append(entry) - __dump_data_to_yaml(filepath, data) - - -def __dump_data_to_yaml(filepath, data): - # first add 'normal' meta data in the right order (no ordered dict yet) - meta_data = dict(data) - del meta_data['instruction_forms'] - del meta_data['port_model_scheme'] - with open(filepath, 'w') as f: - yaml.dump(meta_data, f, allow_unicode=True) - with open(filepath, 'a') as f: - # now add port model scheme in |-scheme for better readability - yaml.dump( - {'port_model_scheme': data['port_model_scheme']}, - f, - allow_unicode=True, - default_style='|', - ) - # finally, add instruction forms - yaml.dump({'instruction_forms': data['instruction_forms']}, f, allow_unicode=True) diff --git a/osaca/data/csx.yml b/osaca/data/csx.yml index c728017..e50d16f 100644 --- a/osaca/data/csx.yml +++ b/osaca/data/csx.yml @@ -1,5 +1,6 @@ osaca_version: 0.3.0 micro_architecture: "Cascade Lake SP" +arch_code: "CSX" isa: "x86" port_model_scheme: | ┌------------------------------------------------------------------------┐ diff --git a/osaca/data/isa/AArch64.yml b/osaca/data/isa/aarch64.yml similarity index 83% rename from osaca/data/isa/AArch64.yml rename to osaca/data/isa/aarch64.yml index 362caa8..dae5f04 100644 --- a/osaca/data/isa/AArch64.yml +++ b/osaca/data/isa/aarch64.yml @@ -57,6 +57,25 @@ instruction_forms: pre-indexed: false post-indexed: false source: true + destination: false + - name: "ldp" + operands: + - class: "register" + prefix: "d" + source: false + destination: true + - class: "register" + prefix: "d" + source: false + destination: true + - class: "memory" + base: "x" + offset: "imd" + index: ~ + scale: 1 + pre-indexed: false + post-indexed: true + source: true destination: false - name: "ldp" operands: @@ -115,6 +134,25 @@ instruction_forms: post-indexed: true source: true destination: false + - name: "ldp" + operands: + - class: "register" + prefix: "q" + source: false + destination: true + - class: "register" + prefix: "q" + source: false + destination: true + - class: "memory" + base: "x" + offset: ~ + index: ~ + scale: 1 + pre-indexed: false + post-indexed: false + source: true + destination: false - name: "ldp" operands: - class: "register" @@ -191,6 +229,25 @@ instruction_forms: post-indexed: false source: false destination: true + - name: "stp" + operands: + - class: "register" + prefix: "q" + source: true + destination: false + - class: "register" + prefix: "q" + source: true + destination: false + - class: "memory" + base: "x" + offset: ~ + index: ~ + scale: 1 + pre-indexed: false + post-indexed: True + source: false + destination: true - name: "stp" operands: - class: "register" @@ -252,7 +309,7 @@ instruction_forms: index: ~ scale: 1 pre-indexed: false - post-indexed: false + post-indexed: true source: false destination: true - name: "str" @@ -264,7 +321,7 @@ instruction_forms: - class: "memory" base: "x" offset: ~ - index: "x" + index: ~ scale: 1 pre-indexed: false post-indexed: true diff --git a/osaca/data/isa/x86.yml b/osaca/data/isa/x86.yml index 3e0d7b6..482fbbf 100644 --- a/osaca/data/isa/x86.yml +++ b/osaca/data/isa/x86.yml @@ -83,16 +83,6 @@ instruction_forms: name: "xmm" source: true destination: false - - name: vaddsd - operands: - - class: "register" - name: "xmm" - source: true - destination: true - - class: "register" - name: "xmm" - source: true - destination: false - name: vfmadd132pd operands: - class: "memory" diff --git a/osaca/data/vulcan.yml b/osaca/data/vulcan.yml index ac96a3a..b05d7f6 100644 --- a/osaca/data/vulcan.yml +++ b/osaca/data/vulcan.yml @@ -293,6 +293,22 @@ instruction_forms: throughput: 1.0 latency: ~ # 0 0DV 1 1DV 2 3 4 5 port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0] + - name: "ldp" + operands: + - class: "register" + prefix: "d" + - class: "register" + prefix: "d" + - class: "memory" + base: "x" + offset: ~ + index: ~ + scale: 1 + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: ~ # 0 0DV 1 1DV 2 3 4 5 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0] - name: "ldr" operands: - class: "register" diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index f23f097..878d0c2 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -71,6 +71,17 @@ class MachineModel(object): ###################################################### + def _check_for_duplicate(self, name, operands): + matches = [ + instruction_form + for instruction_form in self._data['instruction_forms'] + if instruction_form['name'] == name + and self._match_operands(instruction_form['operands'], operands) + ] + if len(matches) > 1: + return True + return False + def _match_operands(self, i_operands, operands): if isinstance(operands, dict): operands = operands['operand_list'] @@ -86,12 +97,15 @@ class MachineModel(object): return False def _check_operands(self, i_operands, operands): - if self._data['isa'] == 'AArch64': + if self._data['isa'].lower() == 'aarch64': return self._check_AArch64_operands(i_operands, operands) - if self._data['isa'] == 'x86': + if self._data['isa'].lower() == 'x86': return self._check_x86_operands(i_operands, operands) def _check_AArch64_operands(self, i_operand, operand): + if 'class' in operand: + # compare two DB entries + return self._compare_db_entries(i_operand, operand) # register if 'register' in operand: if i_operand['class'] != 'register': @@ -120,6 +134,9 @@ class MachineModel(object): return False def _check_x86_operands(self, i_operand, operand): + if 'class' in operand: + # compare two DB entries + return self._compare_db_entries(i_operand, operand) # register if 'register' in operand: if i_operand['class'] != 'register': @@ -137,6 +154,18 @@ class MachineModel(object): if 'identifier' in operand: return i_operand['class'] == 'identifier' + def _compare_db_entries(self, operand_1, operand_2): + operand_attributes = list( + filter(lambda x: True if x != 'source' and x != 'destination' else False, operand_1) + ) + for key in operand_attributes: + try: + if operand_1[key] != operand_2[key]: + return False + except KeyError: + return False + return True + def _is_AArch64_reg_type(self, i_reg, reg): if reg['prefix'] != i_reg['prefix']: return False