From 2d97c1c09a3636401bd12b61ef1cfcb31b24498c Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 29 Aug 2019 14:03:16 +0200 Subject: [PATCH] enhanced frontend and added hidable load port --- osaca/api/ibench_interface.py | 1 + osaca/api/kerncraft_interface.py | 13 ++--- osaca/data/csx.yml | 6 ++- osaca/data/vulcan.yml | 1 + osaca/data/zen1.yml | 15 +++++- osaca/frontend.py | 62 +++++++++++++++++---- osaca/parser/__init__.py | 3 +- osaca/parser/parser_AArch64v81.py | 3 +- osaca/parser/parser_x86att.py | 3 +- osaca/semantics/hw_model.py | 32 +++++++++++ osaca/semantics/kernel_dg.py | 2 +- osaca/semantics/semanticsAppender.py | 80 +++++++++++++++++++++++++++- 12 files changed, 194 insertions(+), 27 deletions(-) diff --git a/osaca/api/ibench_interface.py b/osaca/api/ibench_interface.py index 3d2ca29..794b253 100755 --- a/osaca/api/ibench_interface.py +++ b/osaca/api/ibench_interface.py @@ -30,6 +30,7 @@ class IbenchAPI(object): # TODO raise NotImplementedError + # TODO # template_x86 = Template() template_aarch64 = Template( diff --git a/osaca/api/kerncraft_interface.py b/osaca/api/kerncraft_interface.py index 4b57ef1..e065281 100755 --- a/osaca/api/kerncraft_interface.py +++ b/osaca/api/kerncraft_interface.py @@ -4,8 +4,8 @@ import collections from osaca.frontend import Frontend from osaca.parser import ParserAArch64v81, ParserX86ATT -from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, SemanticsAppender, - reduce_to_section) +from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, + SemanticsAppender, reduce_to_section) class KerncraftAPI(object): @@ -21,16 +21,13 @@ class KerncraftAPI(object): def analyze_code(self, code): parsed_code = self.parser.parse_file(code) kernel = reduce_to_section(parsed_code, self.machine_model.get_ISA()) - for i in range(len(kernel)): - self.semantics.assign_src_dst(kernel[i]) - self.semantics.assign_tp_lt(kernel[i]) + self.semantics.add_semantics(kernel) return kernel - def create_output(self, kernel, show_lineno=False): + def create_output(self, kernel, verbose=False): kernel_graph = KernelDG(kernel, self.parser, self.machine_model) frontend = Frontend(arch=self.machine_model.get_arch()) - frontend.print_throughput_analysis(kernel, show_lineno=show_lineno) - frontend.print_latency_analysis(kernel_graph.get_critical_path()) + frontend.print_full_analysis(kernel, kernel_graph, verbose=verbose) def get_unmatched_instruction_ratio(self, kernel): unmatched_counter = 0 diff --git a/osaca/data/csx.yml b/osaca/data/csx.yml index e50d16f..b290410 100644 --- a/osaca/data/csx.yml +++ b/osaca/data/csx.yml @@ -2,6 +2,11 @@ osaca_version: 0.3.0 micro_architecture: "Cascade Lake SP" arch_code: "CSX" isa: "x86" +ROB_size: 224 +retired_uOps_per_cycle: 4 +scheduler_size: 97 +hidden_loads: false +ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"] port_model_scheme: | ┌------------------------------------------------------------------------┐ | 97 entry unified scheduler | @@ -38,7 +43,6 @@ port_model_scheme: | | VNNI | ┌-------┐ └-------┘ | VNNI | └-------┘ -ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"] instruction_forms: - name: addsd operands: diff --git a/osaca/data/vulcan.yml b/osaca/data/vulcan.yml index bf0085d..a9c354c 100644 --- a/osaca/data/vulcan.yml +++ b/osaca/data/vulcan.yml @@ -5,6 +5,7 @@ isa: "AArch64" ROB_size: 180 retired_uOps_per_cycle: 4 scheduler_size: 60 +hidden_loads: false ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"] port_model_scheme: | ┌-----------------------------------------------------------┐ diff --git a/osaca/data/zen1.yml b/osaca/data/zen1.yml index d4e3310..a4abc50 100644 --- a/osaca/data/zen1.yml +++ b/osaca/data/zen1.yml @@ -2,6 +2,8 @@ osaca_version: 0.3.0 micro_architecture: "AMD Zen (family 17h)" arch_code: "ZEN1" isa: "x86" +hidden_loads: true +ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "8D", "9", "9D"] port_model_scheme: | ┌--------------------------------------┐ ┌-----------------------------------------------┐ | 96 entries OoO scheduler | | 84 entries OoO scheduler | @@ -22,7 +24,6 @@ port_model_scheme: | | SHUF | ┌-------------┐ └-------┘ | STORE | └-------------┘ -ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "8D", "9", "9D"] instruction_forms: - name: add operands: @@ -102,6 +103,18 @@ instruction_forms: throughput: 0.0 latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 8D 9 9D port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + - name: movl + operands: + - class: "memory" + base: "gpr" + offset: "imd" + index: ~ + scale: 1 + - class: "register" + name: "gpr" + throughput: 0.5 + latency: 3.0 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5] - name: mulsd operands: - class: "register" diff --git a/osaca/frontend.py b/osaca/frontend.py index 3b85350..103ad7c 100755 --- a/osaca/frontend.py +++ b/osaca/frontend.py @@ -2,14 +2,17 @@ import os import re +from datetime import datetime as dt from ruamel import yaml -from osaca.semantics import INSTR_FLAGS, SemanticsAppender +import osaca +from osaca.semantics import INSTR_FLAGS, KernelDG, SemanticsAppender class Frontend(object): - def __init__(self, arch=None, path_to_yaml=None): + def __init__(self, filename, arch=None, path_to_yaml=None): + self._filename = filename if not arch and not path_to_yaml: raise ValueError('Either arch or path_to_yaml required.') if arch and path_to_yaml: @@ -43,13 +46,20 @@ class Frontend(object): return instruction_form['comment'] is not None and instruction_form['instruction'] is None def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True): - print() lineno_filler = ' ' if show_lineno else '' port_len = self._get_max_port_len(kernel) separator = '-' * sum([x + 3 for x in port_len]) + '-' separator += '--' + len(str(kernel[-1]['line_number'])) * '-' if show_lineno else '' col_sep = '|' sep_list = self._get_separator_list(col_sep) + headline = 'Port pressure in cycles' + headline_str = '{{:^{}}}'.format(len(separator)) + + print( + '\n\nThroughput Analysis Report\n' + + '--------------------------' + ) + print(headline_str.format(headline)) print(lineno_filler + self._get_port_number_line(port_len)) print(separator) for instruction_form in kernel: @@ -86,6 +96,7 @@ class Frontend(object): string_result = '' string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else '' string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else '' + string_result += 'P' if INSTR_FLAGS.HIDDEN_LD in flag_obj else '' # TODO add other flags string_result += ' ' if len(string_result) == 0 else '' return string_result @@ -120,7 +131,10 @@ class Frontend(object): return string_result def print_latency_analysis(self, cp_kernel, separator='|'): - print('\n\n------------------------') + print( + '\n\nLatency Analysis Report\n' + + '-----------------------' + ) for instruction_form in cp_kernel: print( '{:4d} {} {:4.1f} {}{}{} {}'.format( @@ -142,7 +156,10 @@ class Frontend(object): ) def print_loopcarried_dependencies(self, dep_tuplelist, separator='|'): - print('\n\n------------------------') + print( + '\n\nLoop-Carried Dependencies Analysis Report\n' + + '-----------------------------------------' + ) for tup in dep_tuplelist: print( '{:4d} {} {:4.1f} {} {:36}{} {}'.format( @@ -161,11 +178,38 @@ class Frontend(object): ) ) - def print_list_summary(self): - raise NotImplementedError + def _print_header_report(self): + version = osaca.osaca.get_version() + adjust = 20 + header = '' + header += 'Open Source Architecture Code Analyzer (OSACA) - {}\n'.format(version) + header += 'Analyzed file:'.ljust(adjust) + '{}\n'.format(self._filename) + header += 'Architecture:'.ljust(adjust) + '{}\n'.format(self._arch) + header += 'Timestamp:'.ljust(adjust) + '{}\n'.format( + dt.utcnow().strftime('%Y-%m-%d %H:%M:%S') + ) + print(header) - def _print_header_throughput_report(self): - raise NotImplementedError + def _print_symbol_map(self): + symbol_dict = { + INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port', + INSTR_FLAGS.TP_UNKWN: 'No throughput/latency information for this instruction in ' + + 'data file', + INSTR_FLAGS.HIDDEN_LD: 'Throughput of LOAD operation can be hidden behind a past ' + + 'or future STORE instruction', + } + symbol_map = '' + for flag in sorted(symbol_dict.keys()): + symbol_map += ' {} - {}\n'.format(self._get_flag_symbols([flag]), symbol_dict[flag]) + + print(symbol_map, end='') def _print_port_binding_summary(self): raise NotImplementedError + + def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False): + self._print_header_report() + self._print_symbol_map() + self.print_throughput_analysis(kernel, show_lineno=True) + self.print_latency_analysis(kernel_dg.get_critical_path()) + self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies()) diff --git a/osaca/parser/__init__.py b/osaca/parser/__init__.py index 5437530..33ac13a 100644 --- a/osaca/parser/__init__.py +++ b/osaca/parser/__init__.py @@ -4,7 +4,8 @@ Collection of parsers supported by OSACA. Only the parser below will be exported, so please add new parsers to __all__. """ from .attr_dict import AttrDict +from .base_parser import BaseParser from .parser_x86att import ParserX86ATT from .parser_AArch64v81 import ParserAArch64v81 -__all__ = ['AttrDict', 'ParserX86ATT', 'ParserAArch64v81'] +__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64v81'] diff --git a/osaca/parser/parser_AArch64v81.py b/osaca/parser/parser_AArch64v81.py index fc0e99a..36b1c15 100755 --- a/osaca/parser/parser_AArch64v81.py +++ b/osaca/parser/parser_AArch64v81.py @@ -3,8 +3,7 @@ import pyparsing as pp -from .attr_dict import AttrDict -from .base_parser import BaseParser +from osaca.parser import AttrDict, BaseParser class ParserAArch64v81(BaseParser): diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 3d2d8ae..c9274f8 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -2,8 +2,7 @@ import pyparsing as pp -from .attr_dict import AttrDict -from .base_parser import BaseParser +from osaca.parser import AttrDict, BaseParser class ParserX86ATT(BaseParser): diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 7216346..217be43 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os +import re from ruamel import yaml @@ -71,6 +72,37 @@ class MachineModel(object): def get_arch(self): return self._data['arch_code'] + def get_ports(self): + return self._data['ports'] + + def has_hidden_loads(self): + if 'hidden_loads' in self._data: + return self._data['hidden_loads'] + return False + + def get_data_ports(self): + data_port = re.compile(r'^[0-9]+D$') + data_ports = [x for x in filter(data_port.match, self._data['ports'])] + return data_ports + + @staticmethod + def get_isa_for_arch(arch): + arch_dict = { + 'vulcan': 'aarch64', + 'zen1': 'x86', + 'snb': 'x86', + 'ivb': 'x86', + 'hsw': 'x86', + 'bdw': 'x86', + 'skl': 'x86', + 'skx': 'x86', + 'csx': 'x86', + } + arch = arch.lower() + if arch in arch_dict: + return arch_dict[arch].lower() + return None + ###################################################### def _check_for_duplicate(self, name, operands): diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index 9e9f5ff..c4ab26d 100755 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 import copy +from itertools import chain, product import networkx as nx -from itertools import chain, product from osaca.parser import AttrDict from osaca.semantics import MachineModel diff --git a/osaca/semantics/semanticsAppender.py b/osaca/semantics/semanticsAppender.py index 364b734..5f518ff 100755 --- a/osaca/semantics/semanticsAppender.py +++ b/osaca/semantics/semanticsAppender.py @@ -5,8 +5,7 @@ import warnings from functools import reduce from osaca.parser import AttrDict - -from .hw_model import MachineModel +from osaca.semantics import MachineModel class INSTR_FLAGS: @@ -18,6 +17,8 @@ class INSTR_FLAGS: LT_UNKWN = 'lt_unkown' NOT_BOUND = 'not_bound' HIDDEN_LD = 'hidden_load' + HAS_LD = 'performs_load' + HAS_ST = 'performs_store' class SemanticsAppender(object): @@ -36,6 +37,50 @@ class SemanticsAppender(object): assert os.path.exists(name) return name + # SUMMARY FUNCTION + def add_semantics(self, kernel): + for instruction_form in kernel: + self.assign_src_dst(instruction_form) + self.assign_tp_lt(instruction_form) + if self._machine_model.has_hidden_loads(): + self.set_hidden_loads(kernel) + + def set_hidden_loads(self, kernel): + loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr['flags']] + stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr['flags']] + # Filter instructions including load and store + load_ids = [instr['line_number'] for instr in loads] + store_ids = [instr['line_number'] for instr in stores] + shared_ldst = list(set(load_ids).intersection(set(store_ids))) + loads = [instr for instr in loads if instr['line_number'] not in shared_ldst] + stores = [instr for instr in stores if instr['line_number'] not in shared_ldst] + + if len(stores) == 0 or len(loads) == 0: + # nothing to do + return + if len(loads) < len(stores): + # Hide all loads + for load in loads: + load['flags'] += [INSTR_FLAGS.HIDDEN_LD] + load['port_pressure'] = self._nullify_data_ports(load['port_pressure']) + else: + for store in stores: + # Get 'closest' load instruction + min_distance_load = min( + [ + ( + abs(load_instr['line_number'] - store['line_number']), + load_instr['line_number'], + ) + for load_instr in loads + if INSTR_FLAGS.HIDDEN_LD not in load_instr['flags'] + ] + ) + load = [instr for instr in kernel if instr['line_number'] == min_distance_load[1]][0] + # Hide load + load['flags'] += [INSTR_FLAGS.HIDDEN_LD] + load['port_pressure'] = self._nullify_data_ports(load['port_pressure']) + # get parser result and assign throughput and latency value to instruction form # mark instruction form with semantic flags def assign_tp_lt(self, instruction_form): @@ -125,6 +170,37 @@ class SemanticsAppender(object): # store operand list in dict and reassign operand key/value pair op_dict['operand_list'] = operands instruction_form['operands'] = AttrDict.convert_dict(op_dict) + # assign LD/ST flags + instruction_form['flags'] = ( + instruction_form['flags'] if 'flags' in instruction_form else [] + ) + if self._has_load(instruction_form): + instruction_form['flags'] += [INSTR_FLAGS.HAS_LD] + if self._has_store(instruction_form): + instruction_form['flags'] += [INSTR_FLAGS.HAS_ST] + + def _nullify_data_ports(self, port_pressure): + data_ports = self._machine_model.get_data_ports() + for port in data_ports: + index = self._machine_model.get_ports().index(port) + port_pressure[index] = 0.0 + return port_pressure + + def _has_load(self, instruction_form): + for operand in ( + instruction_form['operands']['source'] + instruction_form['operands']['src_dst'] + ): + if 'memory' in operand: + return True + return False + + def _has_store(self, instruction_form): + for operand in ( + instruction_form['operands']['destination'] + instruction_form['operands']['src_dst'] + ): + if 'memory' in operand: + return True + return False def _get_regular_source_operands(self, instruction_form): if self._isa == 'x86':