diff --git a/osaca/data/bdw.yml b/osaca/data/bdw.yml index 6bce1e7..c104250 100644 --- a/osaca/data/bdw.yml +++ b/osaca/data/bdw.yml @@ -6,7 +6,7 @@ ROB_size: 192 retired_uOps_per_cycle: 4 scheduler_size: 64 hidden_loads: false -load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0} +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0} load_throughput: - {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]} - {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]} diff --git a/osaca/data/csx.yml b/osaca/data/csx.yml index 42b48aa..f6d645f 100644 --- a/osaca/data/csx.yml +++ b/osaca/data/csx.yml @@ -6,7 +6,7 @@ ROB_size: 224 retired_uOps_per_cycle: 4 scheduler_size: 97 hidden_loads: false -load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0} +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0} load_throughput: - {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]} - {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]} @@ -77,7 +77,7 @@ instruction_forms: throughput: 0.5 latency: 4.0 # 1*p01 port_pressure: [[1, '01']] -- name: addl +- name: add operands: - class: immediate imd: int @@ -86,39 +86,21 @@ instruction_forms: throughput: 0.25 latency: 1.0 # 1*p0156 port_pressure: [[1, '0156']] -- name: addq +- name: cmp operands: - - class: immediate - imd: int + - class: register + name: gpr - class: register name: gpr throughput: 0.25 latency: 1.0 # 1*p0156 port_pressure: [[1, '0156']] -- name: cmpl - operands: - - class: register - name: gpr - - class: register - name: gpr - throughput: 0.25 - latency: ~ # 1*p0156 - port_pressure: [[1, '0156']] -- name: cmpq - operands: - - class: register - name: gpr - - class: register - name: gpr - throughput: 0.25 - latency: ~ # 1*p0156 - port_pressure: [[1, '0156']] -- name: incq +- name: inc operands: - class: register name: gpr throughput: 0.25 - latency: ~ # 1*p0156 + latency: 1.0 # 
1*p0156 port_pressure: [[1, '0156']] - name: ja operands: diff --git a/osaca/data/hsw.yml b/osaca/data/hsw.yml index 8735b91..f1f9e75 100644 --- a/osaca/data/hsw.yml +++ b/osaca/data/hsw.yml @@ -6,7 +6,7 @@ ROB_size: 192 retired_uOps_per_cycle: 4 scheduler_size: 60 hidden_loads: false -load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0} +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0} load_throughput: - {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '23'], [1, [2D, 3D]]]} - {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '23'], [1, [2D, 3D]]]} diff --git a/osaca/data/isa/x86.yml b/osaca/data/isa/x86.yml index b3b24ad..a55e994 100644 --- a/osaca/data/isa/x86.yml +++ b/osaca/data/isa/x86.yml @@ -232,6 +232,19 @@ instruction_forms: scale: 1 source: true destination: true + - name: cmp + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "memory" + base: "gpr" + offset: ~ + index: ~ + scale: 1 + source: true + destination: false - name: cmp operands: - class: "register" @@ -255,6 +268,19 @@ instruction_forms: scale: 1 source: true destination: false + - name: cmp + operands: + - class: "register" + name: "gpr" + source: true + destination: false + - class: "memory" + base: "gpr" + offset: ~ + index: ~ + scale: 1 + source: true + destination: false - name: dec operands: - class: "register" diff --git a/osaca/data/ivb.yml b/osaca/data/ivb.yml index a3b2513..758297e 100644 --- a/osaca/data/ivb.yml +++ b/osaca/data/ivb.yml @@ -6,7 +6,7 @@ ROB_size: 168 retired_uOps_per_cycle: 4 scheduler_size: 54 hidden_loads: false -load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0} +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0} load_throughput: - {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '23'], [1, [2D, 3D]]]} - {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '23'], [1, [2D, 3D]]]} diff --git a/osaca/data/skx.yml b/osaca/data/skx.yml index b4f3a7b..5eb217f 100644 
--- a/osaca/data/skx.yml +++ b/osaca/data/skx.yml @@ -6,7 +6,7 @@ ROB_size: 224 retired_uOps_per_cycle: 4 scheduler_size: 97 hidden_loads: false -load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0} +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0} load_throughput: - {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]} - {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]} diff --git a/osaca/data/snb.yml b/osaca/data/snb.yml index 2543b69..e59d02e 100644 --- a/osaca/data/snb.yml +++ b/osaca/data/snb.yml @@ -6,7 +6,7 @@ ROB_size: 168 retired_uOps_per_cycle: 4 scheduler_size: 54 hidden_loads: false -load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0} +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0} load_throughput: - {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '23'], [1, [2D, 3D]]]} - {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '23'], [1, [2D, 3D]]]} diff --git a/osaca/data/zen1.yml b/osaca/data/zen1.yml index e710240..55db5be 100644 --- a/osaca/data/zen1.yml +++ b/osaca/data/zen1.yml @@ -2,7 +2,7 @@ osaca_version: 0.3.1.dev1 micro_architecture: AMD Zen (family 17h) arch_code: ZEN1 isa: x86 -load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0} +load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0} load_throughput_multiplier: {gpr: 1.0, xmm: 1.0, ymm: 2.0} load_throughput: - {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]} diff --git a/osaca/frontend.py b/osaca/frontend.py index 75cfca1..cc69c56 100755 --- a/osaca/frontend.py +++ b/osaca/frontend.py @@ -144,7 +144,9 @@ class Frontend(object): ) ) - def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False): + def print_full_analysis( + self, kernel, kernel_dg: KernelDG, ignore_unknown=False, verbose=False + ): """ Prints the full analysis report including header, the symbol map, the combined TP/CP/LCD view and the list based LCD view. 
@@ -153,17 +155,25 @@ class Frontend(object): :type kernel: list :param kernel_dg: directed graph containing CP and LCD :type kernel_dg: :class:`~osaca.semantics.KernelDG` - :param verbose: verbose output flag, defaults to `False` - :type verbose: bool, optional + :param ignore_unknown: flag to ignore the warning if performance data is missing, defaults to + `False` + :type ignore_unknown: bool, optional + :param verbose: flag for verbosity level, defaults to `False` + :type verbose: bool, optional """ self._print_header_report() self._print_symbol_map() self.print_combined_view( - kernel, kernel_dg.get_critical_path(), kernel_dg.get_loopcarried_dependencies() + kernel, + kernel_dg.get_critical_path(), + kernel_dg.get_loopcarried_dependencies(), + ignore_unknown, ) self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies()) - def print_combined_view(self, kernel, cp_kernel: KernelDG, dep_dict, show_cmnts=True): + def print_combined_view( + self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True + ): """ Prints the combined view of the kernel including the port pressure (TP), a CP column and a LCD column. 
@@ -174,6 +184,9 @@ class Frontend(object): :type kernel_dg: :class:`~osaca.semantics.KernelDG` :param dep_dict: dictionary with first instruction in LCD as key and the deps as value :type dep_dict: dict + :param ignore_unknown: flag for showing result despite missing instructions, defaults to + `False` + :type ignore_unknown: bool, optional :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True` :type show_cmnts: bool, optional """ @@ -199,8 +212,9 @@ class Frontend(object): ) lcd_sum = max(sums.values()) if len(sums) > 0 else 0.0 lcd_lines = [] - longest_lcd = [line_no for line_no in sums if sums[line_no] == lcd_sum][0] - lcd_lines = [d['line_number'] for d in dep_dict[longest_lcd]['dependencies']] + if len(dep_dict) > 0: + longest_lcd = [line_no for line_no in sums if sums[line_no] == lcd_sum][0] + lcd_lines = [d['line_number'] for d in dep_dict[longest_lcd]['dependencies']] print(headline_str.format(headline)) print( @@ -232,19 +246,41 @@ class Frontend(object): ) print(line) print() - # lcd_sum already calculated before - tp_sum = ArchSemantics.get_throughput_sum(kernel) - cp_sum = sum([x['latency_cp'] for x in cp_kernel]) - print( - lineno_filler - + self._get_port_pressure(tp_sum, port_len, separator=' ') - + ' {:^6} {:^6}'.format(cp_sum, lcd_sum) - ) + # check for unknown instructions and throw warning if called without --ignore-unknown + if not ignore_unknown and INSTR_FLAGS.TP_UNKWN in [ + flag for instr in kernel for flag in instr['flags'] + ]: + num_missing = len( + [instr['flags'] for instr in kernel if INSTR_FLAGS.TP_UNKWN in instr['flags']] + ) + self._print_missing_instruction_error(num_missing) + else: + # lcd_sum already calculated before + tp_sum = ArchSemantics.get_throughput_sum(kernel) + cp_sum = sum([x['latency_cp'] for x in cp_kernel]) + print( + lineno_filler + + self._get_port_pressure(tp_sum, port_len, separator=' ') + + ' {:^6} {:^6}'.format(cp_sum, lcd_sum) + ) #################### # HELPER FUNCTIONS 
#################### + def _print_missing_instruction_error(self, amount): + print( + ( + '------------------ WARNING: The performance data for {} instructions is missing.' + '------------------\n' + ' No final analysis is given. If you want to ignore this\n' + ' warning and run the analysis anyway, start osaca with\n' + ' --ignore-unknown flag.\n' + '--------------------------------------------------------------------------------' + '----------------{}' + ).format(amount, '-' * len(str(amount))) + ) + def _get_separator_list(self, separator, separator_2=' '): """Creates column view for seperators in the TP/combined view.""" separator_list = [] diff --git a/osaca/osaca.py b/osaca/osaca.py index 543f4bb..56f90c8 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -6,10 +6,10 @@ import os import re import sys -from osaca.db_interface import sanity_check, import_benchmark_output +from osaca.db_interface import import_benchmark_output, sanity_check from osaca.frontend import Frontend from osaca.parser import BaseParser, ParserAArch64v81, ParserX86ATT -from osaca.semantics import (KernelDG, MachineModel, ArchSemantics, +from osaca.semantics import (ArchSemantics, KernelDG, MachineModel, reduce_to_section) MODULE_DATA_DIR = os.path.join( @@ -65,15 +65,13 @@ def create_parser(): '-V', '--version', action='version', version='%(prog)s ' + __find_version('__init__.py') ) parser.add_argument( - '--arch', - type=str, - help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, TX2).', + '--arch', type=str, help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, TX2).' ) parser.add_argument( '--fixed', action='store_true', help='Run the throughput analysis with fixed probabilities for all suitable ports per ' - 'instruction. 
Otherwise, OSACA will print out the optimal port utilization for the kernel.', ) parser.add_argument( '--db-check', @@ -108,6 +106,12 @@ def create_parser(): help='Output path for .dot file export. If "." is given, the file will be stored as ' '"./osaca_dg.dot"', ) + parser.add_argument( + '--ignore-unknown', + dest='ignore_unknown', + action='store_true', + help='Ignore if instructions cannot be found in the data file and print analysis anyway.', + ) parser.add_argument( '--verbose', '-v', action='count', default=0, help='Increases verbosity level.' ) @@ -204,6 +208,7 @@ def inspect(args): arch = args.arch isa = MachineModel.get_isa_for_arch(arch) verbose = args.verbose + ignore_unknown = args.ignore_unknown # Read file code = args.file.read() @@ -226,7 +231,9 @@ def inspect(args): kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None) # Print analysis frontend = Frontend(args.file.name, arch=arch) - frontend.print_full_analysis(kernel, kernel_graph, verbose=verbose) + frontend.print_full_analysis( + kernel, kernel_graph, ignore_unknown=ignore_unknown, verbose=verbose + ) def run(args, output_file=sys.stdout): diff --git a/osaca/parser/base_parser.py b/osaca/parser/base_parser.py index 4aa423e..3e67f3b 100755 --- a/osaca/parser/base_parser.py +++ b/osaca/parser/base_parser.py @@ -9,6 +9,7 @@ class BaseParser(object): LABEL_ID = 'label' MEMORY_ID = 'memory' REGISTER_ID = 'register' + SEGMENT_EXT_ID = 'segment_extension' INSTRUCTION_ID = 'instruction' OPERANDS_ID = 'operands' diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 38437b8..3bc8bba 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -38,6 +38,7 @@ class ParserX86ATT(BaseParser): self.register = pp.Group( pp.Literal('%') + pp.Word(pp.alphanums).setResultsName('name') + + pp.Optional(pp.Literal('(') + pp.Word(pp.nums) + pp.Literal(')')) + pp.Optional( pp.Literal('{') + pp.Literal('%') @@ -55,15 +56,41 @@ class 
ParserX86ATT(BaseParser): self.IMMEDIATE_ID ) scale = pp.Word('1248', exact=1) + # Segment register extension + segment_extension = ( + hex_number + ^ pp.Word(pp.nums) + ^ pp.Group( + pp.Optional(offset.setResultsName('offset')) + + pp.Literal('(') + + pp.Optional(self.register.setResultsName('base')) + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(self.register.setResultsName('index')) + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(scale.setResultsName('scale')) + + pp.Literal(')') + ) + ) + memory_segmentation = ( + self.register.setResultsName('base') + + pp.Literal(':') + + segment_extension.setResultsName(self.SEGMENT_EXT_ID) + ) + memory = pp.Group( - pp.Optional(offset.setResultsName('offset')) - + pp.Literal('(') - + pp.Optional(self.register.setResultsName('base')) - + pp.Optional(pp.Suppress(pp.Literal(','))) - + pp.Optional(self.register.setResultsName('index')) - + pp.Optional(pp.Suppress(pp.Literal(','))) - + pp.Optional(scale.setResultsName('scale')) - + pp.Literal(')') + ( + pp.Optional(pp.Suppress(pp.Literal('*'))) + + pp.Optional(offset.setResultsName('offset')) + + pp.Literal('(') + + pp.Optional(self.register.setResultsName('base')) + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(self.register.setResultsName('index')) + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(scale.setResultsName('scale')) + + pp.Literal(')') + ) + | memory_segmentation + | (hex_number | pp.Word(pp.nums)).setResultsName('offset') ).setResultsName(self.MEMORY_ID) # Directive @@ -235,7 +262,12 @@ class ParserX86ATT(BaseParser): base = None if 'base' not in memory_address else memory_address['base'] index = None if 'index' not in memory_address else memory_address['index'] scale = 1 if 'scale' not in memory_address else int(memory_address['scale']) + if isinstance(offset, str) and base is None and index is None: + offset = {'value': offset} new_dict = AttrDict({'offset': offset, 'base': base, 'index': index, 'scale': 
scale}) + # Add segmentation extension if existing + if self.SEGMENT_EXT_ID in memory_address: + new_dict[self.SEGMENT_EXT_ID] = memory_address[self.SEGMENT_EXT_ID] return AttrDict({self.MEMORY_ID: new_dict}) def substitute_label(self, label): @@ -312,6 +344,8 @@ class ParserX86ATT(BaseParser): return True def is_gpr(self, register): + if register is None: + return False gpr_parser = ( pp.CaselessLiteral('R') + pp.Word(pp.nums).setResultsName('id') @@ -327,13 +361,20 @@ class ParserX86ATT(BaseParser): return False def is_vector_register(self, register): - if len(register['name']) > 2 and register['name'][1:3].lower() == 'mm': + if register is None: + return False + if ( + len(register['name']) > 2 + and ''.join([_ for _ in register['name'] if not _.isdigit()])[-2:].lower() == 'mm' + ): return True return False def get_reg_type(self, register): + if register is None: + return False if self.is_gpr(register): return 'gpr' elif self.is_vector_register(register): - return register['name'][:3].lower() + return ''.join([_ for _ in register['name'] if not _.isdigit()]).lower() raise ValueError diff --git a/osaca/semantics/arch_semantics.py b/osaca/semantics/arch_semantics.py index 095db9d..29ddb11 100755 --- a/osaca/semantics/arch_semantics.py +++ b/osaca/semantics/arch_semantics.py @@ -167,11 +167,11 @@ class ArchSemantics(ISASemantics): ) if instruction_data_reg: assign_unknown = False - reg_types = [ - self._parser.get_reg_type(op['register']) - for op in operands - if 'register' in op - ] + reg_type = self._parser.get_reg_type( + instruction_data_reg['operands'][ + operands.index(self._create_reg_wildcard()) + ] + ) load_port_uops = self._machine_model.get_load_throughput( [ x['memory'] @@ -185,14 +185,14 @@ class ArchSemantics(ISASemantics): ) if 'load_throughput_multiplier' in self._machine_model: multiplier = self._machine_model['load_throughput_multiplier'][ - reg_types[0] + reg_type ] load_port_pressure = [pp * multiplier for pp in load_port_pressure] throughput = 
max( max(load_port_pressure), instruction_data_reg['throughput'] ) latency = ( - self._machine_model.get_load_latency(reg_types[0]) + self._machine_model.get_load_latency(reg_type) + instruction_data_reg['latency'] ) latency_wo_load = instruction_data_reg['latency'] diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index 3856d88..8879106 100755 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -21,6 +21,8 @@ class INSTR_FLAGS: class ISASemantics(object): + GAS_SUFFIXES = 'bswlqt' + def __init__(self, isa, path_to_yaml=None): self._isa = isa.lower() path = utils.find_file('isa/' + self._isa + '.yml') if not path_to_yaml else path_to_yaml @@ -41,8 +43,8 @@ class ISASemantics(object): # - source/destination def assign_src_dst(self, instruction_form): """Update instruction form dictionary with source, destination and flag information.""" - # if the instruction form doesn't have operands, there's nothing to do - if instruction_form['operands'] is None: + # if the instruction form doesn't have operands or is None, there's nothing to do + if instruction_form['operands'] is None or instruction_form['instruction'] is None: instruction_form['semantic_operands'] = AttrDict( {'source': [], 'destination': [], 'src_dst': []}) return @@ -51,6 +53,11 @@ class ISASemantics(object): isa_data = self._isa_model.get_instruction( instruction_form['instruction'], instruction_form['operands'] ) + if isa_data is None and instruction_form['instruction'][-1] in self.GAS_SUFFIXES: + # Check for instruction without GAS suffix + isa_data = self._isa_model.get_instruction( + instruction_form['instruction'][:-1], instruction_form['operands'] + ) operands = instruction_form['operands'] op_dict = {} if isa_data is None: @@ -99,6 +106,9 @@ class ISASemantics(object): return False def _get_regular_source_operands(self, instruction_form): + # if there is only one operand, assume it is a source operand + if len(instruction_form['operands']) 
== 1: + return [instruction_form['operands'][0]] if self._isa == 'x86': # return all but last operand return [op for op in instruction_form['operands'][0:-1]] @@ -108,6 +118,9 @@ class ISASemantics(object): raise ValueError("Unsupported ISA {}.".format(self._isa)) def _get_regular_destination_operands(self, instruction_form): + # if there is only one operand, assume no destination + if len(instruction_form['operands']) == 1: + return [] if self._isa == 'x86': # return last operand return instruction_form['operands'][-1:]