From ed0def3ce03788f72ee48beb575b786e1b59906f Mon Sep 17 00:00:00 2001 From: JanLJL Date: Fri, 27 Sep 2019 17:15:04 +0200 Subject: [PATCH] added visual graph export, YMM LD support for ZEN and support for non-dyn loads in DB --- osaca/data/csx.yml | 12 +++ osaca/data/vulcan.yml | 106 +++++++++++++++++++-------- osaca/data/zen1.yml | 50 ++++++++++++- osaca/osaca.py | 18 ++++- osaca/semantics/kernel_dg.py | 99 ++++++++++++++++++++++++- osaca/semantics/semanticsAppender.py | 27 ++++--- 6 files changed, 263 insertions(+), 49 deletions(-) diff --git a/osaca/data/csx.yml b/osaca/data/csx.yml index 6546c47..6efef50 100644 --- a/osaca/data/csx.yml +++ b/osaca/data/csx.yml @@ -439,6 +439,18 @@ instruction_forms: throughput: 0.0 latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + - name: vmovsd + operands: + - class: "memory" + base: "gpr" + offset: "imd" + index: "gpr" + scale: 1 + - class: "register" + name: "xmm" + throughput: 0.5 + latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - name: vmovsd operands: - class: "register" diff --git a/osaca/data/vulcan.yml b/osaca/data/vulcan.yml index d10234b..46820b0 100644 --- a/osaca/data/vulcan.yml +++ b/osaca/data/vulcan.yml @@ -8,38 +8,38 @@ scheduler_size: 60 hidden_loads: false load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 4.0, q: 4.0, v: 4.0} load_throughput: - - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} - - {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0,0.5,0.5,0]} ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"] port_model_scheme: | ┌-----------------------------------------------------------┐ @@ -413,6 +413,48 @@ instruction_forms: throughput: 1.0 latency: ~ # 0 0DV 1 1DV 2 3 4 5 port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0] + - name: ldr + operands: + - class: "register" + prefix: "d" + - class: "memory" + base: "x" + offset: ~ + index: ~ + scale: 1 + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 4.0 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] + - name: ldr + operands: + - class: "register" + prefix: "d" + - class: "memory" + base: "x" + offset: "imd" + index: ~ + scale: 1 + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 4.0 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] + - name: ldr + operands: + - class: "register" + prefix: "d" + - class: "memory" + base: "x" + offset: ~ + index: "x" + scale: 8 + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 4.0 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] - name: "ldr" operands: - class: "register" diff --git a/osaca/data/zen1.yml b/osaca/data/zen1.yml index 7e1482f..6b19a42 100644 --- a/osaca/data/zen1.yml +++ b/osaca/data/zen1.yml @@ -3,7 +3,7 @@ micro_architecture: "AMD Zen (family 17h)" arch_code: "ZEN1" isa: "x86" load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0} -# TODO ADD YMM latencies [0,0,0,0,0,0,0,0,0,1,1,1,1,0] +load_throughput_multiplier: {gpr: 1.0, xmm: 1.0, ymm: 2.0} load_throughput: - {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} - {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} @@ -345,6 +345,18 @@ instruction_forms: throughput: 1.0 latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + - name: vmovapd + operands: + - class: "memory" + base: "gpr" + offset: ~ + index: "gpr" + scale: 1 + - class: "register" + name: "ymm" + throughput: 0.5 + latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - name: vmovapd operands: - class: "register" @@ -423,6 +435,30 @@ instruction_forms: throughput: 1.0 latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1.0] + - name: vmovupd + operands: + - class: "memory" + base: "gpr" + offset: ~ + index: "gpr" + scale: 1 + - class: "register" + name: "ymm" + throughput: 0.5 + latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + - name: vmovupd + operands: + - class: "memory" + base: "gpr" + offset: "imd" + index: "gpr" + scale: 1 + - class: "register" + name: "ymm" + throughput: 0.5 + latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - name: vmovupd operands: - class: "register" @@ -468,6 +504,18 @@ instruction_forms: throughput: 0.0 latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + - name: vmovsd + operands: + - class: "memory" + base: "gpr" + offset: "imd" + index: "gpr" + scale: 1 + - class: "register" + name: "xmm" + throughput: 0.5 + latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] - name: vmovsd operands: - class: "register" diff --git a/osaca/osaca.py b/osaca/osaca.py index 4b6422e..f8ab5d6 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -11,7 +11,8 @@ from subprocess import call from osaca.api import sanity_check from osaca.frontend import Frontend from osaca.parser import BaseParser, ParserAArch64v81, ParserX86ATT -from osaca.semantics import KernelDG, MachineModel, reduce_to_section, SemanticsAppender +from osaca.semantics import (KernelDG, MachineModel, SemanticsAppender, + reduce_to_section) MODULE_DATA_DIR = os.path.join( os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/' @@ -56,7 +57,6 @@ def create_parser(): parser.add_argument( '--arch', type=str, - required=True, help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, VULCAN).', ) parser.add_argument( @@ -83,6 +83,15 @@ def create_parser(): help='Try to find assembly block containing the loop to analyse and insert byte ' 'marker by using Kerncraft.', ) + parser.add_argument( + '--export-graph', + metavar='EXPORT_PATH', + dest='dotpath', + default=None, + type=str, + help='Output path for .dot file export. If "." is given, the file will be stored as ' + '"./osaca_dg.dot"', + ) parser.add_argument( '--verbose', '-v', action='count', default=0, help='Increases verbosity level.' ) @@ -98,7 +107,7 @@ def check_arguments(args, parser): supported_archs = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'VULCAN'] supported_import_files = ['ibench', 'asmbench', 'uopsinfo'] - if args.arch.upper() not in supported_archs: + if 'arch' in args and args.arch.upper() not in supported_archs: parser.error( 'Microarchitecture not supported. Please see --help for all valid architecture codes.' ) @@ -180,6 +189,8 @@ def inspect(args): # Create DiGrahps kernel_graph = KernelDG(kernel, parser, machine_model) + if args.dotpath is not None: + kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None) # Print analysis frontend = Frontend(args.file.name, arch=arch) frontend.print_full_analysis(kernel, kernel_graph, verbose=verbose) @@ -209,6 +220,7 @@ def _create_parser(arch) -> BaseParser: elif isa == 'aarch64': return ParserAArch64v81() + # --------------------------------------------------- diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index f8c684e..1047e86 100755 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -26,7 +26,11 @@ class KernelDG(nx.DiGraph): dg.add_node(instruction_form['line_number']) dg.nodes[instruction_form['line_number']]['instruction_form'] = instruction_form # add load as separate node if existent - if 'performs_load' in instruction_form['flags']: + # TODO use INSTR_FLAGS here + if ( + 'performs_load' in instruction_form['flags'] + and 'is_load_instruction' not in instruction_form['flags'] + ): # add new node dg.add_node(instruction_form['line_number'] + 0.1) dg.nodes[instruction_form['line_number'] + 0.1][ @@ -38,7 +42,7 @@ class KernelDG(nx.DiGraph): instruction_form['line_number'], latency=instruction_form['latency'] - instruction_form['latency_wo_load'], ) - for dep in self.find_depending(instruction_form, kernel[i + 1 :]): + for dep in self.find_depending(instruction_form, kernel[i + 1:]): edge_weight = ( instruction_form['latency'] if 'latency_wo_load' not in instruction_form @@ -238,3 +242,94 @@ class KernelDG(nx.DiGraph): self.parser.is_reg_dependend_of(register, src.memory.base) or is_written ) return is_written + + def export_graph(self, filepath=None): + graph = copy.deepcopy(self.dg) + cp = self.get_critical_path() + cp_line_numbers = [x['line_number'] for x in cp] + lcd = self.get_loopcarried_dependencies() + lcd_line_numbers = {} + for dep in lcd: + lcd_line_numbers[dep] = [x['line_number'] for x in lcd[dep]['dependencies']] + # add color scheme + graph.graph['node'] = {'colorscheme': 'accent8'} + graph.graph['edge'] = {'colorscheme': 'accent8'} + + # create LCD edges + for dep in lcd_line_numbers: + min_line_number = min(lcd_line_numbers[dep]) + max_line_number = max(lcd_line_numbers[dep]) + graph.add_edge(max_line_number, min_line_number) + graph.edges[max_line_number, min_line_number]['latency'] = [ + x for x in lcd[dep]['dependencies'] if x['line_number'] == max_line_number + ][0]['latency_lcd'] + + # add label to edges + for e in graph.edges: + graph.edges[e]['label'] = graph.edges[e]['latency'] + + # add CP values to graph + for n in cp: + graph.nodes[n['line_number']]['instruction_form']['latency_cp'] = n['latency_cp'] + + # color CP and LCD + for n in graph.nodes: + if n in cp_line_numbers: + # graph.nodes[n]['color'] = 1 + graph.nodes[n]['style'] = 'bold' + graph.nodes[n]['penwidth'] = 4 + for col, dep in enumerate(lcd): + if n in lcd_line_numbers[dep]: + if 'style' not in graph.nodes[n]: + graph.nodes[n]['style'] = 'filled' + else: + graph.nodes[n]['style'] += ',filled' + graph.nodes[n]['fillcolor'] = 2 + col + + # color edges + for e in graph.edges: + if ( + graph.nodes[e[0]]['instruction_form']['line_number'] in cp_line_numbers + and graph.nodes[e[1]]['instruction_form']['line_number'] in cp_line_numbers + and e[0] < e[1] + ): + bold_edge = True + for i in range(e[0] + 1, e[1]): + if i in cp_line_numbers: + bold_edge = False + if bold_edge: + graph.edges[e]['style'] = 'bold' + graph.edges[e]['penwidth'] = 3 + for dep in lcd_line_numbers: + if ( + graph.nodes[e[0]]['instruction_form']['line_number'] in lcd_line_numbers[dep] + and graph.nodes[e[1]]['instruction_form']['line_number'] + in lcd_line_numbers[dep] + ): + graph.edges[e]['color'] = graph.nodes[e[1]]['fillcolor'] + + # rename node from [idx] to [idx mnemonic] and add shape + mapping = {} + for n in graph.nodes: + if int(n) != n: + mapping[n] = '{}: LOAD'.format(int(n)) + graph.nodes[n]['fontname'] = 'italic' + graph.nodes[n]['fontsize'] = 11.0 + else: + node = graph.nodes[n]['instruction_form'] + if node['instruction'] is not None: + mapping[n] = '{}: {}'.format(n, node['instruction']) + else: + label = 'label' if node['label'] else None + label = 'directive' if node['directive'] else label + label = 'comment' if node['comment'] and label is None else label + mapping[n] = '{}: {}'.format(n, label) + graph.nodes[n]['fontname'] = 'italic' + graph.nodes[n]['fontsize'] = 11.0 + graph.nodes[n]['shape'] = 'rectangle' + + nx.relabel.relabel_nodes(graph, mapping, copy=False) + if filepath: + nx.drawing.nx_agraph.write_dot(graph, filepath) + else: + nx.drawing.nx_agraph.write_dot(graph, 'osaca_dg.dot') diff --git a/osaca/semantics/semanticsAppender.py b/osaca/semantics/semanticsAppender.py index 1beb384..c9a7e82 100755 --- a/osaca/semantics/semanticsAppender.py +++ b/osaca/semantics/semanticsAppender.py @@ -13,6 +13,7 @@ class INSTR_FLAGS: Flags used for unknown or special instructions """ + LD = 'is_load_instruction' TP_UNKWN = 'tp_unknown' LT_UNKWN = 'lt_unknown' NOT_BOUND = 'not_bound' @@ -128,6 +129,8 @@ class SemanticsAppender(object): latency = 0.0 latency_wo_load = latency flags.append(INSTR_FLAGS.LT_UNKWN) + if INSTR_FLAGS.HAS_LD in instruction_form['flags']: + flags.append(INSTR_FLAGS.LD) else: # instruction could not be found in DB assign_unknown = True @@ -140,6 +143,11 @@ class SemanticsAppender(object): ) if instruction_data_reg: assign_unknown = False + reg_types = [ + self._parser.get_reg_type(op['register']) + for op in operands['operand_list'] + if 'register' in op + ] load_port_pressure = self._machine_model.get_load_throughput( [ x['memory'] @@ -147,19 +155,16 @@ class SemanticsAppender(object): if 'memory' in x ][0] ) - if load_port_pressure is None: - import pdb; pdb.set_trace() + if 'load_throughput_multiplier' in self._machine_model: + multiplier = self._machine_model['load_throughput_multiplier'][ + reg_types[0] + ] + load_port_pressure = [pp * multiplier for pp in load_port_pressure] throughput = max( max(load_port_pressure), instruction_data_reg['throughput'] ) latency = ( - self._machine_model.get_load_latency( - [ - self._parser.get_reg_type(op['register']) - for op in operands['operand_list'] - if 'register' in op - ][0] - ) + self._machine_model.get_load_latency(reg_types[0]) + instruction_data_reg['latency'] ) latency_wo_load = instruction_data_reg['latency'] @@ -311,14 +316,14 @@ class SemanticsAppender(object): def _get_regular_source_x86ATT(self, instruction_form): # return all but last operand sources = [ - op for op in instruction_form['operands'][0 : len(instruction_form['operands']) - 1] + op for op in instruction_form['operands'][0: len(instruction_form['operands']) - 1] ] return sources def _get_regular_source_AArch64(self, instruction_form): # return all but first operand sources = [ - op for op in instruction_form['operands'][1 : len(instruction_form['operands'])] + op for op in instruction_form['operands'][1: len(instruction_form['operands'])] ] return sources