mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-05 10:40:06 +01:00
enhanced frontend and added hidable load port
This commit is contained in:
@@ -30,6 +30,7 @@ class IbenchAPI(object):
|
||||
# TODO
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
# TODO
|
||||
# template_x86 = Template()
|
||||
template_aarch64 = Template(
|
||||
|
||||
@@ -4,8 +4,8 @@ import collections
|
||||
|
||||
from osaca.frontend import Frontend
|
||||
from osaca.parser import ParserAArch64v81, ParserX86ATT
|
||||
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, SemanticsAppender,
|
||||
reduce_to_section)
|
||||
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel,
|
||||
SemanticsAppender, reduce_to_section)
|
||||
|
||||
|
||||
class KerncraftAPI(object):
|
||||
@@ -21,16 +21,13 @@ class KerncraftAPI(object):
|
||||
def analyze_code(self, code):
|
||||
parsed_code = self.parser.parse_file(code)
|
||||
kernel = reduce_to_section(parsed_code, self.machine_model.get_ISA())
|
||||
for i in range(len(kernel)):
|
||||
self.semantics.assign_src_dst(kernel[i])
|
||||
self.semantics.assign_tp_lt(kernel[i])
|
||||
self.semantics.add_semantics(kernel)
|
||||
return kernel
|
||||
|
||||
def create_output(self, kernel, show_lineno=False):
|
||||
def create_output(self, kernel, verbose=False):
|
||||
kernel_graph = KernelDG(kernel, self.parser, self.machine_model)
|
||||
frontend = Frontend(arch=self.machine_model.get_arch())
|
||||
frontend.print_throughput_analysis(kernel, show_lineno=show_lineno)
|
||||
frontend.print_latency_analysis(kernel_graph.get_critical_path())
|
||||
frontend.print_full_analysis(kernel, kernel_graph, verbose=verbose)
|
||||
|
||||
def get_unmatched_instruction_ratio(self, kernel):
|
||||
unmatched_counter = 0
|
||||
|
||||
@@ -2,6 +2,11 @@ osaca_version: 0.3.0
|
||||
micro_architecture: "Cascade Lake SP"
|
||||
arch_code: "CSX"
|
||||
isa: "x86"
|
||||
ROB_size: 224
|
||||
retired_uOps_per_cycle: 4
|
||||
scheduler_size: 97
|
||||
hidden_loads: false
|
||||
ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
|
||||
port_model_scheme: |
|
||||
┌------------------------------------------------------------------------┐
|
||||
| 97 entry unified scheduler |
|
||||
@@ -38,7 +43,6 @@ port_model_scheme: |
|
||||
| VNNI | ┌-------┐
|
||||
└-------┘ | VNNI |
|
||||
└-------┘
|
||||
ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
|
||||
instruction_forms:
|
||||
- name: addsd
|
||||
operands:
|
||||
|
||||
@@ -5,6 +5,7 @@ isa: "AArch64"
|
||||
ROB_size: 180
|
||||
retired_uOps_per_cycle: 4
|
||||
scheduler_size: 60
|
||||
hidden_loads: false
|
||||
ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"]
|
||||
port_model_scheme: |
|
||||
┌-----------------------------------------------------------┐
|
||||
|
||||
@@ -2,6 +2,8 @@ osaca_version: 0.3.0
|
||||
micro_architecture: "AMD Zen (family 17h)"
|
||||
arch_code: "ZEN1"
|
||||
isa: "x86"
|
||||
hidden_loads: true
|
||||
ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "8D", "9", "9D"]
|
||||
port_model_scheme: |
|
||||
┌--------------------------------------┐ ┌-----------------------------------------------┐
|
||||
| 96 entries OoO scheduler | | 84 entries OoO scheduler |
|
||||
@@ -22,7 +24,6 @@ port_model_scheme: |
|
||||
| SHUF | ┌-------------┐
|
||||
└-------┘ | STORE |
|
||||
└-------------┘
|
||||
ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "8D", "9", "9D"]
|
||||
instruction_forms:
|
||||
- name: add
|
||||
operands:
|
||||
@@ -102,6 +103,18 @@ instruction_forms:
|
||||
throughput: 0.0
|
||||
latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 8D 9 9D
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: movl
|
||||
operands:
|
||||
- class: "memory"
|
||||
base: "gpr"
|
||||
offset: "imd"
|
||||
index: ~
|
||||
scale: 1
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
throughput: 0.5
|
||||
latency: 3.0
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]
|
||||
- name: mulsd
|
||||
operands:
|
||||
- class: "register"
|
||||
|
||||
@@ -2,14 +2,17 @@
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime as dt
|
||||
|
||||
from ruamel import yaml
|
||||
|
||||
from osaca.semantics import INSTR_FLAGS, SemanticsAppender
|
||||
import osaca
|
||||
from osaca.semantics import INSTR_FLAGS, KernelDG, SemanticsAppender
|
||||
|
||||
|
||||
class Frontend(object):
|
||||
def __init__(self, arch=None, path_to_yaml=None):
|
||||
def __init__(self, filename, arch=None, path_to_yaml=None):
|
||||
self._filename = filename
|
||||
if not arch and not path_to_yaml:
|
||||
raise ValueError('Either arch or path_to_yaml required.')
|
||||
if arch and path_to_yaml:
|
||||
@@ -43,13 +46,20 @@ class Frontend(object):
|
||||
return instruction_form['comment'] is not None and instruction_form['instruction'] is None
|
||||
|
||||
def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
|
||||
print()
|
||||
lineno_filler = ' ' if show_lineno else ''
|
||||
port_len = self._get_max_port_len(kernel)
|
||||
separator = '-' * sum([x + 3 for x in port_len]) + '-'
|
||||
separator += '--' + len(str(kernel[-1]['line_number'])) * '-' if show_lineno else ''
|
||||
col_sep = '|'
|
||||
sep_list = self._get_separator_list(col_sep)
|
||||
headline = 'Port pressure in cycles'
|
||||
headline_str = '{{:^{}}}'.format(len(separator))
|
||||
|
||||
print(
|
||||
'\n\nThroughput Analysis Report\n'
|
||||
+ '--------------------------'
|
||||
)
|
||||
print(headline_str.format(headline))
|
||||
print(lineno_filler + self._get_port_number_line(port_len))
|
||||
print(separator)
|
||||
for instruction_form in kernel:
|
||||
@@ -86,6 +96,7 @@ class Frontend(object):
|
||||
string_result = ''
|
||||
string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else ''
|
||||
string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else ''
|
||||
string_result += 'P' if INSTR_FLAGS.HIDDEN_LD in flag_obj else ''
|
||||
# TODO add other flags
|
||||
string_result += ' ' if len(string_result) == 0 else ''
|
||||
return string_result
|
||||
@@ -120,7 +131,10 @@ class Frontend(object):
|
||||
return string_result
|
||||
|
||||
def print_latency_analysis(self, cp_kernel, separator='|'):
|
||||
print('\n\n------------------------')
|
||||
print(
|
||||
'\n\nLatency Analysis Report\n'
|
||||
+ '-----------------------'
|
||||
)
|
||||
for instruction_form in cp_kernel:
|
||||
print(
|
||||
'{:4d} {} {:4.1f} {}{}{} {}'.format(
|
||||
@@ -142,7 +156,10 @@ class Frontend(object):
|
||||
)
|
||||
|
||||
def print_loopcarried_dependencies(self, dep_tuplelist, separator='|'):
|
||||
print('\n\n------------------------')
|
||||
print(
|
||||
'\n\nLoop-Carried Dependencies Analysis Report\n'
|
||||
+ '-----------------------------------------'
|
||||
)
|
||||
for tup in dep_tuplelist:
|
||||
print(
|
||||
'{:4d} {} {:4.1f} {} {:36}{} {}'.format(
|
||||
@@ -161,11 +178,38 @@ class Frontend(object):
|
||||
)
|
||||
)
|
||||
|
||||
def print_list_summary(self):
|
||||
raise NotImplementedError
|
||||
def _print_header_report(self):
    """Print the report banner.

    The banner lists the OSACA version (from ``osaca.osaca.get_version()``),
    ``self._filename``, ``self._arch`` and the current UTC time formatted as
    ``YYYY-MM-DD HH:MM:SS``.
    """
    # Local import: at file level only ``datetime`` itself is imported (as ``dt``).
    from datetime import timezone

    version = osaca.osaca.get_version()
    adjust = 20  # width of the left-hand label column
    # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware "now" in
    # UTC formats to exactly the same string with this format spec.
    timestamp = dt.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    lines = [
        'Open Source Architecture Code Analyzer (OSACA) - {}\n'.format(version),
        'Analyzed file:'.ljust(adjust) + '{}\n'.format(self._filename),
        'Architecture:'.ljust(adjust) + '{}\n'.format(self._arch),
        'Timestamp:'.ljust(adjust) + '{}\n'.format(timestamp),
    ]
    # Every entry already ends in a newline; print() adds one more, which
    # leaves a blank line after the banner (same as before).
    print(''.join(lines))
|
||||
|
||||
def _print_header_throughput_report(self):
|
||||
raise NotImplementedError
|
||||
def _print_symbol_map(self):
    """Print a legend that explains the flag symbols used in the report.

    One line is printed per flag, in sorted flag order. The symbol for each
    flag is obtained from ``self._get_flag_symbols`` called with a
    single-element list.
    """
    symbol_dict = {
        INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
        INSTR_FLAGS.TP_UNKWN: 'No throughput/latency information for this instruction in '
        + 'data file',
        INSTR_FLAGS.HIDDEN_LD: 'Throughput of LOAD operation can be hidden behind a past '
        + 'or future STORE instruction',
    }
    # Iterating a dict yields its keys, so sorted() needs no .keys() call;
    # join() builds the legend in one pass instead of repeated ``+=``.
    symbol_map = ''.join(
        ' {} - {}\n'.format(self._get_flag_symbols([flag]), symbol_dict[flag])
        for flag in sorted(symbol_dict)
    )
    # Each legend line already ends in a newline, so suppress print()'s own.
    print(symbol_map, end='')
|
||||
|
||||
def _print_port_binding_summary(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False):
    """Print the complete report for *kernel*.

    Sections are emitted in this order: header, symbol legend, throughput
    analysis (with line numbers), latency analysis of the critical path and
    loop-carried dependencies.

    :param kernel: kernel passed on to the throughput analysis
    :param kernel_dg: dependency graph providing the critical path and the
        loop-carried dependencies
    :param verbose: accepted but not read anywhere in this method
    """
    self._print_header_report()
    self._print_symbol_map()
    self.print_throughput_analysis(kernel, show_lineno=True)
    # Each graph query is issued right before its section is printed, so the
    # call order is the same as in the one-line form.
    critical_path = kernel_dg.get_critical_path()
    self.print_latency_analysis(critical_path)
    loopcarried = kernel_dg.get_loopcarried_dependencies()
    self.print_loopcarried_dependencies(loopcarried)
|
||||
|
||||
@@ -4,7 +4,8 @@ Collection of parsers supported by OSACA.
|
||||
Only the parser below will be exported, so please add new parsers to __all__.
|
||||
"""
|
||||
from .attr_dict import AttrDict
|
||||
from .base_parser import BaseParser
|
||||
from .parser_x86att import ParserX86ATT
|
||||
from .parser_AArch64v81 import ParserAArch64v81
|
||||
|
||||
__all__ = ['AttrDict', 'ParserX86ATT', 'ParserAArch64v81']
|
||||
__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64v81']
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
|
||||
import pyparsing as pp
|
||||
|
||||
from .attr_dict import AttrDict
|
||||
from .base_parser import BaseParser
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
|
||||
|
||||
class ParserAArch64v81(BaseParser):
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
|
||||
import pyparsing as pp
|
||||
|
||||
from .attr_dict import AttrDict
|
||||
from .base_parser import BaseParser
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
|
||||
|
||||
class ParserX86ATT(BaseParser):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from ruamel import yaml
|
||||
|
||||
@@ -71,6 +72,37 @@ class MachineModel(object):
|
||||
def get_arch(self):
|
||||
return self._data['arch_code']
|
||||
|
||||
def get_ports(self):
|
||||
return self._data['ports']
|
||||
|
||||
def has_hidden_loads(self):
    """Return the ``hidden_loads`` entry of the machine model data.

    :returns: the stored value, or ``False`` when the key is absent
    """
    # One lookup with a default replaces the membership test plus indexing.
    return self._data.get('hidden_loads', False)
|
||||
|
||||
def get_data_ports(self):
    """Return the port names that consist of digits followed by a single ``D``.

    Names such as ``'2D'`` match; names such as ``'0DV'`` or ``'2'`` do not.
    The order of ``self._data['ports']`` is preserved.

    :returns: new list of matching port names
    """
    data_port = re.compile(r'^[0-9]+D$')
    # A filtering comprehension replaces the identity comprehension that was
    # wrapped around filter().
    return [port for port in self._data['ports'] if data_port.match(port)]
|
||||
|
||||
@staticmethod
|
||||
def get_isa_for_arch(arch):
    """Map an architecture code to its ISA name.

    The lookup is case-insensitive.

    :param arch: architecture code such as ``'CSX'`` or ``'vulcan'``
    :returns: ``'x86'`` or ``'aarch64'``; ``None`` for an unknown code or
        when *arch* is ``None``
    """
    arch_dict = {
        'vulcan': 'aarch64',
        'zen1': 'x86',
        'snb': 'x86',
        'ivb': 'x86',
        'hsw': 'x86',
        'bdw': 'x86',
        'skl': 'x86',
        'skx': 'x86',
        'csx': 'x86',
    }
    # Without this guard a missing architecture raised AttributeError on
    # ``.lower()`` instead of yielding the "unknown" result.
    if arch is None:
        return None
    # The table values are already lower-case, so no further normalisation
    # is needed; .get() returns None for unknown codes.
    return arch_dict.get(arch.lower())
|
||||
|
||||
######################################################
|
||||
|
||||
def _check_for_duplicate(self, name, operands):
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import copy
|
||||
from itertools import chain, product
|
||||
|
||||
import networkx as nx
|
||||
from itertools import chain, product
|
||||
|
||||
from osaca.parser import AttrDict
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
@@ -5,8 +5,7 @@ import warnings
|
||||
from functools import reduce
|
||||
|
||||
from osaca.parser import AttrDict
|
||||
|
||||
from .hw_model import MachineModel
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
class INSTR_FLAGS:
|
||||
@@ -18,6 +17,8 @@ class INSTR_FLAGS:
|
||||
LT_UNKWN = 'lt_unkown'
|
||||
NOT_BOUND = 'not_bound'
|
||||
HIDDEN_LD = 'hidden_load'
|
||||
HAS_LD = 'performs_load'
|
||||
HAS_ST = 'performs_store'
|
||||
|
||||
|
||||
class SemanticsAppender(object):
|
||||
@@ -36,6 +37,50 @@ class SemanticsAppender(object):
|
||||
assert os.path.exists(name)
|
||||
return name
|
||||
|
||||
# SUMMARY FUNCTION
|
||||
def add_semantics(self, kernel):
    """Annotate every instruction form of *kernel* with semantic information.

    For each instruction form, source/destination operands are assigned
    first and throughput/latency second. If the machine model reports hidden
    loads, ``set_hidden_loads`` is then run once on the whole kernel.

    :param kernel: iterable of instruction forms
    """
    for instr in kernel:
        self.assign_src_dst(instr)
        self.assign_tp_lt(instr)
    if not self._machine_model.has_hidden_loads():
        return
    self.set_hidden_loads(kernel)
|
||||
|
||||
def set_hidden_loads(self, kernel):
    """Flag loads in *kernel* as hidden and zero their data-port pressure.

    Instruction forms flagged with both ``HAS_LD`` and ``HAS_ST`` (matched by
    ``line_number``) are ignored. If there are fewer pure loads than pure
    stores, every load is hidden. Otherwise one load is hidden per store:
    the not-yet-hidden load with the smallest line-number distance to that
    store, with ties going to the lower line number.

    A hidden load gets ``INSTR_FLAGS.HIDDEN_LD`` added to its ``'flags'`` and
    its ``'port_pressure'`` passed through ``self._nullify_data_ports``.

    :param kernel: list of instruction forms; modified in place
    """

    def hide(load):
        # Single place for the "hide" bookkeeping used by both branches.
        load['flags'] += [INSTR_FLAGS.HIDDEN_LD]
        load['port_pressure'] = self._nullify_data_ports(load['port_pressure'])

    loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr['flags']]
    stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr['flags']]
    # Drop instructions that both load and store; a set gives O(1) membership.
    shared_ldst = {instr['line_number'] for instr in loads} & {
        instr['line_number'] for instr in stores
    }
    loads = [instr for instr in loads if instr['line_number'] not in shared_ldst]
    stores = [instr for instr in stores if instr['line_number'] not in shared_ldst]

    if not stores or not loads:
        # nothing to do
        return
    if len(loads) < len(stores):
        # Hide all loads
        for load in loads:
            hide(load)
        return
    for store in stores:
        candidates = [
            load for load in loads if INSTR_FLAGS.HIDDEN_LD not in load['flags']
        ]
        if not candidates:
            # Every load is already flagged hidden (e.g. the kernel was
            # processed before); min() on an empty sequence would raise.
            break
        store_line = store['line_number']
        # Pick the load itself instead of its line number, which avoids
        # rescanning the whole kernel for every store.
        closest = min(
            candidates,
            key=lambda load: (abs(load['line_number'] - store_line), load['line_number']),
        )
        hide(closest)
|
||||
|
||||
# get parser result and assign throughput and latency value to instruction form
|
||||
# mark instruction form with semantic flags
|
||||
def assign_tp_lt(self, instruction_form):
|
||||
@@ -125,6 +170,37 @@ class SemanticsAppender(object):
|
||||
# store operand list in dict and reassign operand key/value pair
|
||||
op_dict['operand_list'] = operands
|
||||
instruction_form['operands'] = AttrDict.convert_dict(op_dict)
|
||||
# assign LD/ST flags
|
||||
instruction_form['flags'] = (
|
||||
instruction_form['flags'] if 'flags' in instruction_form else []
|
||||
)
|
||||
if self._has_load(instruction_form):
|
||||
instruction_form['flags'] += [INSTR_FLAGS.HAS_LD]
|
||||
if self._has_store(instruction_form):
|
||||
instruction_form['flags'] += [INSTR_FLAGS.HAS_ST]
|
||||
|
||||
def _nullify_data_ports(self, port_pressure):
    """Set the pressure of every data port to ``0.0``.

    The positions to clear are found by looking up each name returned by the
    machine model's ``get_data_ports()`` in its ``get_ports()`` list.

    :param port_pressure: list of per-port pressure values; modified in place
    :returns: the same *port_pressure* object
    """
    # The port list does not change inside the loop, so fetch it once.
    ports = self._machine_model.get_ports()
    for port in self._machine_model.get_data_ports():
        port_pressure[ports.index(port)] = 0.0
    return port_pressure
|
||||
|
||||
def _has_load(self, instruction_form):
    """Return whether any source or source/destination operand contains ``'memory'``.

    :param instruction_form: instruction form whose ``'operands'`` entry
        provides ``'source'`` and ``'src_dst'`` operand lists
    :returns: ``True`` if at least one of those operands contains
        ``'memory'``, ``False`` otherwise
    """
    operands = instruction_form['operands']
    # any() stops at the first match, like the explicit loop it replaces.
    return any('memory' in operand for operand in operands['source'] + operands['src_dst'])
|
||||
|
||||
def _has_store(self, instruction_form):
    """Return whether any destination or source/destination operand contains ``'memory'``.

    :param instruction_form: instruction form whose ``'operands'`` entry
        provides ``'destination'`` and ``'src_dst'`` operand lists
    :returns: ``True`` if at least one of those operands contains
        ``'memory'``, ``False`` otherwise
    """
    operands = instruction_form['operands']
    # any() stops at the first match, like the explicit loop it replaces.
    return any(
        'memory' in operand for operand in operands['destination'] + operands['src_dst']
    )
|
||||
|
||||
def _get_regular_source_operands(self, instruction_form):
|
||||
if self._isa == 'x86':
|
||||
|
||||
Reference in New Issue
Block a user