Merge branch 'master' of github.com:RRZE-HPC/OSACA

This commit is contained in:
Julian Hammer
2019-11-13 12:52:34 +01:00
11 changed files with 85824 additions and 57 deletions

View File

@@ -92,7 +92,7 @@ The usage of OSACA can be listed as:
Keep in mind you have to provide a (dummy) filename in any case.
--import MICROBENCH
Import a given microbenchmark output file into the corresponding architecture instruction database.
Define the type of microbenchmark either as "ibench", "asmbench" or "uopsinfo".
Define the type of microbenchmark either as "ibench" or "asmbench".
--insert-marker
OSACA calls the Kerncraft module for the interactive insertion of `IACA <https://software.intel.com/en-us/articles/intel-architecture-code-analyzer>`_ markers in suggested assembly blocks.
--export-graph EXPORT_PATH

14084
osaca/data/bdw.yml Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -27,12 +27,12 @@ port_model_scheme: |
| ALU | | ALU | | LD | | LD | | ST | | ALU | | ALU & | | AGU |
└-------┘ └-------┘ └-----┘ └-----┘ └-----┘ └-------┘ | Shift | └-----┘
┌-------┐ ┌-------┐ ┌-----┐ ┌-----┐ ┌-------┐ └--------┘
| 2ND | | Fast | | AGU | | AGU | | Fast |
| BRANCH| | LEA | └-----┘ └-----┘ | LEA |
└-------┘ └-------┘ └-------┘
┌-------┐ ┌-------┐ ┌-------┐
|AVX DIV| |AVX FMA| | AVX |
└-------┘ └-------┘ | SHUF |
| 2ND | | Fast | | AGU | | AGU | | Fast | ┌--------┐
| BRANCH| | LEA | └-----┘ └-----┘ | LEA | | BRANCH |
└-------┘ └-------┘ └-------┘ └--------┘
┌-------┐ ┌-------┐ ┌-------┐
|AVX DIV| |AVX FMA| | AVX |
└-------┘ └-------┘ | SHUF |
┌-------┐ ┌-------┐ └-------┘
|AVX FMA| |AVX MUL| ┌-------┐
└-------┘ └-------┘ |AVX-512|
@@ -50,9 +50,9 @@ port_model_scheme: |
| Shift | | Slow | |AVX-512|
└-------┘ | LEA | | ALU |
┌-------┐ └-------┘ └-------┘
| VNNI | ┌-------┐
└-------┘ | VNNI |
└-------┘
| VNNI | ┌-------┐
└-------┘ | VNNI |
└-------┘
instruction_forms:
- name: addsd
operands:

13756
osaca/data/hsw.yml Normal file

File diff suppressed because it is too large Load Diff

10482
osaca/data/ivb.yml Normal file

File diff suppressed because it is too large Load Diff

36883
osaca/data/skx.yml Normal file

File diff suppressed because it is too large Load Diff

10433
osaca/data/snb.yml Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -11,6 +11,15 @@ from osaca.semantics import MachineModel
def sanity_check(arch: str, verbose=False):
"""
Checks the database for missing TP/LT values, instructions that might be missing in the ISA DB
and duplicate instructions.
:param arch: micro-arch key to define DB to check
:type arch: str
:param verbose: verbose output flag, defaults to `False`
:type verbose: bool, optional
"""
# load arch machine model
arch_mm = MachineModel(arch=arch)
data = arch_mm['instruction_forms']
@@ -24,7 +33,6 @@ def sanity_check(arch: str, verbose=False):
missing_throughput,
missing_latency,
missing_port_pressure,
wrong_port,
suspicious_instructions,
duplicate_instr_arch,
) = _check_sanity_arch_db(arch_mm, isa_mm)
@@ -36,7 +44,6 @@ def sanity_check(arch: str, verbose=False):
missing_throughput,
missing_latency,
missing_port_pressure,
wrong_port,
suspicious_instructions,
duplicate_instr_arch,
duplicate_instr_isa,
@@ -46,6 +53,16 @@ def sanity_check(arch: str, verbose=False):
def import_benchmark_output(arch, bench_type, filepath):
"""
Import benchmark results from micro-benchmarks.
:param arch: target architecture key
:type arch: str
:param bench_type: key for defining type of benchmark output
:type bench_type: str
:param filepath: filepath to the output file
:type filepath: str
"""
supported_bench_outputs = ['ibench', 'asmbench']
assert os.path.exists(filepath)
if bench_type not in supported_bench_outputs:
@@ -120,6 +137,7 @@ def _get_asmbench_output(input_data, isa):
def _get_ibench_output(input_data, isa):
"""Parse the standard output of ibench and add instructions to DB."""
db_entries = {}
for line in input_data:
if 'Using frequency' in line or len(line) == 0:
@@ -242,7 +260,6 @@ def _check_sanity_arch_db(arch_mm, isa_mm):
missing_throughput = []
missing_latency = []
missing_port_pressure = []
wrong_port = []
suspicious_instructions = []
duplicate_instr_arch = []
@@ -254,12 +271,9 @@ def _check_sanity_arch_db(arch_mm, isa_mm):
missing_latency.append(instr_form)
if instr_form['port_pressure'] is None:
missing_port_pressure.append(instr_form)
else:
if _check_for_wrong_port(arch_mm['ports'], instr_form):
wrong_port.append(instr_form)
# check entry against ISA DB
for prefix in suspicious_prefixes:
if instr_form['name'].startswith(prefix):
if instr_form['name'].lower().startswith(prefix):
# check if instruction in ISA DB
if isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None:
# if not, mark them as suspicious and print it on the screen
@@ -278,20 +292,11 @@ def _check_sanity_arch_db(arch_mm, isa_mm):
missing_throughput,
missing_latency,
missing_port_pressure,
wrong_port,
suspicious_instructions,
duplicate_instr_arch,
)
def _check_for_wrong_port(port_list, instr_form):
for cycles, ports in instr_form['port_pressure']:
for p in ports:
if p not in port_list:
return False
return True
def _check_sanity_isa_db(arch_mm, isa_mm):
# returned lists
duplicate_instr_isa = []
@@ -316,7 +321,7 @@ def _check_sanity_isa_db(arch_mm, isa_mm):
def _print_sanity_report(
total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False
):
# non-verbose summary
print('SUMMARY\n----------------------')
@@ -335,11 +340,6 @@ def _print_sanity_report(
round(100 * len(m_pp) / total), len(m_pp), total
)
)
print(
'{}% ({}/{}) of instruction forms have an invalid port identifier.'.format(
round(100 * len(wrong_pp) / total), len(wrong_pp), total
)
)
print(
'{}% ({}/{}) of instruction forms might miss an ISA DB entry.'.format(
round(100 * len(suspic_instr) / total), len(suspic_instr), total
@@ -355,12 +355,12 @@ def _print_sanity_report(
# verbose version
if verbose:
_print_sanity_report_verbose(
total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
)
def _print_sanity_report_verbose(
total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
):
BRIGHT_CYAN = '\033[1;36;1m'
BRIGHT_BLUE = '\033[1;34;1m'
@@ -382,14 +382,6 @@ def _print_sanity_report_verbose(
)
for instr_form in m_pp:
print('{}{}{}'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE))
print(
'Instruction forms with invalid port identifiers in port pressure:\n'
if len(wrong_pp) != 0
else '',
end='',
)
for instr_form in wrong_pp:
print('{}{}{}'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE))
print(
'Instruction forms which might miss an ISA DB entry:\n' if len(suspic_instr) != 0 else '',
end='',

View File

@@ -1,5 +1,7 @@
#!/usr/bin/env python3
"""
Frontend interface for OSACA. Does everything necessary for printing analysis to the terminal.
"""
import re
from datetime import datetime as dt
@@ -8,6 +10,16 @@ from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel
class Frontend(object):
def __init__(self, filename='', arch=None, path_to_yaml=None):
"""
Constructor method.
:param filename: path to the analyzed kernel file for documentation, defaults to ''
:type filename: str, optional
:param arch: micro-arch code for getting the machine model, defaults to None
:type arch: str, optional
:param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
:type path_to_yaml: str, optional
"""
self._filename = filename
if not arch and not path_to_yaml:
raise ValueError('Either arch or path_to_yaml required.')
@@ -22,9 +34,25 @@ class Frontend(object):
self._arch = self._machine_model.get_arch()
def _is_comment(self, instruction_form):
"""
Checks if instruction form is a comment-only line.
:param instruction_form: instruction form as dict
:returns: `True` if comment line, `False` otherwise
"""
return instruction_form['comment'] is not None and instruction_form['instruction'] is None
def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
"""
Print throughput analysis only.
:param kernel: Kernel to print throughput analysis for.
:type kernel: list
:param show_lineno: flag for showing the line number of instructions, defaults to `False`
:type show_lineno: bool, optional
:param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
:type show_cmnts: bool, optional
"""
lineno_filler = ' ' if show_lineno else ''
port_len = self._get_max_port_len(kernel)
separator = '-' * sum([x + 3 for x in port_len]) + '-'
@@ -58,6 +86,14 @@ class Frontend(object):
print(lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' '))
def print_latency_analysis(self, cp_kernel, separator='|'):
"""
Print a list-based CP analysis to the terminal.
:param cp_kernel: loop kernel containing the CP information for each instruction form
:type cp_kernel: list
:param separator: separator symbol for the columns, defaults to '|'
:type separator: str, optional
"""
print('\n\nLatency Analysis Report\n' + '-----------------------')
for instruction_form in cp_kernel:
print(
@@ -80,6 +116,14 @@ class Frontend(object):
)
def print_loopcarried_dependencies(self, dep_dict, separator='|'):
"""
Print a list-based LCD analysis to the terminal.
:param dep_dict: dictionary with first instruction in LCD as key and the deps as value
:type dep_dict: dict
:param separator: separator symbol for the columns, defaults to '|'
:type separator: str, optional
"""
print(
'\n\nLoop-Carried Dependencies Analysis Report\n'
+ '-----------------------------------------'
@@ -101,6 +145,17 @@ class Frontend(object):
)
def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False):
"""
Prints the full analysis report including header, the symbol map, the combined TP/CP/LCD
view and the list based LCD view.
:param kernel: kernel to print
:type kernel: list
:param kernel_dg: directed graph containing CP and LCD
:type kernel_dg: :class:`~osaca.semantics.KernelDG`
:param verbose: verbose output flag, defaults to `False`
:type verbose: bool, optional
"""
self._print_header_report()
self._print_symbol_map()
self.print_combined_view(
@@ -109,9 +164,20 @@ class Frontend(object):
self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies())
def print_combined_view(self, kernel, cp_kernel: KernelDG, dep_dict, show_cmnts=True):
self._print_header_report()
self._print_symbol_map()
print('\n\nCombined Analysis Report\n' + '-----------------------')
"""
Prints the combined view of the kernel including the port pressure (TP), a CP column and a
LCD column.
:param kernel: kernel to print
:type kernel: list
:param cp_kernel: directed graph containing CP and LCD
:type cp_kernel: :class:`~osaca.semantics.KernelDG`
:param dep_dict: dictionary with first instruction in LCD as key and the deps as value
:type dep_dict: dict
:param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
:type show_cmnts: bool, optional
"""
print('\n\nCombined Analysis Report\n' + '------------------------')
lineno_filler = ' '
port_len = self._get_max_port_len(kernel)
# Separator for ports
@@ -180,6 +246,7 @@ class Frontend(object):
####################
def _get_separator_list(self, separator, separator_2=' '):
"""Creates column view for seperators in the TP/combined view."""
separator_list = []
for i in range(len(self._machine_model.get_ports()) - 1):
match_1 = re.search(r'\d+', self._machine_model.get_ports()[i])
@@ -192,6 +259,7 @@ class Frontend(object):
return separator_list
def _get_flag_symbols(self, flag_obj):
"""Returns flags for a flag object of an instruction"""
string_result = ''
string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else ''
string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else ''
@@ -201,6 +269,7 @@ class Frontend(object):
return string_result
def _get_port_pressure(self, ports, port_len, used_ports=[], separator='|'):
"""Returns line of port pressure for an instruction."""
if not isinstance(separator, list):
separator = [separator for x in ports]
string_result = '{} '.format(separator[-1])
@@ -214,20 +283,23 @@ class Frontend(object):
return string_result[:-1]
def _get_node_by_lineno(self, lineno, kernel):
"""Returns instruction form from kernel by its line number."""
nodes = [instr for instr in kernel if instr['line_number'] == lineno]
return nodes[0] if len(nodes) > 0 else None
def _get_lcd_cp_ports(self, line_number, cp_dg, dependency, separator='|'):
"""Returns the CP and LCD line for one instruction."""
lat_cp = lat_lcd = ''
if cp_dg:
lat_cp = self._get_node_by_lineno(line_number, cp_dg)['latency_cp']
lat_cp = float(self._get_node_by_lineno(line_number, cp_dg)['latency_cp'])
if dependency:
lat_lcd = self._get_node_by_lineno(line_number, dependency['dependencies'])[
'latency_lcd'
]
lat_lcd = float(
self._get_node_by_lineno(line_number, dependency['dependencies'])['latency_lcd']
)
return '{} {:>4} {} {:>4} {}'.format(separator, lat_cp, separator, lat_lcd, separator)
def _get_max_port_len(self, kernel):
"""Returns the maximal length needed to print all throughputs of the kernel."""
port_len = [4 for x in self._machine_model.get_ports()]
for instruction_form in kernel:
for i, port in enumerate(instruction_form['port_pressure']):
@@ -236,6 +308,7 @@ class Frontend(object):
return port_len
def _get_port_number_line(self, port_len, separator='|'):
"""Returns column view of port identificators of machine_model."""
string_result = separator
separator_list = self._get_separator_list(separator, '-')
for i, length in enumerate(port_len):
@@ -244,6 +317,7 @@ class Frontend(object):
return string_result
def _print_header_report(self):
"""Prints header information"""
version = 'v0.3'
adjust = 20
header = ''
@@ -256,6 +330,7 @@ class Frontend(object):
print(header)
def _print_symbol_map(self):
"""Prints instruction flag map."""
symbol_dict = {
INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
INSTR_FLAGS.TP_UNKWN: 'No throughput/latency information for this instruction in '

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""CLI for OSACA"""
import argparse
import io
import os
@@ -22,6 +22,7 @@ DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
# Stolen from pip
def __read(*names, **kwargs):
"""Reads in file"""
with io.open(
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
) as fp:
@@ -30,6 +31,7 @@ def __read(*names, **kwargs):
# Stolen from pip
def __find_version(*file_paths):
"""Searches for a version attribute in the given file(s)"""
version_file = __read(*file_paths)
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
if version_match:
@@ -38,11 +40,20 @@ def __find_version(*file_paths):
def get_version():
    """
    Look up the current OSACA version as stated in the ``__init__.py`` file.

    :returns: str -- the version string.
    """
    version = __find_version('__init__.py')
    return version
def create_parser():
"""Return argparse parser."""
"""
Return argparse parser.
:returns: The newly created :class:`~argparse.ArgumentParser` object.
"""
# Create parser
parser = argparse.ArgumentParser(
description='Analyzes a marked innermost loop snippet for a given architecture type.',
@@ -79,8 +90,8 @@ def create_parser():
type=str,
default=argparse.SUPPRESS,
help='Import a given microbenchmark output file into the corresponding architecture '
'instruction database. Define the type of microbenchmark either as "ibench", '
'"asmbench" or "uopsinfo".',
'instruction database. Define the type of microbenchmark either as "ibench" or '
'"asmbench".',
)
parser.add_argument(
'--insert-marker',
@@ -109,9 +120,14 @@ def create_parser():
def check_arguments(args, parser):
"""Check arguments passed by user that are not checked by argparse itself."""
"""
Check arguments passed by user that are not checked by argparse itself.
:param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
:param parser: :class:`~argparse.ArgumentParser` object
"""
supported_archs = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'TX2']
supported_import_files = ['ibench', 'asmbench', 'uopsinfo']
supported_import_files = ['ibench', 'asmbench']
if 'arch' in args and args.arch.upper() not in supported_archs:
parser.error(
@@ -125,6 +141,10 @@ def check_arguments(args, parser):
def check_user_dir():
"""
Creates user directory if it does not exist and copies all not already existing YAML files
into it.
"""
# Check if data files are already in usr dir, otherwise create them
if not os.path.isdir(DATA_DIR):
os.makedirs(DATA_DIR)
@@ -134,15 +154,30 @@ def check_user_dir():
def import_data(benchmark_type, arch, filepath):
"""
Imports benchmark results from micro-benchmarks.
:param benchmark_type: key for defining type of benchmark output
:type benchmark_type: str
:param arch: target architecture to put the data into the right database
:type arch: str
:param filepath: filepath of the output file
:type filepath: str
"""
if benchmark_type.lower() == 'ibench':
import_benchmark_output(arch, 'ibench', filepath)
elif benchmark_type.lower() == 'asmbench':
import_benchmark_output(arch, 'asmbench', filepath)
else:
raise NotImplementedError('This benchmark input variant is not implemented yet.')
raise NotImplementedError('This benchmark input variant is not supported.')
def insert_byte_marker(args):
"""
Inserts byte markers into an assembly file using kerncraft.
:param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
"""
if MachineModel.get_isa_for_arch(args.arch) != 'x86':
print('Marker insertion for non-x86 is not yet supported by Kerncraft.', file=sys.stderr)
sys.exit(1)
@@ -174,6 +209,12 @@ def insert_byte_marker(args):
def inspect(args):
"""
Does the actual throughput and critical path analysis of OSACA and prints it to the
terminal.
:param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
"""
arch = args.arch
isa = MachineModel.get_isa_for_arch(arch)
verbose = args.verbose
@@ -203,6 +244,12 @@ def inspect(args):
def run(args, output_file=sys.stdout):
"""
Main entry point for OSACA's workflow. Decides whether to run an analysis or other things.
:param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
:param output_file: Define the stream for output, defaults to :class:`sys.stdout`
"""
if args.check_db:
# Sanity check on DB
verbose = True if args.verbose > 0 else False
@@ -220,6 +267,13 @@ def run(args, output_file=sys.stdout):
# ---------------------------------------------------
def get_asm_parser(arch) -> BaseParser:
"""
Helper function to create the right parser for a specific architecture.
:param arch: architecture code
:type arch: str
:returns: :class:`~osaca.parser.BaseParser` object
"""
isa = MachineModel.get_isa_for_arch(arch)
if isa == 'x86':
return ParserX86ATT()

View File

@@ -125,6 +125,14 @@ class ArchSemantics(ISASemantics):
instruction_data = self._machine_model.get_instruction(
instruction_form['instruction'], instruction_form['operands']
)
if (
not instruction_data
and self._isa == 'x86'
and instruction_form['instruction'][-1] in 'bwlq'
):
instruction_data = self._machine_model.get_instruction(
instruction_form['instruction'][:-1], instruction_form['operands']
)
if instruction_data:
# instruction form in DB
throughput = instruction_data['throughput']