enhanced frontend and added hidable load port

This commit is contained in:
JanLJL
2019-08-29 14:03:16 +02:00
parent 5395d4eadc
commit 2d97c1c09a
12 changed files with 194 additions and 27 deletions

View File

@@ -30,6 +30,7 @@ class IbenchAPI(object):
# TODO
raise NotImplementedError
# TODO
# template_x86 = Template()
template_aarch64 = Template(

View File

@@ -4,8 +4,8 @@ import collections
from osaca.frontend import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, SemanticsAppender,
reduce_to_section)
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel,
SemanticsAppender, reduce_to_section)
class KerncraftAPI(object):
@@ -21,16 +21,13 @@ class KerncraftAPI(object):
def analyze_code(self, code):
    """Parse ``code``, cut it down to the analysed section and attach semantics.

    :param code: input handed unchanged to ``self.parser.parse_file``
    :returns: the instruction forms selected by ``reduce_to_section``,
        annotated in place by ``self.semantics.add_semantics``
    """
    parsed_code = self.parser.parse_file(code)
    kernel = reduce_to_section(parsed_code, self.machine_model.get_ISA())
    # add_semantics() already runs assign_src_dst() and assign_tp_lt() for
    # every instruction form. Running them here as well would append the
    # load/store flags a second time.
    self.semantics.add_semantics(kernel)
    return kernel
def create_output(self, kernel, verbose=False):
    """Print the full analysis report for ``kernel``.

    :param kernel: instruction forms as returned by ``analyze_code``
    :param verbose: forwarded to ``Frontend.print_full_analysis``
    """
    kernel_graph = KernelDG(kernel, self.parser, self.machine_model)
    # NOTE(review): Frontend.__init__ now declares ``filename`` as its first
    # parameter without a default; this call does not pass one. Confirm which
    # file name should be supplied here.
    frontend = Frontend(arch=self.machine_model.get_arch())
    # print_full_analysis() already emits the throughput and the latency
    # report, so they are not printed separately any more.
    frontend.print_full_analysis(kernel, kernel_graph, verbose=verbose)
def get_unmatched_instruction_ratio(self, kernel):
unmatched_counter = 0

View File

@@ -2,6 +2,11 @@ osaca_version: 0.3.0
micro_architecture: "Cascade Lake SP"
arch_code: "CSX"
isa: "x86"
ROB_size: 224
retired_uOps_per_cycle: 4
scheduler_size: 97
hidden_loads: false
ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
port_model_scheme: |
┌------------------------------------------------------------------------┐
| 97 entry unified scheduler |
@@ -38,7 +43,6 @@ port_model_scheme: |
| VNNI | ┌-------┐
└-------┘ | VNNI |
└-------┘
ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
instruction_forms:
- name: addsd
operands:

View File

@@ -5,6 +5,7 @@ isa: "AArch64"
ROB_size: 180
retired_uOps_per_cycle: 4
scheduler_size: 60
hidden_loads: false
ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"]
port_model_scheme: |
┌-----------------------------------------------------------┐

View File

@@ -2,6 +2,8 @@ osaca_version: 0.3.0
micro_architecture: "AMD Zen (family 17h)"
arch_code: "ZEN1"
isa: "x86"
hidden_loads: true
ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "8D", "9", "9D"]
port_model_scheme: |
┌--------------------------------------┐ ┌-----------------------------------------------┐
| 96 entries OoO scheduler | | 84 entries OoO scheduler |
@@ -22,7 +24,6 @@ port_model_scheme: |
| SHUF | ┌-------------┐
└-------┘ | STORE |
└-------------┘
ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "8D", "9", "9D"]
instruction_forms:
- name: add
operands:
@@ -102,6 +103,18 @@ instruction_forms:
throughput: 0.0
latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 8D 9 9D
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
- name: movl
operands:
- class: "memory"
base: "gpr"
offset: "imd"
index: ~
scale: 1
- class: "register"
name: "gpr"
throughput: 0.5
latency: 3.0
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]
- name: mulsd
operands:
- class: "register"

View File

@@ -2,14 +2,17 @@
import os
import re
from datetime import datetime as dt
from ruamel import yaml
from osaca.semantics import INSTR_FLAGS, SemanticsAppender
import osaca
from osaca.semantics import INSTR_FLAGS, KernelDG, SemanticsAppender
class Frontend(object):
def __init__(self, arch=None, path_to_yaml=None):
def __init__(self, filename, arch=None, path_to_yaml=None):
self._filename = filename
if not arch and not path_to_yaml:
raise ValueError('Either arch or path_to_yaml required.')
if arch and path_to_yaml:
@@ -43,13 +46,20 @@ class Frontend(object):
return instruction_form['comment'] is not None and instruction_form['instruction'] is None
def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
print()
lineno_filler = ' ' if show_lineno else ''
port_len = self._get_max_port_len(kernel)
separator = '-' * sum([x + 3 for x in port_len]) + '-'
separator += '--' + len(str(kernel[-1]['line_number'])) * '-' if show_lineno else ''
col_sep = '|'
sep_list = self._get_separator_list(col_sep)
headline = 'Port pressure in cycles'
headline_str = '{{:^{}}}'.format(len(separator))
print(
'\n\nThroughput Analysis Report\n'
+ '--------------------------'
)
print(headline_str.format(headline))
print(lineno_filler + self._get_port_number_line(port_len))
print(separator)
for instruction_form in kernel:
@@ -86,6 +96,7 @@ class Frontend(object):
string_result = ''
string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else ''
string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else ''
string_result += 'P' if INSTR_FLAGS.HIDDEN_LD in flag_obj else ''
# TODO add other flags
string_result += ' ' if len(string_result) == 0 else ''
return string_result
@@ -120,7 +131,10 @@ class Frontend(object):
return string_result
def print_latency_analysis(self, cp_kernel, separator='|'):
print('\n\n------------------------')
print(
'\n\nLatency Analysis Report\n'
+ '-----------------------'
)
for instruction_form in cp_kernel:
print(
'{:4d} {} {:4.1f} {}{}{} {}'.format(
@@ -142,7 +156,10 @@ class Frontend(object):
)
def print_loopcarried_dependencies(self, dep_tuplelist, separator='|'):
print('\n\n------------------------')
print(
'\n\nLoop-Carried Dependencies Analysis Report\n'
+ '-----------------------------------------'
)
for tup in dep_tuplelist:
print(
'{:4d} {} {:4.1f} {} {:36}{} {}'.format(
@@ -161,11 +178,38 @@ class Frontend(object):
)
)
def print_list_summary(self):
    """Placeholder: not implemented yet, always raises ``NotImplementedError``."""
    raise NotImplementedError
def _print_header_report(self):
    """Print the report header.

    The header lists the OSACA version, the analysed file name, the
    architecture and the current UTC time; ``print`` adds one trailing
    empty line after it.
    """
    # Local import: only the ``datetime`` class (as ``dt``) is imported at
    # module level.
    from datetime import timezone

    version = osaca.osaca.get_version()
    adjust = 20
    # datetime.utcnow() is deprecated; an aware "now" in UTC formats to the
    # very same string with this format.
    timestamp = dt.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    header_lines = [
        'Open Source Architecture Code Analyzer (OSACA) - {}\n'.format(version),
        'Analyzed file:'.ljust(adjust) + '{}\n'.format(self._filename),
        'Architecture:'.ljust(adjust) + '{}\n'.format(self._arch),
        'Timestamp:'.ljust(adjust) + '{}\n'.format(timestamp),
    ]
    print(''.join(header_lines))
def _print_header_throughput_report(self):
    """Placeholder: not implemented yet, always raises ``NotImplementedError``."""
    raise NotImplementedError
def _print_symbol_map(self):
    """Print a legend with one ``<symbol> - <description>`` line per flag.

    Lines are ordered by the sorted flag values; nothing is printed after
    the last line break.
    """
    descriptions = {
        INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
        INSTR_FLAGS.TP_UNKWN: 'No throughput/latency information for this instruction in '
        + 'data file',
        INSTR_FLAGS.HIDDEN_LD: 'Throughput of LOAD operation can be hidden behind a past '
        + 'or future STORE instruction',
    }
    legend_lines = [
        ' {} - {}\n'.format(self._get_flag_symbols([flag]), descriptions[flag])
        for flag in sorted(descriptions)
    ]
    print(''.join(legend_lines), end='')
def _print_port_binding_summary(self):
    """Placeholder: not implemented yet, always raises ``NotImplementedError``."""
    raise NotImplementedError
def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False):
    """Print the complete report for ``kernel``.

    The sections appear in this order: header, symbol legend, throughput
    analysis (with line numbers), latency analysis of the critical path,
    loop-carried dependencies.

    :param kernel: instruction forms to report on
    :param kernel_dg: dependency graph that supplies the critical path and
        the loop-carried dependencies
    :param verbose: accepted but not used by this method
    """
    self._print_header_report()
    self._print_symbol_map()
    self.print_throughput_analysis(kernel, show_lineno=True)
    critical_path = kernel_dg.get_critical_path()
    self.print_latency_analysis(critical_path)
    loopcarried_deps = kernel_dg.get_loopcarried_dependencies()
    self.print_loopcarried_dependencies(loopcarried_deps)

View File

@@ -4,7 +4,8 @@ Collection of parsers supported by OSACA.
Only the parser below will be exported, so please add new parsers to __all__.
"""
from .attr_dict import AttrDict
from .base_parser import BaseParser
from .parser_x86att import ParserX86ATT
from .parser_AArch64v81 import ParserAArch64v81
__all__ = ['AttrDict', 'ParserX86ATT', 'ParserAArch64v81']
__all__ = ['AttrDict', 'BaseParser', 'ParserX86ATT', 'ParserAArch64v81']

View File

@@ -3,8 +3,7 @@
import pyparsing as pp
from .attr_dict import AttrDict
from .base_parser import BaseParser
from osaca.parser import AttrDict, BaseParser
class ParserAArch64v81(BaseParser):

View File

@@ -2,8 +2,7 @@
import pyparsing as pp
from .attr_dict import AttrDict
from .base_parser import BaseParser
from osaca.parser import AttrDict, BaseParser
class ParserX86ATT(BaseParser):

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import os
import re
from ruamel import yaml
@@ -71,6 +72,37 @@ class MachineModel(object):
def get_arch(self):
    """Return the ``arch_code`` entry of the loaded machine model data."""
    arch_code = self._data['arch_code']
    return arch_code
def get_ports(self):
    """Return the ``ports`` entry of the loaded machine model data."""
    ports = self._data['ports']
    return ports
def has_hidden_loads(self):
    """Return the model's ``hidden_loads`` entry, or ``False`` if it has none."""
    return self._data.get('hidden_loads', False)
def get_data_ports(self):
    """Return the names of the model's data ports, in model order.

    A data port is a port whose name consists of digits followed by a
    single ``D`` (e.g. ``2D``); names with a longer suffix such as ``0DV``
    are not included.
    """
    data_port = re.compile(r'^[0-9]+D$')
    return [port for port in self._data['ports'] if data_port.match(port)]
@staticmethod
def get_isa_for_arch(arch):
arch_dict = {
'vulcan': 'aarch64',
'zen1': 'x86',
'snb': 'x86',
'ivb': 'x86',
'hsw': 'x86',
'bdw': 'x86',
'skl': 'x86',
'skx': 'x86',
'csx': 'x86',
}
arch = arch.lower()
if arch in arch_dict:
return arch_dict[arch].lower()
return None
######################################################
def _check_for_duplicate(self, name, operands):

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python3
import copy
from itertools import chain, product
import networkx as nx
from itertools import chain, product
from osaca.parser import AttrDict
from osaca.semantics import MachineModel

View File

@@ -5,8 +5,7 @@ import warnings
from functools import reduce
from osaca.parser import AttrDict
from .hw_model import MachineModel
from osaca.semantics import MachineModel
class INSTR_FLAGS:
@@ -18,6 +17,8 @@ class INSTR_FLAGS:
LT_UNKWN = 'lt_unkown'
NOT_BOUND = 'not_bound'
HIDDEN_LD = 'hidden_load'
HAS_LD = 'performs_load'
HAS_ST = 'performs_store'
class SemanticsAppender(object):
@@ -36,6 +37,50 @@ class SemanticsAppender(object):
assert os.path.exists(name)
return name
# SUMMARY FUNCTION
def add_semantics(self, kernel):
    """Run all semantic passes over ``kernel``.

    Every instruction form gets its source/destination operands and its
    throughput/latency data assigned; afterwards hidden loads are marked if
    the machine model reports that it supports them.

    :param kernel: iterable of instruction forms, annotated in place
    """
    for form in kernel:
        self.assign_src_dst(form)
        self.assign_tp_lt(form)
    if not self._machine_model.has_hidden_loads():
        return
    self.set_hidden_loads(kernel)
def set_hidden_loads(self, kernel):
    """Mark loads that can be hidden behind stores and clear their data-port pressure.

    Instruction forms that both load and store are left out on both sides.
    If there are fewer remaining loads than stores, every load is hidden.
    Otherwise each store hides the not yet hidden load that is closest to it
    by line number; on equal distance the load with the lower line number
    wins.

    :param kernel: list of instruction forms; hidden loads get the
        ``HIDDEN_LD`` flag appended and their ``port_pressure`` updated in place
    """
    loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr['flags']]
    stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr['flags']]
    # Filter instructions including load and store
    load_ids = {instr['line_number'] for instr in loads}
    store_ids = {instr['line_number'] for instr in stores}
    shared_ldst = load_ids & store_ids
    loads = [instr for instr in loads if instr['line_number'] not in shared_ldst]
    stores = [instr for instr in stores if instr['line_number'] not in shared_ldst]
    if not stores or not loads:
        # nothing to do
        return
    if len(loads) < len(stores):
        # Hide all loads
        for load in loads:
            load['flags'] += [INSTR_FLAGS.HIDDEN_LD]
            load['port_pressure'] = self._nullify_data_ports(load['port_pressure'])
        return
    for store in stores:
        candidates = [
            load_instr
            for load_instr in loads
            if INSTR_FLAGS.HIDDEN_LD not in load_instr['flags']
        ]
        if not candidates:
            # Every load is already hidden (e.g. flags left over from an
            # earlier pass); min() would raise ValueError on the empty list.
            break
        store_line = store['line_number']
        # Get 'closest' load instruction; picking the instruction form
        # directly avoids searching the kernel again by line number.
        # NOTE(review): assumes line numbers are unique within the kernel.
        load = min(
            candidates,
            key=lambda instr: (abs(instr['line_number'] - store_line), instr['line_number']),
        )
        # Hide load
        load['flags'] += [INSTR_FLAGS.HIDDEN_LD]
        load['port_pressure'] = self._nullify_data_ports(load['port_pressure'])
# get parser result and assign throughput and latency value to instruction form
# mark instruction form with semantic flags
def assign_tp_lt(self, instruction_form):
@@ -125,6 +170,37 @@ class SemanticsAppender(object):
# store operand list in dict and reassign operand key/value pair
op_dict['operand_list'] = operands
instruction_form['operands'] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags
instruction_form['flags'] = (
instruction_form['flags'] if 'flags' in instruction_form else []
)
if self._has_load(instruction_form):
instruction_form['flags'] += [INSTR_FLAGS.HAS_LD]
if self._has_store(instruction_form):
instruction_form['flags'] += [INSTR_FLAGS.HAS_ST]
def _nullify_data_ports(self, port_pressure):
data_ports = self._machine_model.get_data_ports()
for port in data_ports:
index = self._machine_model.get_ports().index(port)
port_pressure[index] = 0.0
return port_pressure
def _has_load(self, instruction_form):
for operand in (
instruction_form['operands']['source'] + instruction_form['operands']['src_dst']
):
if 'memory' in operand:
return True
return False
def _has_store(self, instruction_form):
for operand in (
instruction_form['operands']['destination'] + instruction_form['operands']['src_dst']
):
if 'memory' in operand:
return True
return False
def _get_regular_source_operands(self, instruction_form):
if self._isa == 'x86':