more interfaces

Author:  JanLJL
Date:    2019-08-20 18:50:57 +02:00
Parent:  e468db4a0d
Commit:  de2ba87d6b

7 changed files with 507 additions and 43 deletions

osaca/api/asmbench_interface.py (new executable file, 29 lines added)

@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import os


class AsmBenchAPI(object):
    def __init__(self, isa):
        # TODO
        self.isa = isa.lower()

    def create_ubenchmark(self):
        # TODO
        if self.isa == 'aarch64':
            self.create_ubench_aarch64()
        elif self.isa == 'x86':
            self.create_ubench_x86()

    def import_asmbench_output(self, filepath):
        # TODO
        assert os.path.exists(filepath)
        raise NotImplementedError

    def create_ubench_aarch64(self):
        # TODO
        raise NotImplementedError

    def create_ubench_x86(self):
        # TODO
        raise NotImplementedError


@@ -40,6 +40,44 @@ def add_entry_to_db(arch: str, entry):
    __dump_data_to_yaml(filepath, data)


def add_entries_to_db(arch: str, entries: list) -> None:
    """Adds entries to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entries: :class:`list` of DB entries which will be added. Ideally each entry consists of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure', 'uops'.
    """
    # load yaml
    arch = arch.lower()
    filepath = os.path.join(os.path.expanduser('~/.osaca/data'), arch + '.yml')
    assert os.path.exists(filepath)
    with open(filepath, 'r') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameters of each entry and append it to the DB data
    # (iterate over a copy so that removing invalid entries does not skip elements)
    for entry in list(entries):
        if 'name' not in entry:
            print(
                'No name for instruction \n\t{}\nspecified. No import possible'.format(entry),
                file=sys.stderr,
            )
            # remove entry from list
            entries.remove(entry)
            continue
        if 'operands' not in entry:
            entry['operands'] = None
        if 'throughput' not in entry:
            entry['throughput'] = None
        if 'latency' not in entry:
            entry['latency'] = None
        if 'port_pressure' not in entry:
            entry['port_pressure'] = None
        data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)


def sanity_check(arch: str, verbose=False):
    # load arch machine model
    arch_mm = MachineModel(arch=arch)
@@ -245,44 +283,6 @@ def _get_full_instruction_name(instruction_form):
    return '{} {}'.format(instruction_form['name'], ','.join(operands))


def add_entries_to_db(arch: str, entries: list) -> None:
    """Adds entries to the user database in ~/.osaca/data

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entries: :class:`list` of DB entries which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure'.
    """
    # load yaml
    arch = arch.lower()
    filepath = os.path.join(os.path.expanduser('~/.osaca/data/', arch + '.yml'))
    assert os.path.exists(filepath)
    with open(filepath, 'r') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    # check parameter of entry and append it to list
    for entry in entries:
        if 'name' not in entry:
            print(
                'No name for instruction \n\t{}\nspecified. No import possible'.format(entry),
                file=sys.stderr,
            )
            # remove entry from list
            entries.remove(entry)
            continue
        if 'operands' not in entry:
            entry['operands'] = None
        if 'throughput' not in entry:
            entry['throughput'] = None
        if 'latency' not in entry:
            entry['latency'] = None
        if 'port_pressure' not in entry:
            entry['port_pressure'] = None
        data['instruction_forms'].append(entry)
    __dump_data_to_yaml(filepath, data)


def __dump_data_to_yaml(filepath, data):
    # first add 'normal' meta data in the right order (no ordered dict yet)
    meta_data = dict(data)

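For reference, a minimal usage sketch of the new add_entries_to_db (the 'csx' database is assumed to already exist under ~/.osaca/data, and the entry values are placeholders rather than measured numbers):

from osaca.api import add_entries_to_db  # import path as used by model_importer.py below

add_entries_to_db(
    'csx',
    [
        {
            'name': 'vaddpd',
            'operands': [{'class': 'register', 'name': 'zmm'} for _ in range(3)],
            'throughput': None,  # unknown fields may also be omitted; they default to None
            'latency': None,
            'port_pressure': None,
            'uops': None,
        }
    ],
)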
osaca/api/ibench_interface.py (new executable file, 101 lines added)

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
import os

from jinja2 import Template


class IbenchAPI(object):
    def __init__(self, isa):
        # TODO
        self.isa = isa.lower()

    def create_ubenchmark(self):
        # TODO
        if self.isa == 'aarch64':
            self.create_ubench_aarch64()
        elif self.isa == 'x86':
            self.create_ubench_x86()

    def import_ibench_output(self, filepath):
        # TODO
        assert os.path.exists(filepath)
        raise NotImplementedError

    def create_ubench_aarch64(self):
        # TODO
        raise NotImplementedError

    def create_ubench_x86(self):
        # TODO
        raise NotImplementedError
# TODO
# template_x86 = Template()
template_aarch64 = Template(
'''
#define INSTR {{ instr }}
#define NINST {{ ninst }}
#define N x0
.globl ninst
.data
ninst:
.long NINST
{% if imd %}
IMD:
.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9
{% endif %}
.text
.globl latency
.type latency, @function
.align 32
latency:
{% if vector_regs %}
# push callee-save registers onto stack
sub sp, sp, #64
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
sub sp, sp, #64
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]
mov x4, N
fmov v0.2d, #1.00000000
fmov v1.2d, #1.00000000
fmov v2.2d, #1.00000000
fmov v3.2d, #1.00000000
fmov v4.2d, #1.00000000
fmov v5.2d, #1.00000000
fmov v6.2d, #1.00000000
fmov v7.2d, #1.00000000
fmov v8.2d, #1.00000000
fmov v9.2d, #1.00000000
fmov v10.2d, #1.00000000
fmov v11.2d, #1.00000000
fmov v12.2d, #1.00000000
fmov v13.2d, #1.00000000
fmov v14.2d, #1.00000000
fmov v15.2d, #1.00000000
{% endif %}
{% if gp_regs %}
{% endif %}
loop:
{{ loop_kernel }}
subs x4, x4, #1
bne loop
done:
{% if vector_regs %}
# pop callee-save registers from stack
ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]
add sp, sp, #64
ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
add sp, sp, #64
{% endif %}
{% if gp_regs %}
{% endif %}
ret
.size latency, .-latency
'''
)

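Once the stubs are filled in, rendering the AArch64 latency template is a plain Jinja2 call. A minimal sketch, assuming the module-level template_aarch64 shown above; the kernel string and parameter values are made-up placeholders:

kernel = 'fadd v0.2d, v0.2d, v1.2d'  # placeholder dependency chain for the timing loop
asm = template_aarch64.render(
    instr='fadd',        # substituted into #define INSTR
    ninst=1,             # substituted into #define NINST
    imd=False,           # no immediate data section needed
    vector_regs=True,    # emit the v8-v15 save/restore and the fmov initialization
    gp_regs=False,
    loop_kernel=kernel,  # body of the benchmark loop
)
print(asm)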
osaca/api/kerncraft_interface.py (normal file → executable file, 2 lines changed)

@@ -2,7 +2,7 @@
import collections
from osaca import Frontend
from osaca.frontend import Frontend
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, KernelDG, MachineModel, SemanticsAppender,
                             reduce_to_section)

osaca/data/model_importer.py (new executable file, 294 lines added)

@@ -0,0 +1,294 @@
#!/usr/bin/env python3
import argparse
import re
import sys
import xml.etree.ElementTree as ET
from distutils.version import StrictVersion
from itertools import groupby, product
from ruamel import yaml
from osaca.api import add_entries_to_db
from osaca.parser import ParserAArch64v81, ParserX86ATT
from osaca.semantics import MachineModel

ARCH_DICT = {
    'vulcan': 'aarch64',
    'snb': 'x86',
    'ivb': 'x86',
    'hsw': 'x86',
    'bdw': 'x86',
    'skl': 'x86',
    'skx': 'x86',
    'csx': 'x86',
}


def port_pressure_from_tag_attributes(attrib, arch, ports):
    # apply cycles for D ports
    data_port = re.compile(r'[0-9]D$')
    data_ports = [x[:-1] for x in filter(data_port.match, ports)]
    # format attributes
    cycles = attrib['ports'].split('+')
    cycles = [c.split('*') for c in cycles]
    for i, c in enumerate(cycles):
        cycles[i][0] = int(c[0])
        if str(c[1]).startswith('p'):
            cycles[i][1] = [p for p in c[1][1:]]
            if data_ports and data_ports == cycles[i][1]:
                # uops for data ports
                cycles.append([c[0], [x + 'D' for x in data_ports]])
        cycles[i][0] = [
            cycles[i][0] / num for num in range(1, len(cycles[i][1]) + 1) for _ in range(num)
        ]
    cycles = [list(product(c[0], c[1])) for c in cycles]
    all_options = []
    # iterate over all combinations of all uop options
    for cycles_combs in cycles:
        options = []
        tmp_opt = []
        total = cycles_combs[0][0]
        # iterate over all combinations of each uop option
        for comb in cycles_combs:
            # add options until they reach the total num of uops
            tmp_opt.append(comb)
            if sum([c[0] for c in tmp_opt]) == total:
                # copy this option as one of several to the cycle option list
                options.append(tmp_opt.copy())
                tmp_opt = []
        if len(tmp_opt) != 0:
            raise ValueError('Cannot compute port pressure')
        options = [x for x, _ in groupby(options)]
        all_options.append(options)
    all_options = list(product(*all_options))
    # find best scheduling
    port_pressure = {}
    for p in ports:
        port_pressure[p] = 0.0
    first = calculate_port_pressure(all_options[0])
    for key in first:
        port_pressure[key] = first[key]
    for option in all_options[1:]:
        tmp = calculate_port_pressure(option)
        if (max(list(tmp.values())) <= max(list(port_pressure.values()))) and (
            len(tmp) > len([x for x in port_pressure.values() if x != 0.0])
        ):
            for k in port_pressure:
                port_pressure[k] = tmp[k] if k in tmp else 0.0
    # check if calculation equals given throughput
    if abs(max(list(port_pressure.values())) - float(attrib['TP_ports'])) > 0.01:
        print('Contradicting TP value compared to port_pressure. Ignore port pressure.')
        for p in port_pressure:
            port_pressure[p] = 0.0
        return port_pressure
    # Also consider DIV pipeline
    if 'div_cycles' in attrib:
        div_port = re.compile(r'[0-9]DV$')
        div_ports = [x for x in filter(div_port.match, ports)]
        for dp in div_ports:
            port_pressure[dp] += int(attrib['div_cycles']) / len(div_ports)
    return port_pressure
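# Illustrative decomposition (the values are made up, not taken from instructions.xml):
# attrib['ports'] == '2*p0156+1*p23' is split into [[2, ['0', '1', '5', '6']], [1, ['2', '3']]];
# the uops of each term are then distributed over every feasible subset of its ports, the
# scheduling with the lowest maximum per-port load that covers the most ports is kept, and the
# result is cross-checked against attrib['TP_ports'].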


def calculate_port_pressure(pp_option):
    ports = {}
    for option in pp_option:
        for port in option:
            if port[1] in ports:
                ports[port[1]] += port[0]
            else:
                ports[port[1]] = port[0]
    return ports


def extract_paramters(instruction_tag, arch):
    isa = ARCH_DICT[arch.lower()]
    parser = ParserX86ATT()
    if isa == 'aarch64':
        parser = ParserAArch64v81()
    elif isa == 'x86':
        parser = ParserX86ATT()
    # Extract parameter components
    parameters = []  # used to store operand representations
    parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib['idx']))
    for parameter_tag in parameter_tags:
        parameter = {}
        # Ignore parameters with suppressed=1
        if int(parameter_tag.attrib.get('suppressed', '0')):
            continue
        p_type = parameter_tag.attrib['type']
        if p_type == 'imm':
            parameter['class'] = 'immediate'
            parameter['imd'] = 'int'
            parameters.append(parameter)
        elif p_type == 'mem':
            parameter['class'] = 'memory'
            parameter['base'] = 'gpr'
            parameter['offset'] = None
            parameter['index'] = None
            parameter['scale'] = 1
            parameters.append(parameter)
        elif p_type == 'reg':
            parameter['class'] = 'register'
            possible_regs = [
                parser.parse_register('%' + r) for r in parameter_tag.text.split(',')
            ]
            if possible_regs[0] is None:
                raise ValueError(
                    'Unknown register type for {} with {}.'.format(
                        parameter_tag.attrib, parameter_tag.text
                    )
                )
            if isa == 'x86':
                if parser.is_vector_register(possible_regs[0]['register']):
                    possible_regs[0]['register']['name'] = (
                        possible_regs[0]['register']['name'].lower()[:3]
                    )
                    if 'mask' in possible_regs[0]['register']:
                        possible_regs[0]['register']['mask'] = True
                else:
                    possible_regs[0]['register']['name'] = 'gpr'
            elif isa == 'aarch64':
                del possible_regs[0]['register']['name']
            for key in possible_regs[0]['register']:
                parameter[key] = possible_regs[0]['register'][key]
            parameters.append(parameter)
        elif p_type == 'relbr':
            parameter['class'] = 'identifier'
            parameters.append(parameter)
        elif p_type == 'agen':
            # FIXME actually only address generation
            parameter['class'] = 'memory'
            parameter['base'] = 'gpr'
            parameter['offset'] = None
            parameter['index'] = None
            parameter['scale'] = 1
            parameters.append(parameter)
        else:
            raise ValueError("Unknown parameter type {}".format(parameter_tag.attrib))
    return parameters
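# Illustrative operand mapping (the XML snippets are assumptions about instructions.xml, not quotes):
#   <operand idx="1" type="reg">RAX,RBX,...</operand>  ->  {'class': 'register', 'name': 'gpr'}
#   <operand idx="2" type="imm"/>                       ->  {'class': 'immediate', 'imd': 'int'}
#   <operand idx="3" type="mem"/>                       ->  {'class': 'memory', 'base': 'gpr', ...}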


def extract_model(tree, arch):
    mm = MachineModel(arch.lower())
    ports = mm._data['ports']
    model_data = []
    for instruction_tag in tree.findall('.//instruction'):
        ignore = False
        mnemonic = instruction_tag.attrib['asm']
        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, arch)
            if ARCH_DICT[arch.lower()] == 'x86':
                parameters.reverse()
        except ValueError as e:
            print(e, file=sys.stderr)
        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
        if arch_tag is None:
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter('measurement'):
            if 'TP_ports' in measurement_tag.attrib:
                throughput = measurement_tag.attrib['TP_ports']
            else:
                throughput = (
                    measurement_tag.attrib['TP'] if 'TP' in measurement_tag.attrib else None
                )
            uops = (
                int(measurement_tag.attrib['uops']) if 'uops' in measurement_tag.attrib else None
            )
            if 'ports' in measurement_tag.attrib:
                port_pressure.append(
                    port_pressure_from_tag_attributes(measurement_tag.attrib, arch, ports)
                )
            latencies = [
                int(l_tag.attrib['cycles'])
                for l_tag in measurement_tag.iter('latency')
                if 'cycles' in l_tag.attrib
            ]
            if len(latencies) == 0:
                latencies = [
                    int(l_tag.attrib['max_cycles'])
                    for l_tag in measurement_tag.iter('latency')
                    if 'max_cycles' in l_tag.attrib
                ]
            if latencies[1:] != latencies[:-1]:
                print("Contradicting latencies found:", mnemonic, file=sys.stderr)
                ignore = True
            elif latencies:
                latency = latencies[0]
        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter('IACA'), key=lambda i: StrictVersion(i.attrib['version'])
        ):
            if 'ports' in iaca_tag.attrib:
                port_pressure.append(
                    port_pressure_from_tag_attributes(iaca_tag.attrib, arch, ports)
                )
        if ignore:
            continue
        # Check if all are equal
        if port_pressure:
            if port_pressure[1:] != port_pressure[:-1]:
                print(
                    "Contradicting port occupancies, using latest IACA:", mnemonic, file=sys.stderr
                )
            port_pressure = port_pressure[-1]
            throughput = max(list(port_pressure.values()) + [0.0])
        else:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue
        # ---------------------------------------------
        model_data.append(
            {
                'name': mnemonic,
                'operands': parameters,
                'uops': uops,
                'throughput': throughput,
                'latency': latency,
                'port_pressure': port_pressure,
            }
        )
    return model_data


def architectures(tree):
    return set([a.attrib['name'] for a in tree.findall('.//architecture')])


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('xml', help='path of instructions.xml from http://uops.info')
    parser.add_argument(
        'arch',
        nargs='?',
        help='architecture to extract, use IACA abbreviations (e.g., SNB). '
        'If not given, all architectures are extracted and added to the user database.',
    )
    args = parser.parse_args()

    tree = ET.parse(args.xml)
    if args.arch:
        model_data = extract_model(tree, args.arch)
        print(yaml.dump(model_data, allow_unicode=True))
    else:
        for arch in architectures(tree):
            model_data = extract_model(tree, arch)
            add_entries_to_db(arch, model_data)


if __name__ == '__main__':
    main()
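Usage sketch based on the argument parser above: python osaca/data/model_importer.py instructions.xml SKL dumps the extracted SKL model as YAML to stdout, while omitting the architecture iterates over all architectures found in the XML and passes each model to add_entries_to_db.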


@@ -391,6 +391,15 @@ class ParserAArch64v81(BaseParser):
            exponent *= -1
        return float(ieee_val['mantissa']) * (10 ** exponent)

    def parse_register(self, register_string):
        raise NotImplementedError

    def is_gpr(self, register):
        raise NotImplementedError

    def is_vector_register(self, register):
        raise NotImplementedError

    def is_reg_dependend_of(self, reg_a, reg_b):
        prefixes_gpr = 'wx'
        prefixes_vec = 'bhsdqv'


@@ -97,6 +97,24 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(self.comment)
)
def parse_register(self, register_string):
register = pp.Group(
pp.Literal('%')
+ pp.Word(pp.alphanums).setResultsName('name')
+ pp.Optional(
pp.Literal('{')
+ pp.Literal('%')
+ pp.Word(pp.alphanums).setResultsName('mask')
+ pp.Literal('}')
)
).setResultsName(self.REGISTER_ID)
try:
return self.process_operand(
register.parseString(register_string, parseAll=True).asDict()
)
except pp.ParseException:
return None
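    # Illustrative results (sketch; the exact operand dict depends on process_operand):
    #   parse_register('%rax')        -> register operand named 'rax'
    #   parse_register('%zmm0{%k1}')  -> register operand named 'zmm0' with mask 'k1'
    #   parse_register('rax')         -> None, since the leading '%' is required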

    def parse_line(self, line, line_number=None):
        """
        Parse line and return instruction form.

@@ -129,9 +147,7 @@
        # 2. Parse label
        if result is None:
            try:
                result = self.process_operand(
                    self.label.parseString(line, parseAll=True).asDict()
                )
                result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
                result = AttrDict.convert_dict(result)
                instruction_form[self.LABEL_ID] = result[self.LABEL_ID]['name']
                if self.COMMENT_ID in result[self.LABEL_ID]:
@@ -297,7 +313,22 @@ class ParserX86ATT(BaseParser):
                return False
        return True

    def is_gpr(self, register):
        gpr_parser = (
            pp.CaselessLiteral('R')
            + pp.Word(pp.nums).setResultsName('id')
            + pp.Optional(pp.Word('dwbDWB', exact=1))
        )
        if self.is_basic_gpr(register):
            return True
        else:
            try:
                gpr_parser.parseString(register['name'], parseAll=True)
                return True
            except pp.ParseException:
                return False

    def is_vector_register(self, register):
        if len(register['name']) > 2 and register['name'][1:3] == 'mm':
        if len(register['name']) > 2 and register['name'][1:3].lower() == 'mm':
            return True
        return False
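    # Illustrative behaviour (sketch; register dicts as produced by parse_register above):
    #   is_gpr({'name': 'r12d'})             -> True, matches the R<id>[dwb] pattern
    #   is_vector_register({'name': 'ymm3'}) -> True, second and third character are 'mm'
    #   is_vector_register({'name': 'rax'})  -> False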