From cb7cec20a86a97b7470ca6925936c25be1b8c412 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Tue, 15 Oct 2019 12:22:49 +0200 Subject: [PATCH] working importer, better dumper --- osaca/data/model_importer.py | 45 +++++++++++++++++++---------------- osaca/parser/parser_x86att.py | 26 +++++++------------- osaca/semantics/hw_model.py | 26 ++++++++++++++++---- 3 files changed, 55 insertions(+), 42 deletions(-) diff --git a/osaca/data/model_importer.py b/osaca/data/model_importer.py index 28bf62d..4ee5a50 100755 --- a/osaca/data/model_importer.py +++ b/osaca/data/model_importer.py @@ -12,21 +12,20 @@ from osaca.semantics import MachineModel def port_pressure_from_tag_attributes(attrib): # '1*p015+1*p1+1*p23+1*p4+3*p5' -> - # [(1, '015'), (1, '1'), (1, '23'), (1, '4'), (3, '5')] + # [[1, '015'], [1, '1'], [1, '23'], [1, '4'], [3, '5']] port_occupation = [] for p in attrib['ports'].split('+'): cycles, ports = p.split('*p') - port_occupation.append((int(cycles), ports)) + port_occupation.append([int(cycles), ports]) # Also if 'div_cycles' in attrib: - port_occupation.append((int(attrib['div_cycles']), ('DV',))) + port_occupation.append([int(attrib['div_cycles']), ['DIV',]]) return port_occupation -def extract_paramters(instruction_tag, isa): - parser = get_parser(isa) +def extract_paramters(instruction_tag, parser, isa): # Extract parameter components parameters = [] # used to store string representations parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib['idx'])) @@ -85,7 +84,9 @@ def extract_paramters(instruction_tag, isa): def extract_model(tree, arch): - mm = MachineModel() + isa = MachineModel.get_isa_for_arch(arch) + mm = MachineModel(isa=isa) + parser = get_parser(isa) for instruction_tag in tree.findall('.//instruction'): ignore = False @@ -94,8 +95,8 @@ def extract_model(tree, arch): # Extract parameter components try: - parameters = extract_paramters(instruction_tag, mm.get_isa_for_arch(arch)) - if mm.get_isa_for_arch(arch).lower() == 'x86': + parameters = extract_paramters(instruction_tag, parser, isa) + if isa == 'x86': parameters.reverse() except ValueError as e: print(e, file=sys.stderr) @@ -141,16 +142,18 @@ def extract_model(tree, arch): if ignore: continue - # Add missing ports: - [p[1] for p in for pp in port_pressure] - mm.add_port() - # Check if all are equal if port_pressure: if port_pressure[1:] != port_pressure[:-1]: print("Contradicting port occupancies, using latest IACA:", mnemonic, file=sys.stderr) port_pressure = port_pressure[-1] + + # Add missing ports: + for ports in [pp[1] for pp in port_pressure]: + for p in ports: + mm.add_port(p) + throughput = max(mm.average_port_pressure(port_pressure)) else: # print("No data available for this architecture:", mnemonic, file=sys.stderr) @@ -168,21 +171,23 @@ def architectures(tree): def main(): parser = argparse.ArgumentParser() parser.add_argument('xml', help='path of instructions.xml from http://uops.info') - parser.add_argument( - 'arch', - nargs='?', - help='architecture to extract, use IACA abbreviations (e.g., SNB). ' - 'if not given, all will be extracted and saved to file in CWD.', - ) + parser.add_argument('arch', nargs='?', + help='architecture to extract, use IACA abbreviations (e.g., SNB). ' + 'if not given, all will be extracted and saved to file in CWD.') args = parser.parse_args() tree = ET.parse(args.xml) + print('Available architectures:', ', '.join(architectures(tree))) if args.arch: model = extract_model(tree, args.arch) print(model.dump()) else: - raise NotImplementedError() - + for arch in architectures(tree): + print(arch, end='') + model = extract_model(tree, arch.lower()) + with open('{}.yml'.format(arch.lower()), 'w') as f: + f.write(model.dump()) + print('.') if __name__ == '__main__': main() diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 149d8de..15c0da1 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -32,7 +32,7 @@ class ParserX86ATT(BaseParser): identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment) ).setResultsName(self.LABEL_ID) # Register: pp.Regex('^%[0-9a-zA-Z]+,?') - register = pp.Group( + self.register = pp.Group( pp.Literal('%') + pp.Word(pp.alphanums).setResultsName('name') + pp.Optional( @@ -55,9 +55,9 @@ class ParserX86ATT(BaseParser): memory = pp.Group( pp.Optional(offset.setResultsName('offset')) + pp.Literal('(') - + pp.Optional(register.setResultsName('base')) + + pp.Optional(self.register.setResultsName('base')) + pp.Optional(pp.Suppress(pp.Literal(','))) - + pp.Optional(register.setResultsName('index')) + + pp.Optional(self.register.setResultsName('index')) + pp.Optional(pp.Suppress(pp.Literal(','))) + pp.Optional(scale.setResultsName('scale')) + pp.Literal(')') @@ -67,8 +67,8 @@ class ParserX86ATT(BaseParser): directive_option = pp.Combine( pp.Word('#@.', exact=1) + pp.Word(pp.printables, excludeChars=',') ) - directive_parameter = ( - pp.quotedString | directive_option | identifier | hex_number | decimal_number | register + directive_parameter = (pp.quotedString | directive_option | identifier | hex_number | + decimal_number | self.register ) commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',') self.directive = pp.Group( @@ -84,8 +84,8 @@ class ParserX86ATT(BaseParser): pp.alphanums ).setResultsName('mnemonic') # Combine to instruction form - operand_first = pp.Group(register ^ immediate ^ memory ^ identifier) - operand_rest = pp.Group(register ^ immediate ^ memory) + operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier) + operand_rest = pp.Group(self.register ^ immediate ^ memory) self.instruction_parser = ( mnemonic + pp.Optional(operand_first.setResultsName('operand1')) @@ -99,19 +99,9 @@ class ParserX86ATT(BaseParser): ) def parse_register(self, register_string): - register = pp.Group( - pp.Literal('%') - + pp.Word(pp.alphanums).setResultsName('name') - + pp.Optional( - pp.Literal('{') - + pp.Literal('%') - + pp.Word(pp.alphanums).setResultsName('mask') - + pp.Literal('}') - ) - ).setResultsName(self.REGISTER_ID) try: return self.process_operand( - register.parseString(register_string, parseAll=True).asDict() + self.register.parseString(register_string, parseAll=True).asDict() ) except pp.ParseException: return None diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 01591c3..97d2f08 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -3,6 +3,7 @@ import os import re from itertools import product +from copy import deepcopy from ruamel import yaml @@ -11,13 +12,13 @@ from osaca.parser import ParserX86ATT class MachineModel(object): - def __init__(self, arch=None, path_to_yaml=None): + def __init__(self, arch=None, path_to_yaml=None, isa=None): if not arch and not path_to_yaml: self._data = { 'osaca_version': str(__version__), 'micro_architecture': None, 'arch_code': None, - 'isa': None, + 'isa': isa, 'ROB_size': None, 'retired_uOps_per_cycle': None, 'scheduler_size': None, @@ -146,14 +147,31 @@ class MachineModel(object): 'skl': 'x86', 'skx': 'x86', 'csx': 'x86', + 'wsm': 'x86', + 'nhm': 'x86', + 'kbl': 'x86', + 'cnl': 'x86', + 'cfl': 'x86', + 'zen+': 'x86', } arch = arch.lower() if arch in arch_dict: return arch_dict[arch].lower() - return None + else: + raise ValueError("Unknown architecture {!r}.".format(arch)) def dump(self): - return yaml.dump(self._data, Dumper=yaml.Dumper, allow_unicode=True) + # Replace instruction form's port_pressure with styled version for RoundtripDumper + formatted_instruction_forms = deepcopy(self._data['instruction_forms']) + for instruction_form in formatted_instruction_forms: + cs = yaml.comments.CommentedSeq(instruction_form['port_pressure']) + cs.fa.set_flow_style() + instruction_form['port_pressure'] = cs + + return (yaml.dump({k: v for k,v in self._data.items() if k != 'instruction_forms'}, + Dumper=yaml.Dumper, allow_unicode=True) + + yaml.dump({'instruction_forms': formatted_instruction_forms}, + Dumper=yaml.RoundTripDumper, allow_unicode=True, width=100)) ######################################################