From 94d7d35c0bb8b55f04378cdc454951853668920c Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 4 May 2020 18:50:58 +0200 Subject: [PATCH 01/15] more instructions --- osaca/data/isa/x86.yml | 695 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 693 insertions(+), 2 deletions(-) diff --git a/osaca/data/isa/x86.yml b/osaca/data/isa/x86.yml index 29c4096..54ceeba 100644 --- a/osaca/data/isa/x86.yml +++ b/osaca/data/isa/x86.yml @@ -2549,7 +2549,7 @@ instruction_forms: name: "xmm" source: true destination: true - - name: mulpd + - name: [mulpd, mulps] operands: - class: "register" name: "xmm" @@ -2559,7 +2559,597 @@ instruction_forms: name: "xmm" source: true destination: true - - name: [pand, por, pxor] + - name: [packssdw, packsswb, packusdw, packuswb] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [packssdw, packsswb, packusdw, packuswb] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [paddb, paddw, paddd, paddq, paddsb, paddsw, paddusb, paddusw] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [paddb, paddw, paddd, paddq, paddsb, paddsw, paddusb, paddusw] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pand, por, pxor, pandn] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pand, por, pxor, pandn] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pavgb, pavgw] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pavgb, pavgw] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pcmpeqb, pcmpeqw, pcmpeqd, pcmpeqq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pcmpeqb, pcmpeqw, pcmpeqd, pcmpeqq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pcmpgtb, pcmpgtw, pcmpgtd, pcmpgtq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pcmpgtb, pcmpgtw, pcmpgtd, pcmpgtq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: pmaddubsw + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: pmaddubsw + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: pmaddwd + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: pmaddwd + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pmaxsb, pmaxsw, pmaxsd, pmaxsq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pmaxsb, pmaxsw, pmaxsd, pmaxsq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pmaxub, pmaxuw, pmaxud, pmaxuq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pmaxub, pmaxuw] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pminsb, pminsw, pminsd, pminsq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pminsb, pminsw, pminsd, pminsq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [pminub, pminuw, pminud, pminuq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pminub, pminuw] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: pmuldq + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pmulhrsw, pmulhuw, pmulhw, pmullw] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [pmulhrsw, pmulhuw, pmulhw, pmullw] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: pmuludq + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: pmuludq + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: psadbw + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: psadbw + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: pshufb + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: pshufb + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psignb, psignw, psignd] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psignb, psignw, psignd] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psllw, pslld, psllq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psllw, pslld, psllq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psllw, pslld, psllq] + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psllw, pslld, psllq] + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: pslldq + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psraw, psrad, psraq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psraw, psrad, psraq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psraw, psrad, psraq] + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psraw, psrad, psraq] + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psrlw, psrld, psrlq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psrlw, psrld, psrlq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psrlw, psrld, psrlq] + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psrlw, psrld, psrlq] + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: psrldq + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psubb, psubw, psubd, psubq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psubb, psubw, psubd, psubq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psubsb, psubsw] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psubsb, psubsw] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [psubusb, psubusw] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [psubusb, psubusw] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [punpckhbw, punpckhwd, punpckhdq, punpckhqdq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [punpckhbw, punpckhwd, punpckhdq, punpckhqdq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [punpcklbw, punpcklwd, punpckldq, punpcklqdq] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true + - name: [punpcklbw, punpcklwd, punpckldq, punpcklqdq] + operands: + - class: "register" + name: "mm" + source: true + destination: false + - class: "register" + name: "mm" + source: true + destination: true + - name: [sha1msg2, sha1nexte] operands: - class: "register" name: "xmm" @@ -2808,6 +3398,16 @@ instruction_forms: name: "CF" source: false destination: true + - name: [subsd, subpd] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true - name: [subss, subps] operands: - class: "register" @@ -2829,6 +3429,10 @@ instruction_forms: source: true destination: false hidden_operands: + - class: "flag" + name: "AF" + source: false + destination: true - class: "flag" name: "SF" source: false @@ -2855,6 +3459,10 @@ instruction_forms: source: true destination: false hidden_operands: + - class: "flag" + name: "AF" + source: false + destination: true - class: "flag" name: "SF" source: false @@ -2881,6 +3489,10 @@ instruction_forms: source: true destination: false hidden_operands: + - class: "flag" + name: "AF" + source: false + destination: true - class: "flag" name: "SF" source: false @@ -2904,6 +3516,10 @@ instruction_forms: source: true destination: false hidden_operands: + - class: "flag" + name: "AF" + source: false + destination: true - class: "flag" name: "SF" source: false @@ -2916,6 +3532,16 @@ instruction_forms: name: "PF" source: false destination: true + - name: [unpcklps, unpcklpd] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: true - name: vldmxcsr operands: - class: "memory" @@ -3125,6 +3751,60 @@ instruction_forms: scale: "*" source: true destination: true + - name: [vptest, ptest] + operands: + - class: "register" + name: "xmm" + source: true + destination: false + - class: "register" + name: "xmm" + source: true + destination: false + hidden_operands: + - class: "flag" + name: "AF" + source: false + destination: true + - class: "flag" + name: "SF" + source: false + destination: true + - class: "flag" + name: "ZF" + source: false + destination: true + - class: "flag" + name: "PF" + source: false + destination: true + - name: vptest + operands: + - class: "register" + name: "ymm" + source: true + destination: false + - class: "register" + name: "ymm" + source: true + destination: false + hidden_operands: + - class: "flag" + name: "AF" + source: false + destination: true + - class: "flag" + name: "SF" + source: false + destination: true + - class: "flag" + name: "ZF" + source: false + destination: true + - class: "flag" + name: "PF" + source: false + destination: true - name: vzeroall operands: [] hidden_operands: @@ -3281,3 +3961,14 @@ instruction_forms: name: "xmm" source: true destination: true + - name: [shl, shr, shlq, shrq] + operands: + - class: "immediate" + imd: "int" + source: true + destination: false + - class: "register" + name: "gpr" + source: true + destination: true + From 7211dd0799cae1d642ef5e80d4c92d0c76aeec3c Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 25 Jun 2020 21:53:41 +0200 Subject: [PATCH 02/15] improvements for uops.info importer script --- osaca/data/model_importer.py | 91 ++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 20 deletions(-) diff --git a/osaca/data/model_importer.py b/osaca/data/model_importer.py index d9b7f7d..3b42eea 100755 --- a/osaca/data/model_importer.py +++ b/osaca/data/model_importer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import os.path import argparse +import os.path import sys import xml.etree.ElementTree as ET from distutils.version import StrictVersion @@ -8,8 +8,23 @@ from distutils.version import StrictVersion from osaca.parser import get_parser from osaca.semantics import MachineModel -intel_archs = ['CON', 'WOL', 'NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', - 'CNL', 'ICL'] +intel_archs = [ + 'CON', + 'WOL', + 'NHM', + 'WSM', + 'SNB', + 'IVB', + 'HSW', + 'BDW', + 'SKL', + 'SKX', + 'KBL', + 'CFL', + 'CNL', + 'ICL', +] +amd_archs = ['ZEN1', 'ZEN+', 'ZEN2'] def port_pressure_from_tag_attributes(attrib): @@ -19,6 +34,7 @@ def port_pressure_from_tag_attributes(attrib): for p in attrib['ports'].split('+'): cycles, ports = p.split('*') ports = ports.lstrip('p') + ports = ports.lstrip('FP') port_occupation.append([int(cycles), ports]) # Also consider div on DIV pipeline @@ -88,10 +104,10 @@ def extract_paramters(instruction_tag, parser, isa): return parameters -def extract_model(tree, arch): +def extract_model(tree, arch, skip_mem=True): try: isa = MachineModel.get_isa_for_arch(arch) - except: + except Exception: print("Skipping...", file=sys.stderr) return None mm = MachineModel(isa=isa) @@ -101,6 +117,7 @@ def extract_model(tree, arch): ignore = False mnemonic = instruction_tag.attrib['asm'] + iform = instruction_tag.attrib['iform'] # skip any mnemonic which contain spaces (e.g., "REX CRC32") if ' ' in mnemonic: continue @@ -118,6 +135,26 @@ def extract_model(tree, arch): arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]') if arch_tag is None: continue + # skip any instructions without port utilization + if not any(['ports' in x.attrib for x in arch_tag.findall('measurement')]): + print("Couldn't find port utilization, skip: ", iform, file=sys.stderr) + continue + # skip if computed and measured TP don't match + if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][ + 0 + ]: + print( + "Calculated TP from port utilization doesn't match TP, skip: ", + iform, + file=sys.stderr, + ) + continue + # skip if instruction contains memory operand + if skip_mem and any( + [x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')] + ): + print("Contains memory operand, skip: ", iform, file=sys.stderr) + continue # We collect all measurement and IACA information and compare them later for measurement_tag in arch_tag.iter('measurement'): if 'TP_ports' in measurement_tag.attrib: @@ -143,10 +180,14 @@ def extract_model(tree, arch): if 'max_cycles' in l_tag.attrib ] if latencies[1:] != latencies[:-1]: - print("Contradicting latencies found, using first:", mnemonic, latencies, - file=sys.stderr) + print( + "Contradicting latencies found, using smallest:", + iform, + latencies, + file=sys.stderr, + ) if latencies: - latency = latencies[0] + latency = min(latencies) if ignore: continue @@ -160,16 +201,14 @@ def extract_model(tree, arch): # Check if all are equal if port_pressure: if port_pressure[1:] != port_pressure[:-1]: - print( - "Contradicting port occupancies, using latest IACA:", - mnemonic, file=sys.stderr) + print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr) port_pressure = port_pressure[-1] else: # print("No data available for this architecture:", mnemonic, file=sys.stderr) continue - + # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake: - if arch.upper() in intel_archs and not arch.upper() in ['ICL']: + if arch.upper() in intel_archs and not arch.upper() in ['ICL']: if any([p['class'] == 'memory' for p in parameters]): # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode @@ -183,7 +222,7 @@ def extract_model(tree, arch): # Add (1, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4) if port_23 and not port_4: port_pressure.append((1, ['2D', '3D'])) - + # Add missing ports: for ports in [pp[1] for pp in port_pressure]: for p in ports: @@ -201,7 +240,7 @@ def rhs_comment(uncommented_string, comment): commented_string = "" for l in uncommented_string.split('\n'): - commented_string += ("{:<"+str(max_length)+"} # {}\n").format(l, comment) + commented_string += ("{:<" + str(max_length) + "} # {}\n").format(l, comment) return commented_string @@ -218,21 +257,33 @@ def main(): help='architecture to extract, use IACA abbreviations (e.g., SNB). ' 'if not given, all will be extracted and saved to file in CWD.', ) + parser.add_argument( + '--mem', + dest='skip_mem', + action='store_false', + help='add instruction forms including memory addressing operands, which are ' + 'skipped by default' + ) args = parser.parse_args() basename = os.path.basename(__file__) tree = ET.parse(args.xml) - print('Available architectures:', ', '.join(architectures(tree))) + print('# Available architectures:', ', '.join(architectures(tree))) if args.arch: - model = extract_model(tree, args.arch) + print('# Chosen architecture: {}'.format(args.arch)) + model = extract_model(tree, args.arch, args.skip_mem) if model is not None: - print(rhs_comment(model.dump(), basename+" "+sys.argv[0])) + print( + rhs_comment( + model.dump(), basename + " " + args.xml.split('/')[-1] + " " + args.arch + ) + ) else: for arch in architectures(tree): print(arch, end='') - model = extract_model(tree, arch.lower()) + model = extract_model(tree, arch.lower(), args.skip_mem) if model: - model_string = rhs_comment(model.dump(), basename+" "+arch) + model_string = rhs_comment(model.dump(), basename + " " + arch) with open('{}.yml'.format(arch.lower()), 'w') as f: f.write(model_string) From 93c1951097e96f5cf8ccecfec680ac2519fd5687 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 25 Jun 2020 21:54:52 +0200 Subject: [PATCH 03/15] prettified aarch64 ISA DB --- osaca/data/isa/aarch64.yml | 227 +++++++++++++++---------------------- 1 file changed, 90 insertions(+), 137 deletions(-) diff --git a/osaca/data/isa/aarch64.yml b/osaca/data/isa/aarch64.yml index 4422dac..13b294a 100644 --- a/osaca/data/isa/aarch64.yml +++ b/osaca/data/isa/aarch64.yml @@ -5,48 +5,31 @@ isa: "AArch64" # mnemonic op1 ... opN # means that op1 is the only destination operand and op2 to op(N) are source operands. instruction_forms: - - name: "fmla" + - name: fmla operands: - class: "register" - prefix: "v" - shape: "s" + prefix: "*" + shape: "*" source: true destination: true - class: "register" - prefix: "v" - shape: "s" + prefix: "*" + shape: "*" source: true destination: false - class: "register" - prefix: "v" - shape: "s" + prefix: "*" + shape: "*" source: true destination: false - - name: "fmla" + - name: ldp operands: - class: "register" - prefix: "v" - shape: "d" - source: true - destination: true - - class: "register" - prefix: "v" - shape: "d" - source: true - destination: false - - class: "register" - prefix: "v" - shape: "d" - source: true - destination: false - - name: "ldp" - operands: - - class: "register" - prefix: "d" + prefix: "*" source: false destination: true - class: "register" - prefix: "d" + prefix: "*" source: false destination: true - class: "memory" @@ -58,52 +41,14 @@ instruction_forms: post-indexed: false source: true destination: false - - name: "ldp" + - name: ldp operands: - class: "register" - prefix: "q" + prefix: "*" source: false destination: true - class: "register" - prefix: "q" - source: false - destination: true - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: false - post-indexed: false - source: true - destination: false - - name: "ldp" - operands: - - class: "register" - prefix: "q" - source: false - destination: true - - class: "register" - prefix: "q" - source: false - destination: true - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: true - post-indexed: false - source: true - destination: true - - name: "ldp" - operands: - - class: "register" - prefix: "q" - source: false - destination: true - - class: "register" - prefix: "q" + prefix: "*" source: false destination: true - class: "memory" @@ -115,14 +60,63 @@ instruction_forms: post-indexed: true source: true destination: true - - name: "stp" + - name: ldp operands: - class: "register" - prefix: "d" + prefix: "*" + source: false + destination: true + - class: "register" + prefix: "*" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: true + post-indexed: false + source: true + destination: true + - name: [ldr, ldur] + operands: + - class: "register" + prefix: "*" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: false + post-indexed: true + source: true + destination: true + - name: [ldr, ldur] + operands: + - class: "register" + prefix: "*" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: true + post-indexed: false + source: true + destination: true + - name: stp + operands: + - class: "register" + prefix: "*" source: true destination: false - class: "register" - prefix: "d" + prefix: "*" source: true destination: false - class: "memory" @@ -134,14 +128,33 @@ instruction_forms: post-indexed: false source: false destination: true - - name: "stp" + - name: stp operands: - class: "register" - prefix: "q" + prefix: "*" source: true destination: false - class: "register" - prefix: "q" + prefix: "*" + source: true + destination: false + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: true + post-indexed: false + source: false + destination: true + - name: stp + operands: + - class: "register" + prefix: "*" + source: true + destination: false + - class: "register" + prefix: "*" source: true destination: false - class: "memory" @@ -150,73 +163,13 @@ instruction_forms: index: "*" scale: "*" pre-indexed: false - post-indexed: false + post-indexed: true source: false destination: true - - name: "str" + - name: [str, stur] operands: - class: "register" - prefix: "x" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "str" - operands: - - class: "register" - prefix: "d" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "str" - operands: - - class: "register" - prefix: "q" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "stur" - operands: - - class: "register" - prefix: "q" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "stur" - operands: - - class: "register" - prefix: "d" + prefix: "*" source: true destination: false - class: "memory" From d3d1a896009b568c9f25603739f3b756028b1a4f Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 25 Jun 2020 21:55:10 +0200 Subject: [PATCH 04/15] two new instrs --- osaca/data/tx2.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/osaca/data/tx2.yml b/osaca/data/tx2.yml index 78f85bd..8c9765f 100644 --- a/osaca/data/tx2.yml +++ b/osaca/data/tx2.yml @@ -267,6 +267,34 @@ instruction_forms: throughput: 0.5 latency: 6.0 # 1*p01 port_pressure: [[1, '01']] +- name: frecpe + operands: + - class: register + prefix: v + shape: s + - class: register + prefix: v + shape: s + - class: register + prefix: v + shape: s + throughput: 0.5 + latency: 5.0 # 1*p01 + port_pressure: [[1, '01']] +- name: frecpe + operands: + - class: register + prefix: v + shape: d + - class: register + prefix: v + shape: d + - class: register + prefix: v + shape: d + throughput: 0.5 + latency: 5.0 # 1*p01 + port_pressure: [[1, '01']] - name: fsub operands: - class: register From 680122927560f3e7c912e98b781765fd404a443d Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 25 Jun 2020 21:56:18 +0200 Subject: [PATCH 05/15] PEP8 adjustments --- osaca/data/generate_mov_entries.py | 699 ++++++++++++++--------------- 1 file changed, 330 insertions(+), 369 deletions(-) diff --git a/osaca/data/generate_mov_entries.py b/osaca/data/generate_mov_entries.py index 09a4088..958f7c1 100755 --- a/osaca/data/generate_mov_entries.py +++ b/osaca/data/generate_mov_entries.py @@ -9,8 +9,8 @@ class MOVEntryBuilder: port_occupancy = defaultdict(Fraction) for uops, ports in port_pressure: for p in ports: - port_occupancy[p] += Fraction(uops, len(ports)) - return float(max(list(port_occupancy.values())+[0])) + port_occupancy[p] += Fraction(uops, len(ports)) + return float(max(list(port_occupancy.values()) + [0])) @staticmethod def classify(operands_types): @@ -18,10 +18,10 @@ class MOVEntryBuilder: store = 'mem' in operands_types[-1:] assert not (load and store), "Can not process a combined load-store instruction." return load, store - + def build_description( - self, instruction_name, operand_types, - port_pressure=[], latency=0, comment=None): + self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None + ): if comment: comment = " # " + comment else: @@ -32,10 +32,7 @@ class MOVEntryBuilder: if ot == 'imd': description += ' - class: immediate\n imd: int\n' elif ot.startswith('mem'): - description += ( - ' - class: memory\n' - ' base: "*"\n' - ' offset: "*"\n') + description += ' - class: memory\n' ' base: "*"\n' ' offset: "*"\n' if ot == 'mem_simple': description += ' index: ~\n' elif ot == 'mem_complex': @@ -45,18 +42,20 @@ class MOVEntryBuilder: description += ' scale: "*"\n' else: description += ' - class: register\n name: {}\n'.format(ot) - + description += ( ' latency: {latency}\n' ' port_pressure: {port_pressure!r}\n' ' throughput: {throughput}\n' - ' uops: {uops}\n').format( - latency=latency, - port_pressure=port_pressure, - throughput=self.compute_throughput(port_pressure), - uops=sum([i for i,p in port_pressure])) + ' uops: {uops}\n' + ).format( + latency=latency, + port_pressure=port_pressure, + throughput=self.compute_throughput(port_pressure), + uops=sum([i for i, p in port_pressure]), + ) return description - + def parse_port_pressure(self, port_pressure_str): """ Example: @@ -68,7 +67,7 @@ class MOVEntryBuilder: cycles, ports = p.split('*p') port_pressure.append([int(cycles), ports]) return port_pressure - + def process_item(self, instruction_form, resources): """ Example: @@ -84,9 +83,7 @@ class MOVEntryBuilder: class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): # for SNB and IVB - def build_description( - self, instruction_name, operand_types, - port_pressure=[], latency=0): + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): load, store = self.classify(operand_types) comment = None @@ -100,15 +97,14 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): comment = "with store" return MOVEntryBuilder.build_description( - self, instruction_name, operand_types, port_pressure, latency, comment) + self, instruction_name, operand_types, port_pressure, latency, comment + ) class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder): # for HSW, BDW, SKX and CSX - def build_description( - self, instruction_name, operand_types, - port_pressure=[], latency=0): + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): load, store = self.classify(operand_types) if load: @@ -116,7 +112,8 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder): latency += 4 comment = "with load" return MOVEntryBuilder.build_description( - self, instruction_name, operand_types, port_pressure, latency, comment) + self, instruction_name, operand_types, port_pressure, latency, comment + ) if store: port_pressure_simple = port_pressure + [[1, '237'], [1, '4']] operands_simple = ['mem_simple' if o == 'mem' else o for o in operand_types] @@ -125,16 +122,28 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder): latency += 0 return ( MOVEntryBuilder.build_description( - self, instruction_name, operands_simple, port_pressure_simple, latency, - "with store, simple AGU") + - '\n' + - MOVEntryBuilder.build_description( - self, instruction_name, operands_complex, port_pressure_complex, latency, - "with store, complex AGU")) - + self, + instruction_name, + operands_simple, + port_pressure_simple, + latency, + "with store, simple AGU", + ) + + '\n' + + MOVEntryBuilder.build_description( + self, + instruction_name, + operands_complex, + port_pressure_complex, + latency, + "with store, complex AGU", + ) + ) + # Register only: return MOVEntryBuilder.build_description( - self, instruction_name, operand_types, port_pressure, latency) + self, instruction_name, operand_types, port_pressure, latency + ) np7 = MOVEntryBuilderIntelNoPort7AGU() @@ -149,7 +158,6 @@ snb_mov_instructions = [ ('mov imd gpr', ('1*p015', 1)), ('mov imd mem', ('', 0)), ('movabs imd gpr', ('1*p015', 1)), # AT&T version - # https://www.felixcloutier.com/x86/movapd ('movapd xmm xmm', ('1*p5', 1)), ('movapd xmm mem', ('', 0)), @@ -160,7 +168,6 @@ snb_mov_instructions = [ ('vmovapd ymm ymm', ('1*p5', 1)), ('vmovapd ymm mem', ('', 0)), ('vmovapd mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movaps ('movaps xmm xmm', ('1*p5', 1)), ('movaps xmm mem', ('', 0)), @@ -171,7 +178,6 @@ snb_mov_instructions = [ ('vmovaps ymm ymm', ('1*p5', 1)), ('movaps ymm mem', ('', 0)), ('movaps mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movd:movq ('movd gpr mm', ('1*p5', 1)), ('movd mem mm', ('', 0)), @@ -197,7 +203,6 @@ snb_mov_instructions = [ ('vmovd xmm mem', ('', 0)), ('vmovq xmm gpr', ('1*p0', 1)), ('vmovq xmm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movddup ('movddup xmm xmm', ('1*p5', 1)), ('movddup mem xmm', ('', 0)), @@ -205,10 +210,8 @@ snb_mov_instructions = [ ('vmovddup mem xmm', ('', 0)), ('vmovddup ymm ymm', ('1*p5', 1)), ('vmovddup mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movdq2q ('movdq2q xmm mm', ('1*p015+1*p5', 1)), - # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 ('movdqa xmm xmm', ('1*p015', 1)), ('movdqa mem xmm', ('', 0)), @@ -219,7 +222,6 @@ snb_mov_instructions = [ ('vmovdqa ymm ymm', ('1*p05', 1)), ('vmovdqa mem ymm', ('', 0)), ('vmovdqa ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 ('movdqu xmm xmm', ('1*p015', 1)), ('movdqu mem xmm', ('', 0)), @@ -230,75 +232,60 @@ snb_mov_instructions = [ ('vmovdqu ymm ymm', ('1*p05', 1)), ('vmovdqu mem ymm', ('', 0)), ('vmovdqu ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movhlps ('movhlps xmm xmm', ('1*p5', 1)), ('vmovhlps xmm xmm xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movhpd ('movhpd mem xmm', ('1*p5', 1)), ('vmovhpd mem xmm xmm', ('1*p5', 1)), ('movhpd xmm mem', ('', 0)), ('vmovhpd mem xmm', ('', 0)), - # https://www.felixcloutier.com/x86/movhps ('movhps mem xmm', ('1*p5', 1)), ('vmovhps mem xmm xmm', ('1*p5', 1)), ('movhps xmm mem', ('', 0)), ('vmovhps mem xmm', ('', 0)), - # https://www.felixcloutier.com/x86/movlhps ('movlhps xmm xmm', ('1*p5', 1)), ('vmovlhps xmm xmm xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movlpd ('movlpd mem xmm', ('1*p5', 1)), ('vmovlpd mem xmm xmm', ('1*p5', 1)), ('movlpd xmm mem', ('', 0)), ('vmovlpd mem xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movlps ('movlps mem xmm', ('1*p5', 1)), ('vmovlps mem xmm xmm', ('1*p5', 1)), ('movlps xmm mem', ('', 0)), ('vmovlps mem xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movmskpd ('movmskpd xmm gpr', ('1*p0', 2)), ('vmovmskpd xmm gpr', ('1*p0', 2)), ('vmovmskpd ymm gpr', ('1*p0', 2)), - # https://www.felixcloutier.com/x86/movmskps ('movmskps xmm gpr', ('1*p0', 1)), ('vmovmskps xmm gpr', ('1*p0', 1)), ('vmovmskps ymm gpr', ('1*p0', 1)), - # https://www.felixcloutier.com/x86/movntdq ('movntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntdq ymm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntdqa ('movntdqa mem xmm', ('', 0)), ('vmovntdqa mem xmm', ('', 0)), ('vmovntdqa mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movnti ('movnti gpr mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntpd ('movntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntpd ymm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntps ('movntps xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntps xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntps ymm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntq ('movntq mm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movq ('movq mm mm', ('', 0)), ('movq mem mm', ('', 0)), @@ -309,14 +296,11 @@ snb_mov_instructions = [ ('vmovq xmm xmm', ('1*p015', 1)), ('vmovq mem xmm', ('', 0)), ('vmovq xmm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movq2dq ('movq2dq mm xmm', ('1*p015', 1)), - # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq # TODO combined load-store is currently not supported # ('movs mem mem', ()), - # https://www.felixcloutier.com/x86/movsd ('movsd xmm xmm', ('1*p5', 1)), ('movsd mem xmm', ('', 0)), @@ -324,7 +308,6 @@ snb_mov_instructions = [ ('vmovsd xmm xmm xmm', ('1*p5', 1)), ('vmovsd mem xmm', ('', 0)), ('vmovsd xmm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movshdup ('movshdup xmm xmm', ('1*p5', 1)), ('movshdup mem xmm', ('', 0)), @@ -332,7 +315,6 @@ snb_mov_instructions = [ ('vmovshdup mem xmm', ('', 0)), ('vmovshdup ymm ymm', ('1*p5', 1)), ('vmovshdup mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movsldup ('movsldup xmm xmm', ('1*p5', 1)), ('movsldup mem xmm', ('', 0)), @@ -340,7 +322,6 @@ snb_mov_instructions = [ ('vmovsldup mem xmm', ('', 0)), ('vmovsldup ymm ymm', ('1*p5', 1)), ('vmovsldup mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movss ('movss xmm xmm', ('1*p5', 1)), ('movss mem xmm', ('', 0)), @@ -349,7 +330,6 @@ snb_mov_instructions = [ ('vmovss xmm xmm', ('1*p5', 1)), ('vmovss xmm mem', ('', 0)), ('movss mem xmm', ('', 0)), - # https://www.felixcloutier.com/x86/movsx:movsxd ('movsx gpr gpr', ('1*p015', 1)), ('movsx mem gpr', ('', 0)), @@ -363,7 +343,6 @@ snb_mov_instructions = [ ('movsl mem gpr', ('', 0)), # AT&T version ('movsq gpr gpr', ('1*p015', 1)), # AT&T version ('movsq mem gpr', ('', 0)), # AT&T version - # https://www.felixcloutier.com/x86/movupd ('movupd xmm xmm', ('1*p5', 1)), ('movupd mem xmm', ('', 0)), @@ -374,7 +353,6 @@ snb_mov_instructions = [ ('vmovupd ymm ymm', ('1*p5', 1)), ('vmovupd mem ymm', ('', 0)), ('vmovupd ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movups ('movups xmm xmm', ('1*p5', 1)), ('movups mem xmm', ('', 0)), @@ -385,7 +363,6 @@ snb_mov_instructions = [ ('vmovups ymm ymm', ('1*p5', 1)), ('vmovups mem ymm', ('', 0)), ('vmovups ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movzx ('movzx gpr gpr', ('1*p015', 1)), ('movzx mem gpr', ('', 0)), @@ -397,7 +374,6 @@ snb_mov_instructions = [ ('movzl mem gpr', ('', 0)), # AT&T version ('movzq gpr gpr', ('1*p015', 1)), # AT&T version ('movzq mem gpr', ('', 0)), # AT&T version - # https://www.felixcloutier.com/x86/cmovcc ('cmova gpr gpr', ('1*p015+2*p05', 2)), ('cmova mem gpr', ('1*p015+2*p05', 2)), @@ -459,12 +435,10 @@ snb_mov_instructions = [ ('cmovs mem gpr', ('1*p015+1*p05', 2)), ('cmovz gpr gpr', ('1*p015+1*p05', 2)), ('cmovz mem gpr', ('1*p015+1*p05', 2)), - # https://www.felixcloutier.com/x86/pmovmskb ('pmovmskb mm gpr', ('1*p0', 2)), ('pmovmskb xmm gpr', ('1*p0', 2)), ('vpmovmskb xmm gpr', ('1*p0', 2)), - # https://www.felixcloutier.com/x86/pmovsx ('pmovsxbw xmm xmm', ('1*p15', 1)), ('pmovsxbw mem xmm', ('1*p15', 1)), @@ -484,7 +458,6 @@ snb_mov_instructions = [ ('vpmovsxbd mem ymm', ('1*p15', 1)), ('vpmovsxbq ymm ymm', ('1*p15', 1)), ('vpmovsxbq mem ymm', ('1*p15', 1)), - # https://www.felixcloutier.com/x86/pmovzx ('pmovzxbw xmm xmm', ('1*p15', 1)), ('pmovzxbw mem xmm', ('1*p15', 1)), @@ -494,307 +467,294 @@ snb_mov_instructions = [ ('vpmovzxbw mem ymm', ('1*p15', 1)), ] -ivb_mov_instructions = list(OrderedDict(snb_mov_instructions + [ - # https://www.felixcloutier.com/x86/mov - ('mov gpr gpr', ('', 0)), - ('mov imd gpr', ('', 0)), +ivb_mov_instructions = list( + OrderedDict( + snb_mov_instructions + + [ + # https://www.felixcloutier.com/x86/mov + ('mov gpr gpr', ('', 0)), + ('mov imd gpr', ('', 0)), + # https://www.felixcloutier.com/x86/movapd + ('movapd xmm xmm', ('', 0)), + ('vmovapd xmm xmm', ('', 0)), + ('vmovapd ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movaps + ('movaps xmm xmm', ('', 0)), + ('vmovaps xmm xmm', ('', 0)), + ('vmovaps ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + ('movdqa xmm xmm', ('', 0)), + ('vmovdqa xmm xmm', ('', 0)), + ('vmovdqa ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + ('movdqu xmm xmm', ('', 0)), + ('vmovdqu xmm xmm', ('', 0)), + ('vmovdqu ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movupd + ('movupd xmm xmm', ('', 0)), + ('vmovupd xmm xmm', ('', 0)), + ('vmovupd ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movupd + ('movups xmm xmm', ('', 0)), + ('vmovups xmm xmm', ('', 0)), + ('vmovups ymm ymm', ('', 0)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movapd - ('movapd xmm xmm', ('', 0)), - ('vmovapd xmm xmm', ('', 0)), - ('vmovapd ymm ymm', ('', 0)), +hsw_mov_instructions = list( + OrderedDict( + ivb_mov_instructions + + [ + # https://www.felixcloutier.com/x86/mov + ('mov imd gpr', ('1*p0156', 1)), + ('mov gpr gpr', ('1*p0156', 1)), + ('movabs imd gpr', ('1*p0156', 1)), # AT&T version + # https://www.felixcloutier.com/x86/movbe + ('movbe gpr mem', ('1*p15', 6)), + ('movbe mem gpr', ('1*p15', 6)), + # https://www.felixcloutier.com/x86/movmskpd + ('movmskpd xmm gpr', ('1*p0', 3)), + ('vmovmskpd xmm gpr', ('1*p0', 3)), + ('vmovmskpd ymm gpr', ('1*p0', 3)), + # https://www.felixcloutier.com/x86/movmskps + ('movmskps xmm gpr', ('1*p0', 3)), + ('vmovmskps xmm gpr', ('1*p0', 3)), + ('vmovmskps ymm gpr', ('1*p0', 3)), + # https://www.felixcloutier.com/x86/movsx:movsxd + ('movsx gpr gpr', ('1*p0156', 1)), + ('movsb gpr gpr', ('1*p0156', 1)), # AT&T version + ('movsw gpr gpr', ('1*p0156', 1)), # AT&T version + ('movsl gpr gpr', ('1*p0156', 1)), # AT&T version + ('movsq gpr gpr', ('1*p0156', 1)), # AT&T version + # https://www.felixcloutier.com/x86/movzx + ('movzx gpr gpr', ('1*p0156', 1)), + ('movzb gpr gpr', ('1*p0156', 1)), # AT&T version + ('movzw gpr gpr', ('1*p0156', 1)), # AT&T version + ('movzl gpr gpr', ('1*p0156', 1)), # AT&T version + ('movzq gpr gpr', ('1*p0156', 1)), # AT&T version + # https://www.felixcloutier.com/x86/cmovcc + ('cmova gpr gpr', ('1*p0156+2*p06', 2)), + ('cmova mem gpr', ('1*p0156+2*p06', 2)), + ('cmovae gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovae mem gpr', ('1*p0156+2*p06', 2)), + ('cmovb gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovb mem gpr', ('1*p0156+1*p06', 2)), + ('cmovbe gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovbe mem gpr', ('1*p0156+2*p06', 2)), + ('cmovc gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovc mem gpr', ('1*p0156+1*p06', 2)), + ('cmove gpr gpr', ('1*p0156+1*p06', 2)), + ('cmove mem gpr', ('1*p0156+1*p06', 2)), + ('cmovg gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovg mem gpr', ('1*p0156+1*p06', 2)), + ('cmovge gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovge mem gpr', ('1*p0156+1*p06', 2)), + ('cmovl gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovl mem gpr', ('1*p0156+1*p06', 2)), + ('cmovle gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovle mem gpr', ('1*p0156+1*p06', 2)), + ('cmovna gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovna mem gpr', ('1*p0156+2*p06', 2)), + ('cmovnae gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnae mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovnbe mem gpr', ('1*p0156+2*p06', 2)), + ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnc gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnc mem gpr', ('1*p0156+1*p06', 2)), + ('cmovne gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovne mem gpr', ('1*p0156+1*p06', 2)), + ('cmovng gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovng mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnge gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnge mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnl gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnl mem gpr', ('1*p0156+1*p06', 2)), + ('cmovno gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovno mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnp gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnp mem gpr', ('1*p0156+1*p06', 2)), + ('cmovns gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovns mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnz gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnz mem gpr', ('1*p0156+1*p06', 2)), + ('cmovo gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovo mem gpr', ('1*p0156+1*p06', 2)), + ('cmovp gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovp mem gpr', ('1*p0156+1*p06', 2)), + ('cmovpe gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovpe mem gpr', ('1*p0156+1*p06', 2)), + ('cmovpo gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovpo mem gpr', ('1*p0156+1*p06', 2)), + ('cmovs gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovs mem gpr', ('1*p0156+1*p06', 2)), + ('cmovz gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovz mem gpr', ('1*p0156+1*p06', 2)), + # https://www.felixcloutier.com/x86/pmovmskb + ('pmovmskb mm gpr', ('1*p0', 3)), + ('pmovmskb xmm gpr', ('1*p0', 3)), + ('vpmovmskb xmm gpr', ('1*p0', 3)), + ('vpmovmskb ymm gpr', ('1*p0', 3)), + # https://www.felixcloutier.com/x86/pmovsx + ('pmovsxbw xmm xmm', ('1*p5', 1)), + ('pmovsxbw mem xmm', ('1*p5', 1)), + ('pmovsxbd xmm xmm', ('1*p5', 1)), + ('pmovsxbd mem xmm', ('1*p5', 1)), + ('pmovsxbq xmm xmm', ('1*p5', 1)), + ('pmovsxbq mem xmm', ('1*p5', 1)), + ('vpmovsxbw xmm xmm', ('1*p5', 1)), + ('vpmovsxbw mem xmm', ('1*p5', 1)), + ('vpmovsxbd xmm xmm', ('1*p5', 1)), + ('vpmovsxbd mem xmm', ('1*p5', 1)), + ('vpmovsxbq xmm xmm', ('1*p5', 1)), + ('vpmovsxbq mem xmm', ('1*p5', 1)), + ('vpmovsxbw ymm ymm', ('1*p5', 1)), + ('vpmovsxbw mem ymm', ('1*p5', 1)), + ('vpmovsxbd ymm ymm', ('1*p5', 1)), + ('vpmovsxbd mem ymm', ('1*p5', 1)), + ('vpmovsxbq ymm ymm', ('1*p5', 1)), + ('vpmovsxbq mem ymm', ('1*p5', 1)), + # https://www.felixcloutier.com/x86/pmovzx + ('pmovzxbw xmm xmm', ('1*p5', 1)), + ('pmovzxbw mem xmm', ('1*p5', 1)), + ('vpmovzxbw xmm xmm', ('1*p5', 1)), + ('vpmovzxbw mem xmm', ('1*p5', 1)), + ('vpmovzxbw ymm ymm', ('1*p5', 1)), + ('vpmovzxbw mem ymm', ('1*p5', 1)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movaps - ('movaps xmm xmm', ('', 0)), - ('vmovaps xmm xmm', ('', 0)), - ('vmovaps ymm ymm', ('', 0)), +bdw_mov_instructions = list( + OrderedDict( + hsw_mov_instructions + + [ + # https://www.felixcloutier.com/x86/cmovcc + ('cmova gpr gpr', ('2*p06', 1)), + ('cmova mem gpr', ('2*p06', 1)), + ('cmovae gpr gpr', ('1*p06', 1)), + ('cmovae mem gpr', ('2*p06', 1)), + ('cmovb gpr gpr', ('2*p06', 1)), + ('cmovb mem gpr', ('1*p06', 1)), + ('cmovbe gpr gpr', ('2*p06', 1)), + ('cmovbe mem gpr', ('2*p06', 1)), + ('cmovc gpr gpr', ('1*p06', 1)), + ('cmovc mem gpr', ('1*p06', 1)), + ('cmove gpr gpr', ('1*p06', 1)), + ('cmove mem gpr', ('1*p06', 1)), + ('cmovg gpr gpr', ('1*p06', 1)), + ('cmovg mem gpr', ('1*p06', 1)), + ('cmovge gpr gpr', ('1*p06', 1)), + ('cmovge mem gpr', ('1*p06', 1)), + ('cmovl gpr gpr', ('1*p06', 1)), + ('cmovl mem gpr', ('1*p06', 1)), + ('cmovle gpr gpr', ('1*p06', 1)), + ('cmovle mem gpr', ('1*p06', 1)), + ('cmovna gpr gpr', ('2*p06', 1)), + ('cmovna mem gpr', ('2*p06', 1)), + ('cmovnae gpr gpr', ('1*p06', 1)), + ('cmovnae mem gpr', ('1*p06', 1)), + ('cmovnb gpr gpr', ('1*p06', 1)), + ('cmovnb mem gpr', ('1*p06', 1)), + ('cmovnbe gpr gpr', ('2*p06', 1)), + ('cmovnbe mem gpr', ('2*p06', 1)), + ('cmovnb gpr gpr', ('1*p06', 1)), + ('cmovnb mem gpr', ('1*p06', 1)), + ('cmovnc gpr gpr', ('1*p06', 1)), + ('cmovnc mem gpr', ('1*p06', 1)), + ('cmovne gpr gpr', ('1*p06', 1)), + ('cmovne mem gpr', ('1*p06', 1)), + ('cmovng gpr gpr', ('1*p06', 1)), + ('cmovng mem gpr', ('1*p06', 1)), + ('cmovnge gpr gpr', ('1*p06', 1)), + ('cmovnge mem gpr', ('1*p06', 1)), + ('cmovnl gpr gpr', ('1*p06', 1)), + ('cmovnl mem gpr', ('1*p06', 1)), + ('cmovno gpr gpr', ('1*p06', 1)), + ('cmovno mem gpr', ('1*p06', 1)), + ('cmovnp gpr gpr', ('1*p06', 1)), + ('cmovnp mem gpr', ('1*p06', 1)), + ('cmovns gpr gpr', ('1*p06', 1)), + ('cmovns mem gpr', ('1*p06', 1)), + ('cmovnz gpr gpr', ('1*p06', 1)), + ('cmovnz mem gpr', ('1*p06', 1)), + ('cmovo gpr gpr', ('1*p06', 1)), + ('cmovo mem gpr', ('1*p06', 1)), + ('cmovp gpr gpr', ('1*p06', 1)), + ('cmovp mem gpr', ('1*p06', 1)), + ('cmovpe gpr gpr', ('1*p06', 1)), + ('cmovpe mem gpr', ('1*p06', 1)), + ('cmovpo gpr gpr', ('1*p06', 1)), + ('cmovpo mem gpr', ('1*p06', 1)), + ('cmovs gpr gpr', ('1*p06', 1)), + ('cmovs mem gpr', ('1*p06', 1)), + ('cmovz gpr gpr', ('1*p06', 1)), + ('cmovz mem gpr', ('1*p06', 1)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 - ('movdqa xmm xmm', ('', 0)), - ('vmovdqa xmm xmm', ('', 0)), - ('vmovdqa ymm ymm', ('', 0)), +skx_mov_instructions = list( + OrderedDict( + bdw_mov_instructions + + [ + # https://www.felixcloutier.com/x86/movapd + # TODO with masking! + # TODO the following may eliminate or be bound to 1*p0156: + # ('movapd xmm xmm', ('1*p5', 1)), + # ('vmovapd xmm xmm', ('1*p5', 1)), + # ('vmovapd ymm ymm', ('1*p5', 1)), + # https://www.felixcloutier.com/x86/movaps + # TODO with masking! + # TODO the following may eliminate or be bound to 1*p0156: + # ('movaps xmm xmm', ('1*p5', 1)), + # ('vmovaps xmm xmm', ('1*p5', 1)), + # ('vmovaps ymm ymm', ('1*p5', 1)), + # https://www.felixcloutier.com/x86/movbe + ('movbe gpr mem', ('1*p15', 4)), + ('movbe mem gpr', ('1*p15', 4)), + # https://www.felixcloutier.com/x86/movddup + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movntdq + ('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntdqa + ('vmovntdqa mem zmm', ('', 0)), + # https://www.felixcloutier.com/x86/movntpd + ('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntps + ('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movq2dq + ('movq2dq mm xmm', ('1*p0+1*p015', 1)), + # https://www.felixcloutier.com/x86/movsd + # TODO with masking! + # https://www.felixcloutier.com/x86/movshdup + # TODO with masking! + # https://www.felixcloutier.com/x86/movsldup + # TODO with masking! + # https://www.felixcloutier.com/x86/movss + # TODO with masking! + # https://www.felixcloutier.com/x86/movupd + # TODO with masking! + # https://www.felixcloutier.com/x86/movups + # TODO with masking! + # https://www.felixcloutier.com/x86/pmovsx + # TODO with masking! + ('vpmovsxbw ymm zmm', ('1*p5', 3)), + ('vpmovsxbw mem zmm', ('1*p5', 1)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 - ('movdqu xmm xmm', ('', 0)), - ('vmovdqu xmm xmm', ('', 0)), - ('vmovdqu ymm ymm', ('', 0)), +csx_mov_instructions = OrderedDict(skx_mov_instructions + []).items() - # https://www.felixcloutier.com/x86/movupd - ('movupd xmm xmm', ('', 0)), - ('vmovupd xmm xmm', ('', 0)), - ('vmovupd ymm ymm', ('', 0)), - - # https://www.felixcloutier.com/x86/movupd - ('movups xmm xmm', ('', 0)), - ('vmovups xmm xmm', ('', 0)), - ('vmovups ymm ymm', ('', 0)), -]).items()) - -hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [ - # https://www.felixcloutier.com/x86/mov - ('mov imd gpr', ('1*p0156', 1)), - ('mov gpr gpr', ('1*p0156', 1)), - ('movabs imd gpr', ('1*p0156', 1)), # AT&T version - - # https://www.felixcloutier.com/x86/movbe - ('movbe gpr mem', ('1*p15', 6)), - ('movbe mem gpr', ('1*p15', 6)), - - # https://www.felixcloutier.com/x86/movmskpd - ('movmskpd xmm gpr', ('1*p0', 3)), - ('vmovmskpd xmm gpr', ('1*p0', 3)), - ('vmovmskpd ymm gpr', ('1*p0', 3)), - - # https://www.felixcloutier.com/x86/movmskps - ('movmskps xmm gpr', ('1*p0', 3)), - ('vmovmskps xmm gpr', ('1*p0', 3)), - ('vmovmskps ymm gpr', ('1*p0', 3)), - - # https://www.felixcloutier.com/x86/movsx:movsxd - ('movsx gpr gpr', ('1*p0156', 1)), - ('movsb gpr gpr', ('1*p0156', 1)), # AT&T version - ('movsw gpr gpr', ('1*p0156', 1)), # AT&T version - ('movsl gpr gpr', ('1*p0156', 1)), # AT&T version - ('movsq gpr gpr', ('1*p0156', 1)), # AT&T version - - # https://www.felixcloutier.com/x86/movzx - ('movzx gpr gpr', ('1*p0156', 1)), - ('movzb gpr gpr', ('1*p0156', 1)), # AT&T version - ('movzw gpr gpr', ('1*p0156', 1)), # AT&T version - ('movzl gpr gpr', ('1*p0156', 1)), # AT&T version - ('movzq gpr gpr', ('1*p0156', 1)), # AT&T version - - # https://www.felixcloutier.com/x86/cmovcc - ('cmova gpr gpr', ('1*p0156+2*p06', 2)), - ('cmova mem gpr', ('1*p0156+2*p06', 2)), - ('cmovae gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovae mem gpr', ('1*p0156+2*p06', 2)), - ('cmovb gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovb mem gpr', ('1*p0156+1*p06', 2)), - ('cmovbe gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovbe mem gpr', ('1*p0156+2*p06', 2)), - ('cmovc gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovc mem gpr', ('1*p0156+1*p06', 2)), - ('cmove gpr gpr', ('1*p0156+1*p06', 2)), - ('cmove mem gpr', ('1*p0156+1*p06', 2)), - ('cmovg gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovg mem gpr', ('1*p0156+1*p06', 2)), - ('cmovge gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovge mem gpr', ('1*p0156+1*p06', 2)), - ('cmovl gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovl mem gpr', ('1*p0156+1*p06', 2)), - ('cmovle gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovle mem gpr', ('1*p0156+1*p06', 2)), - ('cmovna gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovna mem gpr', ('1*p0156+2*p06', 2)), - ('cmovnae gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnae mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovnbe mem gpr', ('1*p0156+2*p06', 2)), - ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnc gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnc mem gpr', ('1*p0156+1*p06', 2)), - ('cmovne gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovne mem gpr', ('1*p0156+1*p06', 2)), - ('cmovng gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovng mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnge gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnge mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnl gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnl mem gpr', ('1*p0156+1*p06', 2)), - ('cmovno gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovno mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnp gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnp mem gpr', ('1*p0156+1*p06', 2)), - ('cmovns gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovns mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnz gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnz mem gpr', ('1*p0156+1*p06', 2)), - ('cmovo gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovo mem gpr', ('1*p0156+1*p06', 2)), - ('cmovp gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovp mem gpr', ('1*p0156+1*p06', 2)), - ('cmovpe gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovpe mem gpr', ('1*p0156+1*p06', 2)), - ('cmovpo gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovpo mem gpr', ('1*p0156+1*p06', 2)), - ('cmovs gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovs mem gpr', ('1*p0156+1*p06', 2)), - ('cmovz gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovz mem gpr', ('1*p0156+1*p06', 2)), - - # https://www.felixcloutier.com/x86/pmovmskb - ('pmovmskb mm gpr', ('1*p0', 3)), - ('pmovmskb xmm gpr', ('1*p0', 3)), - ('vpmovmskb xmm gpr', ('1*p0', 3)), - ('vpmovmskb ymm gpr', ('1*p0', 3)), - - # https://www.felixcloutier.com/x86/pmovsx - ('pmovsxbw xmm xmm', ('1*p5', 1)), - ('pmovsxbw mem xmm', ('1*p5', 1)), - ('pmovsxbd xmm xmm', ('1*p5', 1)), - ('pmovsxbd mem xmm', ('1*p5', 1)), - ('pmovsxbq xmm xmm', ('1*p5', 1)), - ('pmovsxbq mem xmm', ('1*p5', 1)), - ('vpmovsxbw xmm xmm', ('1*p5', 1)), - ('vpmovsxbw mem xmm', ('1*p5', 1)), - ('vpmovsxbd xmm xmm', ('1*p5', 1)), - ('vpmovsxbd mem xmm', ('1*p5', 1)), - ('vpmovsxbq xmm xmm', ('1*p5', 1)), - ('vpmovsxbq mem xmm', ('1*p5', 1)), - ('vpmovsxbw ymm ymm', ('1*p5', 1)), - ('vpmovsxbw mem ymm', ('1*p5', 1)), - ('vpmovsxbd ymm ymm', ('1*p5', 1)), - ('vpmovsxbd mem ymm', ('1*p5', 1)), - ('vpmovsxbq ymm ymm', ('1*p5', 1)), - ('vpmovsxbq mem ymm', ('1*p5', 1)), - - # https://www.felixcloutier.com/x86/pmovzx - ('pmovzxbw xmm xmm', ('1*p5', 1)), - ('pmovzxbw mem xmm', ('1*p5', 1)), - ('vpmovzxbw xmm xmm', ('1*p5', 1)), - ('vpmovzxbw mem xmm', ('1*p5', 1)), - ('vpmovzxbw ymm ymm', ('1*p5', 1)), - ('vpmovzxbw mem ymm', ('1*p5', 1)), -]).items()) - -bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [ - # https://www.felixcloutier.com/x86/cmovcc - ('cmova gpr gpr', ('2*p06', 1)), - ('cmova mem gpr', ('2*p06', 1)), - ('cmovae gpr gpr', ('1*p06', 1)), - ('cmovae mem gpr', ('2*p06', 1)), - ('cmovb gpr gpr', ('2*p06', 1)), - ('cmovb mem gpr', ('1*p06', 1)), - ('cmovbe gpr gpr', ('2*p06', 1)), - ('cmovbe mem gpr', ('2*p06', 1)), - ('cmovc gpr gpr', ('1*p06', 1)), - ('cmovc mem gpr', ('1*p06', 1)), - ('cmove gpr gpr', ('1*p06', 1)), - ('cmove mem gpr', ('1*p06', 1)), - ('cmovg gpr gpr', ('1*p06', 1)), - ('cmovg mem gpr', ('1*p06', 1)), - ('cmovge gpr gpr', ('1*p06', 1)), - ('cmovge mem gpr', ('1*p06', 1)), - ('cmovl gpr gpr', ('1*p06', 1)), - ('cmovl mem gpr', ('1*p06', 1)), - ('cmovle gpr gpr', ('1*p06', 1)), - ('cmovle mem gpr', ('1*p06', 1)), - ('cmovna gpr gpr', ('2*p06', 1)), - ('cmovna mem gpr', ('2*p06', 1)), - ('cmovnae gpr gpr', ('1*p06', 1)), - ('cmovnae mem gpr', ('1*p06', 1)), - ('cmovnb gpr gpr', ('1*p06', 1)), - ('cmovnb mem gpr', ('1*p06', 1)), - ('cmovnbe gpr gpr', ('2*p06', 1)), - ('cmovnbe mem gpr', ('2*p06', 1)), - ('cmovnb gpr gpr', ('1*p06', 1)), - ('cmovnb mem gpr', ('1*p06', 1)), - ('cmovnc gpr gpr', ('1*p06', 1)), - ('cmovnc mem gpr', ('1*p06', 1)), - ('cmovne gpr gpr', ('1*p06', 1)), - ('cmovne mem gpr', ('1*p06', 1)), - ('cmovng gpr gpr', ('1*p06', 1)), - ('cmovng mem gpr', ('1*p06', 1)), - ('cmovnge gpr gpr', ('1*p06', 1)), - ('cmovnge mem gpr', ('1*p06', 1)), - ('cmovnl gpr gpr', ('1*p06', 1)), - ('cmovnl mem gpr', ('1*p06', 1)), - ('cmovno gpr gpr', ('1*p06', 1)), - ('cmovno mem gpr', ('1*p06', 1)), - ('cmovnp gpr gpr', ('1*p06', 1)), - ('cmovnp mem gpr', ('1*p06', 1)), - ('cmovns gpr gpr', ('1*p06', 1)), - ('cmovns mem gpr', ('1*p06', 1)), - ('cmovnz gpr gpr', ('1*p06', 1)), - ('cmovnz mem gpr', ('1*p06', 1)), - ('cmovo gpr gpr', ('1*p06', 1)), - ('cmovo mem gpr', ('1*p06', 1)), - ('cmovp gpr gpr', ('1*p06', 1)), - ('cmovp mem gpr', ('1*p06', 1)), - ('cmovpe gpr gpr', ('1*p06', 1)), - ('cmovpe mem gpr', ('1*p06', 1)), - ('cmovpo gpr gpr', ('1*p06', 1)), - ('cmovpo mem gpr', ('1*p06', 1)), - ('cmovs gpr gpr', ('1*p06', 1)), - ('cmovs mem gpr', ('1*p06', 1)), - ('cmovz gpr gpr', ('1*p06', 1)), - ('cmovz mem gpr', ('1*p06', 1)), -]).items()) - -skx_mov_instructions = list(OrderedDict(bdw_mov_instructions + [ - # https://www.felixcloutier.com/x86/movapd - # TODO with masking! - # TODO the following may eliminate or be bound to 1*p0156: - # ('movapd xmm xmm', ('1*p5', 1)), - # ('vmovapd xmm xmm', ('1*p5', 1)), - # ('vmovapd ymm ymm', ('1*p5', 1)), - - # https://www.felixcloutier.com/x86/movaps - # TODO with masking! - # TODO the following may eliminate or be bound to 1*p0156: - # ('movaps xmm xmm', ('1*p5', 1)), - # ('vmovaps xmm xmm', ('1*p5', 1)), - # ('vmovaps ymm ymm', ('1*p5', 1)), - - # https://www.felixcloutier.com/x86/movbe - ('movbe gpr mem', ('1*p15', 4)), - ('movbe mem gpr', ('1*p15', 4)), - - # https://www.felixcloutier.com/x86/movddup - # TODO with masking! - - # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 - # TODO with masking! - - # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 - # TODO with masking! - - # https://www.felixcloutier.com/x86/movntdq - ('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use? - - # https://www.felixcloutier.com/x86/movntdqa - ('vmovntdqa mem zmm', ('', 0)), - - # https://www.felixcloutier.com/x86/movntpd - ('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use? - - # https://www.felixcloutier.com/x86/movntps - ('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use? - - # https://www.felixcloutier.com/x86/movq2dq - ('movq2dq mm xmm', ('1*p0+1*p015', 1)), - - # https://www.felixcloutier.com/x86/movsd - # TODO with masking! - - # https://www.felixcloutier.com/x86/movshdup - # TODO with masking! - - # https://www.felixcloutier.com/x86/movsldup - # TODO with masking! - - # https://www.felixcloutier.com/x86/movss - # TODO with masking! - - # https://www.felixcloutier.com/x86/movupd - # TODO with masking! - - # https://www.felixcloutier.com/x86/movups - # TODO with masking! - - # https://www.felixcloutier.com/x86/pmovsx - # TODO with masking! - ('vpmovsxbw ymm zmm', ('1*p5', 3)), - ('vpmovsxbw mem zmm', ('1*p5', 1)), -]).items()) - -csx_mov_instructions = OrderedDict(skx_mov_instructions + [ - -]).items() def get_description(arch, rhs_comment=None): descriptions = { @@ -803,7 +763,7 @@ def get_description(arch, rhs_comment=None): 'hsw': '\n'.join([p7.process_item(*item) for item in hsw_mov_instructions]), 'bdw': '\n'.join([p7.process_item(*item) for item in bdw_mov_instructions]), 'skx': '\n'.join([p7.process_item(*item) for item in skx_mov_instructions]), - 'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]) + 'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]), } description = descriptions[arch] @@ -813,20 +773,21 @@ def get_description(arch, rhs_comment=None): commented_description = "" for l in descriptions[arch].split('\n'): - commented_description += ("{:<"+str(max_length)+"} # {}\n").format(l, rhs_comment) + commented_description += ("{:<" + str(max_length) + "} # {}\n").format(l, rhs_comment) description = commented_description return description + if __name__ == '__main__': import sys + if len(sys.argv) != 2: print("Usage: {} (snb|ivb|hsw|bdw|skx|csx)".format(sys.argv[0])) sys.exit(0) - + try: print(get_description(sys.argv[1], rhs_comment=' '.join(sys.argv))) except KeyError: print("Unknown architecture.") sys.exit(1) - From 34e978d2ae82f82966ee4646124ee9d3bc215df2 Mon Sep 17 00:00:00 2001 From: Cloud User Date: Tue, 30 Jun 2020 20:28:57 +0000 Subject: [PATCH 06/15] initial implementation of Neoverse N1 support --- osaca/data/n1.yml | 771 ++++++++++++++++++++++++++++++++++++ osaca/osaca.py | 4 +- osaca/semantics/hw_model.py | 1 + 3 files changed, 774 insertions(+), 2 deletions(-) create mode 100644 osaca/data/n1.yml diff --git a/osaca/data/n1.yml b/osaca/data/n1.yml new file mode 100644 index 0000000..3738df4 --- /dev/null +++ b/osaca/data/n1.yml @@ -0,0 +1,771 @@ +osaca_version: 0.3.3 +micro_architecture: Arm Neoverse N1 +arch_code: n1 +isa: AArch64 +ROB_size: 128 # wikichip +retired_uOps_per_cycle: 8 # wikichip +scheduler_size: 120 # wikichip +hidden_loads: false +load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 5.0, q: 6.0, v: 5.0, z: 4.0} +load_throughput: +- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]} +load_throughput_default: [[1, '67']] +store_throughput: [] +store_throughput_default: [[1, '56'], [1, '67']] +ports: ['0', '1', '2', '3', '4', '4DV', '5', '6', '7'] +port_model_scheme: | + +----------------------------------------------------------------------------+ + | 120 entries | + +----------------------------------------------------------------------------+ + 0 |BR 1 |IS0 2 |IS1 3 |IM0 4 |FP0 5 |FP1 6 |LDST 7 |LDST + \/ \/ \/ \/ \/ \/ \/ \/ + +------+ +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+ + |Branch| | INT | | INT | | INT | | FP ALU | | FP ALU | | AGU | | AGU | + +------+ | ALU | | ALU | | ALU | +--------+ +--------+ +-------+ +-------+ + +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+ + +-----+ +-----+ | FP MUL | | FP MUL | |LD DATA| |LD DATA| + | ST | | INT | +--------+ +--------+ +-------+ +-------+ + | INT | | MUL | +--------+ +---------+ + +-----+ +-----+ | FP DIV | |SIMD SHFT| + +-----+ +--------+ +---------+ + | INT | +--------+ +--------+ + | DIV | | FMA | | FMA | + +-----+ +--------+ +--------+ + +-----+ +--------+ +--------+ + |SHIFT| | ST SIMD| | ST SIMD| + +-----+ | DATA | | DATA | + +-----+ +--------+ +--------+ + | ST | + | INT | + +-----+ +instruction_forms: +- name: add + operands: + - class: register + prefix: x + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: add + operands: + - class: register + prefix: x + - class: register + prefix: x + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: adds + operands: + - class: register + prefix: x + - class: register + prefix: x + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '132']] +- name: b.ne + operands: + - class: identifier + throughput: 1.0 + latency: 0.0 + port_pressure: [[1, '0']] +- name: b.gt + operands: + - class: identifier + throughput: 1.0 + latency: 0.0 + port_pressure: [[1, '0']] +- name: bne + operands: + - class: identifier + throughput: 1.0 + latency: 0.0 + port_pressure: [[1, '0']] +- name: cmp + operands: + - class: register + prefix: w + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: cmp + operands: + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.3333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: dup + operands: + - class: register + prefix: d + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fadd + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fadd + operands: + - class: register + prefix: d + width: '*' + - class: register + prefix: d + width: '*' + - class: register + prefix: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fadd + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fdiv + operands: + - class: register + prefix: v + shape: s + width: 128 + - class: register + prefix: v + shape: s + width: 128 + - class: register + prefix: v + shape: s + width: 128 + throughput: 6.0 + latency: 8.0 # 1*p4+6*p4DV + port_pressure: [[1, '4'], [6, [4DV]]] +- name: fdiv + operands: + - class: register + prefix: v + shape: d + width: 128 + - class: register + prefix: v + shape: d + width: 128 + - class: register + prefix: v + shape: d + width: 128 + throughput: 10.0 + latency: 12.0 # 1*p4+10*p4DV + port_pressure: [[4, '0'], [10, [4DV]]] +- name: fmla + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmla + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmov + operands: + - {class: register, prefix: s} + - {class: immediate, imd: double} + latency: ~ # 1*p45 + port_pressure: [[1, '45']] + throughput: 0.5 +- name: fmul + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 3.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmul + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 3.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmul + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: register + prefix: d + throughput: 0.5 + latency: 3.0 # 1*p45 + port_pressure: [[1, '45']] +- name: frecpe + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 2.0 + latency: 4.0 # 1*p4 + port_pressure: [[2, '4']] +- name: frecpe + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 1.0 + latency: 3.0 # 1*p4 + port_pressure: [[1, '4']] +- name: fsub + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fsub + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: ldp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: imd + index: ~ + scale: 1 + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 5.0 # 2*p67, from n1 opt guide + port_pressure: [[2, '67']] +- name: ldp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: imd + index: ~ + scale: 1 + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 5.0 # 2*p67+1*p123, from n1 opt guide + port_pressure: [[2, '67'], [1, '123']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: 1 + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 7.0 # 2*p67, from n1 opt guide + port_pressure: [[2, '67']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: ~ + index: ~ + scale: 1 + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 7.0 # 2*p67+1*p123, from n1 opt guide + port_pressure: [[2, '56'], [1, '123']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 7.0 # 2*p67 + port_pressure: [[2, '67']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: true + post-indexed: false + throughput: 1.0 + latency: 7.0 # 2*p67+1*p123 + port_pressure: [[2, '67'], [1, '123']] +- name: ldp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 5.0 # 2*p67+1*p123 + port_pressure: [[2, '67'], [1, '123']] +- name: ldur # JL: assumed from n1 opt guide + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 6.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 6.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 5.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: d + - class: memory + base: x + offset: imd + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 5.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 5.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: ldr + operands: + - class: register + prefix: q + - class: register + prefix: q + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: ldr + operands: + - class: register + prefix: d + - class: register + prefix: d + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: mov + operands: + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.25 + latency: 1.0 # 1*p3456 + port_pressure: [[1, '3456']] +- name: mov + operands: + - class: register + prefix: v + shape: b + width: '*' + - class: register + prefix: v + shape: b + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: stp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p45+1*p67 + port_pressure: [[2, '45'], [1, '67']] +- name: stp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 0 # 2*p45+2*p67+1*123 + port_pressure: [[2, '45'], [2, '67'], [1, '123']] +- name: stp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p45+2*p67 + port_pressure: [[2, '45'], [2, '67']] +- name: stur # JL: assumed from n1 opt guide + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 0.5 + latency: 0 # 1*p67+1*p23 + port_pressure: [[1, '56'], [1, '23']] +- name: stur # JL: assumed from n1 opt guide + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p67+1*p45 + port_pressure: [[2, '67'], [1, '45']] +- name: str + operands: + - class: register + prefix: x + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 0.5 + latency: 0 # 1*p67+1*p23 + port_pressure: [[1, '56'], [1, '23']] +- name: str + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 0.5 + latency: 0 # 1*p67+1*p45 + port_pressure: [[1, '67'], [1, '45']] +- name: str + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 0.5 + latency: 0 # 1*p67+1*p45+1*p123 + port_pressure: [[1, '67'], [1, '45'], [1, '123']] +- name: str + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: 1 + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p67+1*p45 + port_pressure: [[1, '67'], [1, '45']] +- name: str + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 0 # 1*p67+1*p45+1*123 + port_pressure: [[1, '67'], [1, '45'], [1, '123']] +- name: str + operands: + - class: register + prefix: x + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 0 # 1*p67+1*p23+1*p123 + port_pressure: [[1, '67'], [1, '23'], [1, '123']] +- name: sub + operands: + - class: register + prefix: w + - class: register + prefix: w + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] diff --git a/osaca/osaca.py b/osaca/osaca.py index aef4547..1551e9d 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -17,7 +17,7 @@ MODULE_DATA_DIR = os.path.join( ) LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/') DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/') -SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2'] +SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2', 'N1'] # Stolen from pip @@ -71,7 +71,7 @@ def create_parser(parser=None): parser.add_argument( '--arch', type=str, - help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2).', + help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2, N1).', ) parser.add_argument( '--fixed', diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 9c7e77d..d7a6cb3 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -241,6 +241,7 @@ class MachineModel(object): """Return ISA for given micro-arch ``arch``.""" arch_dict = { 'tx2': 'aarch64', + 'n1': 'aarch64', 'zen1': 'x86', 'zen+': 'x86', 'zen2': 'x86', From cc393420479b7a4066048ef973c45e4fe37c9230 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 3 Aug 2020 09:07:45 +0200 Subject: [PATCH 07/15] minor enhancement for mask parsing --- osaca/parser/parser_x86att.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index afcd705..b6e42af 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -44,7 +44,7 @@ class ParserX86ATT(BaseParser): + pp.Optional(pp.Literal('(') + pp.Word(pp.nums) + pp.Literal(')')) + pp.Optional( pp.Literal('{') - + pp.Literal('%') + + pp.Optional(pp.Suppress(pp.Literal('%'))) + pp.Word(pp.alphanums).setResultsName('mask') + pp.Literal('}') + pp.Optional( @@ -99,7 +99,7 @@ class ParserX86ATT(BaseParser): + pp.Literal(')') + pp.Optional( pp.Literal('{') - + pp.Literal('%') + + pp.Optional(pp.Suppress(pp.Literal('%'))) + pp.Word(pp.alphanums).setResultsName('mask') + pp.Literal('}') ) From 5361b63b528390e69acd90504410a08d3160c137 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 3 Aug 2020 09:38:50 +0200 Subject: [PATCH 08/15] version bump --- osaca/__init__.py | 2 +- osaca/data/bdw.yml | 2 +- osaca/data/csx.yml | 2 +- osaca/data/hsw.yml | 2 +- osaca/data/isa/aarch64.yml | 2 +- osaca/data/isa/x86.yml | 2 +- osaca/data/ivb.yml | 2 +- osaca/data/n1.yml | 2 +- osaca/data/skx.yml | 2 +- osaca/data/snb.yml | 2 +- osaca/data/tx2.yml | 2 +- osaca/data/zen1.yml | 2 +- osaca/data/zen2.yml | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/osaca/__init__.py b/osaca/__init__.py index d06d900..a1ee834 100644 --- a/osaca/__init__.py +++ b/osaca/__init__.py @@ -1,6 +1,6 @@ """Open Source Architecture Code Analyzer""" name = 'osaca' -__version__ = '0.3.3.dev0' +__version__ = '0.3.4' # To trigger travis deployment to pypi, do the following: # 1. Increment __version___ diff --git a/osaca/data/bdw.yml b/osaca/data/bdw.yml index d29c7ec..7346b62 100644 --- a/osaca/data/bdw.yml +++ b/osaca/data/bdw.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: Intel Broadwell arch_code: BDW isa: x86 diff --git a/osaca/data/csx.yml b/osaca/data/csx.yml index e73d942..5aeed51 100644 --- a/osaca/data/csx.yml +++ b/osaca/data/csx.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: Cascade Lake SP arch_code: CSX isa: x86 diff --git a/osaca/data/hsw.yml b/osaca/data/hsw.yml index 2f1a9f9..3d2b8c2 100644 --- a/osaca/data/hsw.yml +++ b/osaca/data/hsw.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: Intel Haswell arch_code: HSW isa: x86 diff --git a/osaca/data/isa/aarch64.yml b/osaca/data/isa/aarch64.yml index 13b294a..2957a28 100644 --- a/osaca/data/isa/aarch64.yml +++ b/osaca/data/isa/aarch64.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.0 +osaca_version: 0.3.4 isa: "AArch64" # Contains all operand-irregular instruction forms OSACA supports for AArch64. # Operand-regular for a AArch64 instruction form with N operands in the shape of diff --git a/osaca/data/isa/x86.yml b/osaca/data/isa/x86.yml index 54ceeba..b5d8bc7 100644 --- a/osaca/data/isa/x86.yml +++ b/osaca/data/isa/x86.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.0 +osaca_version: 0.3.4 isa: "x86" # Contains all operand-irregular instruction forms OSACA supports for x86. # Operand-regular for a x86 AT&T instruction form with N operands in the shape of diff --git a/osaca/data/ivb.yml b/osaca/data/ivb.yml index 3da9bba..ae5f035 100644 --- a/osaca/data/ivb.yml +++ b/osaca/data/ivb.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: Intel Ivy Bridge arch_code: IVB isa: x86 diff --git a/osaca/data/n1.yml b/osaca/data/n1.yml index 3738df4..d189c44 100644 --- a/osaca/data/n1.yml +++ b/osaca/data/n1.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.3 +osaca_version: 0.3.4 micro_architecture: Arm Neoverse N1 arch_code: n1 isa: AArch64 diff --git a/osaca/data/skx.yml b/osaca/data/skx.yml index 17bd85b..7c09473 100644 --- a/osaca/data/skx.yml +++ b/osaca/data/skx.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: Intel Skylake SP arch_code: SKX isa: x86 diff --git a/osaca/data/snb.yml b/osaca/data/snb.yml index e0545dc..e1bf0ed 100644 --- a/osaca/data/snb.yml +++ b/osaca/data/snb.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: Intel Sandy Bridge arch_code: SNB isa: x86 diff --git a/osaca/data/tx2.yml b/osaca/data/tx2.yml index 8c9765f..3df5f6d 100644 --- a/osaca/data/tx2.yml +++ b/osaca/data/tx2.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: Thunder X2 arch_code: tx2 isa: AArch64 diff --git a/osaca/data/zen1.yml b/osaca/data/zen1.yml index 04973a1..e45ba48 100644 --- a/osaca/data/zen1.yml +++ b/osaca/data/zen1.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: AMD Zen (family 17h) arch_code: ZEN1 isa: x86 diff --git a/osaca/data/zen2.yml b/osaca/data/zen2.yml index 125ba74..bfec889 100644 --- a/osaca/data/zen2.yml +++ b/osaca/data/zen2.yml @@ -1,4 +1,4 @@ -osaca_version: 0.3.2 +osaca_version: 0.3.4 micro_architecture: AMD Zen2 arch_code: ZEN2 isa: x86 From d6b4355a7742e3349cc12d7aa03fe84e1e0444cb Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Mon, 3 Aug 2020 15:53:29 +0200 Subject: [PATCH 09/15] labels may now start with numbers --- osaca/parser/parser_x86att.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index b6e42af..391b52f 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -26,7 +26,7 @@ class ParserX86ATT(BaseParser): # Define x86 assembly identifier relocation = pp.Combine(pp.Literal('@') + pp.Word(pp.alphas)) id_offset = pp.Word(pp.nums) + pp.Suppress(pp.Literal('+')) - first = pp.Word(pp.alphas + '_.', exact=1) + first = pp.Word(pp.alphanums + '_.', exact=1) rest = pp.Word(pp.alphanums + '$_.+-') identifier = pp.Group( pp.Optional(id_offset).setResultsName('offset') From e715badcf988cc65e34800cf4a49575b3e2a0117 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 3 Aug 2020 16:59:48 +0200 Subject: [PATCH 10/15] detects numeric label as label --- osaca/parser/parser_x86att.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index b6e42af..f594d0b 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -34,8 +34,13 @@ class ParserX86ATT(BaseParser): + pp.Optional(relocation).setResultsName('relocation') ).setResultsName('identifier') # Label + numeric_identifier = pp.Group( + pp.Word(pp.nums).setResultsName('name') + pp.Optional(pp.oneOf('b f', caseless=True)) + ).setResultsName('identifier') self.label = pp.Group( - identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment) + (identifier | numeric_identifier).setResultsName('name') + + pp.Literal(':') + + pp.Optional(self.comment) ).setResultsName(self.LABEL_ID) # Register: pp.Regex('^%[0-9a-zA-Z]+{}{z},?') self.register = pp.Group( @@ -132,7 +137,7 @@ class ParserX86ATT(BaseParser): pp.alphanums ).setResultsName('mnemonic') # Combine to instruction form - operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier) + operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier ^ numeric_identifier) operand_rest = pp.Group(self.register ^ immediate ^ memory) self.instruction_parser = ( mnemonic @@ -305,7 +310,10 @@ class ParserX86ATT(BaseParser): def process_label(self, label): """Post-process label asm line""" # remove duplicated 'name' level due to identifier - label['name'] = label['name']['name'] + if 'name' in label['name'][0]: + label['name'] = label['name'][0]['name'] + else: + label['name'] = label['name'][0] return AttrDict({self.LABEL_ID: label}) def process_immediate(self, immediate): From 12a8506530e97344bbe3fd1ddc96af8236780c2c Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 3 Aug 2020 17:14:58 +0200 Subject: [PATCH 11/15] removed unnecessary code --- osaca/parser/parser_x86att.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index f594d0b..9576a0e 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -310,10 +310,7 @@ class ParserX86ATT(BaseParser): def process_label(self, label): """Post-process label asm line""" # remove duplicated 'name' level due to identifier - if 'name' in label['name'][0]: - label['name'] = label['name'][0]['name'] - else: - label['name'] = label['name'][0] + label['name'] = label['name'][0]['name'] return AttrDict({self.LABEL_ID: label}) def process_immediate(self, immediate): From 269148c2a1bc32c97e1417b511125cdba02fe3d9 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 3 Aug 2020 18:08:29 +0200 Subject: [PATCH 12/15] save b/f in numeric identifier as suffix tag --- osaca/parser/parser_x86att.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 9576a0e..ce79216 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -import re import string import pyparsing as pp @@ -35,7 +34,7 @@ class ParserX86ATT(BaseParser): ).setResultsName('identifier') # Label numeric_identifier = pp.Group( - pp.Word(pp.nums).setResultsName('name') + pp.Optional(pp.oneOf('b f', caseless=True)) + pp.Word(pp.nums).setResultsName('name') + pp.Optional(pp.oneOf('b f', caseless=True).setResultsName('suffix')) ).setResultsName('identifier') self.label = pp.Group( (identifier | numeric_identifier).setResultsName('name') From 0db8b6bcbf3bd6bf8ab3111af16af00d490050cd Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 3 Aug 2020 18:30:29 +0200 Subject: [PATCH 13/15] fixed first character match for symbolic identifiers --- osaca/parser/parser_x86att.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index b5ee165..09a5921 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -25,7 +25,7 @@ class ParserX86ATT(BaseParser): # Define x86 assembly identifier relocation = pp.Combine(pp.Literal('@') + pp.Word(pp.alphas)) id_offset = pp.Word(pp.nums) + pp.Suppress(pp.Literal('+')) - first = pp.Word(pp.alphanums + '_.', exact=1) + first = pp.Word(pp.alphas + '_.', exact=1) rest = pp.Word(pp.alphanums + '$_.+-') identifier = pp.Group( pp.Optional(id_offset).setResultsName('offset') @@ -34,7 +34,8 @@ class ParserX86ATT(BaseParser): ).setResultsName('identifier') # Label numeric_identifier = pp.Group( - pp.Word(pp.nums).setResultsName('name') + pp.Optional(pp.oneOf('b f', caseless=True).setResultsName('suffix')) + pp.Word(pp.nums).setResultsName('name') + + pp.Optional(pp.oneOf('b f', caseless=True).setResultsName('suffix')) ).setResultsName('identifier') self.label = pp.Group( (identifier | numeric_identifier).setResultsName('name') @@ -136,7 +137,9 @@ class ParserX86ATT(BaseParser): pp.alphanums ).setResultsName('mnemonic') # Combine to instruction form - operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier ^ numeric_identifier) + operand_first = pp.Group( + self.register ^ immediate ^ memory ^ identifier ^ numeric_identifier + ) operand_rest = pp.Group(self.register ^ immediate ^ memory) self.instruction_parser = ( mnemonic From 3308f5d68f4ad4487376957d05d2cb706a5c5ba2 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Wed, 5 Aug 2020 10:59:10 +0200 Subject: [PATCH 14/15] version bump --- osaca/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osaca/__init__.py b/osaca/__init__.py index a1ee834..40a2da8 100644 --- a/osaca/__init__.py +++ b/osaca/__init__.py @@ -1,6 +1,6 @@ """Open Source Architecture Code Analyzer""" name = 'osaca' -__version__ = '0.3.4' +__version__ = '0.3.6' # To trigger travis deployment to pypi, do the following: # 1. Increment __version___ From 64da89ec3d6e9be51a8f17872298b7666ffa0acf Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 17 Sep 2020 22:12:12 +0200 Subject: [PATCH 15/15] enhancecd ARM identifier to support immediate offsets --- osaca/parser/base_parser.py | 1 + osaca/parser/parser_AArch64v81.py | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/osaca/parser/base_parser.py b/osaca/parser/base_parser.py index d08ed97..2706e04 100755 --- a/osaca/parser/base_parser.py +++ b/osaca/parser/base_parser.py @@ -8,6 +8,7 @@ class BaseParser(object): DIRECTIVE_ID = 'directive' IMMEDIATE_ID = 'immediate' LABEL_ID = 'label' + IDENTIFIER_ID = 'identifier' MEMORY_ID = 'memory' REGISTER_ID = 'register' SEGMENT_EXT_ID = 'segment_extension' diff --git a/osaca/parser/parser_AArch64v81.py b/osaca/parser/parser_AArch64v81.py index 51d2fae..34f99c5 100755 --- a/osaca/parser/parser_AArch64v81.py +++ b/osaca/parser/parser_AArch64v81.py @@ -19,22 +19,23 @@ class ParserAArch64v81(BaseParser): pp.ZeroOrMore(pp.Word(pp.printables)) ).setResultsName(self.COMMENT_ID) # Define ARM assembly identifier + decimal_number = pp.Combine( + pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) + ).setResultsName('value') + hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value') relocation = pp.Combine(pp.Literal(':') + pp.Word(pp.alphanums + '_') + pp.Literal(':')) first = pp.Word(pp.alphas + '_.', exact=1) rest = pp.Word(pp.alphanums + '_.') identifier = pp.Group( pp.Optional(relocation).setResultsName('relocation') + pp.Combine(first + pp.Optional(rest)).setResultsName('name') - ).setResultsName('identifier') + + pp.Optional(pp.Suppress(pp.Literal('+')) + (hex_number | decimal_number).setResultsName('offset')) + ).setResultsName(self.IDENTIFIER_ID) # Label self.label = pp.Group( identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment) ).setResultsName(self.LABEL_ID) # Directive - decimal_number = pp.Combine( - pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) - ).setResultsName('value') - hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value') directive_option = pp.Combine( pp.Word(pp.alphas + '#@.%', exact=1) + pp.Optional(pp.Word(pp.printables + ' ', excludeChars=',')) @@ -317,6 +318,8 @@ class ParserAArch64v81(BaseParser): return self.process_immediate(operand[self.IMMEDIATE_ID]) if self.LABEL_ID in operand: return self.process_label(operand[self.LABEL_ID]) + if self.IDENTIFIER_ID in operand: + return self.process_identifier(operand[self.IDENTIFIER_ID]) return operand def process_memory_address(self, memory_address): @@ -396,6 +399,13 @@ class ParserAArch64v81(BaseParser): label['name'] = label['name']['name'] return AttrDict({self.LABEL_ID: label}) + def process_identifier(self, identifier): + """Post-process identifier operand""" + # remove value if it consists of symbol+offset + if 'value' in identifier: + del identifier['value'] + return AttrDict({self.IDENTIFIER_ID: identifier}) + def get_full_reg_name(self, register): """Return one register name string including all attributes""" if 'lanes' in register: