diff --git a/tools/cpuBench/addURLsToXML.py b/tools/cpuBench/addURLsToXML.py new file mode 100755 index 0000000..ade794b --- /dev/null +++ b/tools/cpuBench/addURLsToXML.py @@ -0,0 +1,146 @@ +#!/usr/bin/python +import xml.etree.ElementTree as ET +import argparse +import re +import urllib +from xml.dom import minidom +from utils import * + +def main(): + parser = argparse.ArgumentParser(description='') + parser.add_argument("input", help="Input XML file") + parser.add_argument("output", help="Output XML file") + args = parser.parse_args() + + html = urllib.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*') + lines = re.findall('href="\./(.*?)">(.*?).*?(.*?)', html) # Example: ('ADC.html', 'ADC', 'Add with Carry'), + lineDict = {(line[0],line[1]):line for line in lines} + + root = ET.parse(args.input).getroot() + for instrNode in root.iter('instruction'): + iclass = instrNode.attrib['iclass'] + iform = instrNode.attrib['iform'] + extension = instrNode.attrib['extension'] + + if extension in ['CET', 'MONITORX', 'SSE4a', 'SVM', 'TBM', 'TSX_LDTRK', 'XOP']: + continue + + matchingLines = [] + if iclass == 'INT': + matchingLines = [lineDict[('INTn:INTO:INT3:INT1.html', 'INT n')]] + elif iclass == 'IRETQ': + matchingLines = [lineDict[('IRET:IRETD.html', 'IRET')]] + if iclass == 'MOV': + matchingLines = [lineDict[('MOV.html', 'MOV')]] + elif iclass == 'MOV_CR': + matchingLines = [lineDict[('MOV-1.html', 'MOV')]] + elif iclass == 'MOV_DR': + matchingLines = [lineDict[('MOV-2.html', 'MOV')]] + elif iclass == 'VMRESUME': + matchingLines = [lineDict[('VMLAUNCH:VMRESUME.html', 'VMRESUME')]] + elif iclass == 'SCASQ': + matchingLines = [lineDict[('SCAS:SCASB:SCASW:SCASD.html', 'SCAS')]] + elif iclass == 'UD2': + matchingLines = [lineDict[('UD.html', 'UD')]] + elif iclass in ['CMPSD', 'VCMPSD', 'CMPSD_XMM']: + if extension == 'BASE': + matchingLines = [lineDict[('CMPS:CMPSB:CMPSW:CMPSD:CMPSQ.html', 'CMPSD')]] + else: + matchingLines = [lineDict[('CMPSD.html', 'CMPSD')]] + elif iclass in ['MOVQ', 'VMOVQ']: + if 'GPR' in iform: + matchingLines = [lineDict[('MOVD:MOVQ.html', 'MOVQ')]] + else: + matchingLines = [lineDict[('MOVQ.html', 'MOVQ')]] + elif iclass in ['MOVSD', 'VMOVSD', 'MOVSD_XMM']: + if extension == 'BASE': + matchingLines = [lineDict[('MOVS:MOVSB:MOVSW:MOVSD:MOVSQ.html', 'MOVSD')]] + else: + matchingLines = [lineDict[('MOVSD.html', 'MOVSD')]] + elif iclass == 'VGATHERDPD': + if '512' in extension: + matchingLines = [lineDict[('VGATHERDPS:VGATHERDPD.html', 'VGATHERDPD')]] + else: + matchingLines = [lineDict[('VGATHERDPD:VGATHERQPD.html', 'VGATHERDPD')]] + elif iclass == 'VGATHERDPS': + if '512' in extension: + matchingLines = [lineDict[('VGATHERDPS:VGATHERDPD.html', 'VGATHERDPS')]] + else: + matchingLines = [lineDict[('VGATHERDPS:VGATHERQPS.html', 'VGATHERDPS')]] + elif iclass == 'VGATHERQPD': + if '512' in extension: + matchingLines = [lineDict[('VGATHERQPS:VGATHERQPD.html', 'VGATHERQPD')]] + else: + matchingLines = [lineDict[('VGATHERDPD:VGATHERQPD.html', 'VGATHERQPD')]] + elif iclass == 'VGATHERQPS': + if '512' in extension: + matchingLines = [lineDict[('VGATHERQPS:VGATHERQPD.html', 'VGATHERQPS')]] + else: + matchingLines = [lineDict[('VGATHERDPS:VGATHERQPS.html', 'VGATHERQPS')]] + elif iclass == 'VPGATHERDD': + if '512' in extension: + matchingLines = [lineDict[('VPGATHERDD:VPGATHERDQ.html', 'VPGATHERDD')]] + else: + matchingLines = [lineDict[('VPGATHERDD:VPGATHERQD.html', 'VPGATHERDD')]] + elif iclass == 'VPGATHERDQ': + if '512' in extension: + matchingLines = [lineDict[('VPGATHERDD:VPGATHERDQ.html', 'VPGATHERDQ')]] + else: + matchingLines = [lineDict[('VPGATHERDQ:VPGATHERQQ.html', 'VPGATHERDQ')]] + elif iclass == 'VPGATHERQD': + if '512' in extension: + matchingLines = [lineDict[('VPGATHERQD:VPGATHERQQ.html', 'VPGATHERQD')]] + else: + matchingLines = [lineDict[('VPGATHERDD:VPGATHERQD.html', 'VPGATHERQD')]] + elif iclass == 'VPGATHERQQ': + if '512' in extension: + matchingLines = [lineDict[('VPGATHERQD:VPGATHERQQ.html', 'VPGATHERQQ')]] + else: + matchingLines = [lineDict[('VPGATHERDQ:VPGATHERQQ.html', 'VPGATHERQQ')]] + elif iclass.startswith('PMOVSX') or iclass.startswith('VPMOVSX'): + matchingLines = [lineDict[('PMOVSX.html', 'PMOVSX')]] + elif iclass.startswith('PMOVZX') or iclass.startswith('VPMOVZX'): + matchingLines = [lineDict[('PMOVZX.html', 'PMOVZX')]] + elif iclass.startswith('REP'): + matchingLines = [lineDict[('REP:REPE:REPZ:REPNE:REPNZ.html', 'REP')]] + elif iclass.startswith('VBROADCAST'): + matchingLines = [lineDict[('VBROADCAST.html', 'VBROADCAST')]] + elif iclass.startswith('VMASKMOV'): + matchingLines = [lineDict[('VMASKMOV.html', 'VMASKMOV')]] + elif iclass.startswith('VPANDN'): + matchingLines = [lineDict[('PANDN.html', 'PANDN')]] + elif iclass.startswith('VPAND'): + matchingLines = [lineDict[('PAND.html', 'PAND')]] + elif iclass.startswith('VPBROADCASTM'): + matchingLines = [lineDict[('VPBROADCASTM.html', 'VPBROADCASTM')]] + elif iclass.startswith('VPMASKMOV'): + matchingLines = [lineDict[('VPMASKMOV.html', 'VPMASKMOV')]] + elif iclass.startswith('VPOR'): + matchingLines = [lineDict[('POR.html', 'POR')]] + elif iclass.startswith('VPXOR'): + matchingLines = [lineDict[('PXOR.html', 'PXOR')]] + else: + for line in lines: + mnemonic = line[1].upper() + if iclass in [mnemonic, 'V'+mnemonic, mnemonic+'_LOCK', mnemonic+'_FAR', mnemonic+'_NEAR', mnemonic+'_SSE4', mnemonic+'64']: + matchingLines.append(line) + if 'CC' in mnemonic and iclass.startswith(mnemonic.replace('CC', '')) and not iclass in ['JMP', 'JMP_FAR', 'LOOP']: + matchingLines.append(line) + + if len(matchingLines) > 1: + print 'Duplicate link found for ' + iclass + exit(1) + + instrNode.attrib['url'] = 'uops.info/html-instr/' + canonicalizeInstrString(instrNode.attrib['string']) + '.html' + if matchingLines: + instrNode.attrib['summary'] = str(matchingLines[0][2]) + instrNode.attrib['url-ref'] = 'felixcloutier.com/x86/' + matchingLines[0][0] + + with open(args.output, "w") as f: + rough_string = ET.tostring(root, 'utf-8') + reparsed = minidom.parseString(rough_string) + f.write('\n'.join([line for line in reparsed.toprettyxml(indent=' '*2).split('\n') if line.strip()])) + + +if __name__ == "__main__": + main()