This commit is contained in:
Andreas Abel
2022-11-28 16:32:32 +01:00
parent 09e6a3d6fc
commit 88d021d86e
5 changed files with 59 additions and 47 deletions

View File

@@ -2,24 +2,24 @@
set -x
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input "$2" -arch 'NHM' > output_NHM2.1.txt 2>error_NHM2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'NHM' > output_NHM2.2.txt 2>error_NHM2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'WSM' > output_WSM2.1.txt 2>error_WSM2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'WSM' > output_WSM2.2.txt 2>error_WSM2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'SNB' > output_SNB2.1.txt 2>error_SNB2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'SNB' > output_SNB2.2.txt 2>error_SNB2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'SNB' > output_SNB2.3.txt 2>error_SNB2.3.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'IVB' > output_IVB2.1.txt 2>error_IVB2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'IVB' > output_IVB2.2.txt 2>error_IVB2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'IVB' > output_IVB2.3.txt 2>error_IVB2.3.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'HSW' > output_HSW2.1.txt 2>error_HSW2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'HSW' > output_HSW2.2.txt 2>error_HSW2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'HSW' > output_HSW2.3.txt 2>error_HSW2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'HSW' > output_HSW3.0.txt 2>error_HSW3.0.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'BDW' > output_BDW2.2.txt 2>error_BDW2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'BDW' > output_BDW2.3.txt 2>error_BDW2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'BDW' > output_BDW3.0.txt 2>error_BDW3.0.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'SKL' > output_SKL2.3.txt 2>error_SKL2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'SKL' > output_SKL3.0.txt 2>error_SKL3.0.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'SKX' > output_SKX2.3.txt 2>error_SKX2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'SKX' > output_SKX3.0.txt 2>error_SKX3.0.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input "$2" -arch 'NHM' -output result_IACA.xml > output_NHM2.1.txt 2>error_NHM2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'NHM' -output result_IACA.xml > output_NHM2.2.txt 2>error_NHM2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'WSM' -output result_IACA.xml > output_WSM2.1.txt 2>error_WSM2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'WSM' -output result_IACA.xml > output_WSM2.2.txt 2>error_WSM2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'SNB' -output result_IACA.xml > output_SNB2.1.txt 2>error_SNB2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'SNB' -output result_IACA.xml > output_SNB2.2.txt 2>error_SNB2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'SNB' -output result_IACA.xml > output_SNB2.3.txt 2>error_SNB2.3.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'IVB' -output result_IACA.xml > output_IVB2.1.txt 2>error_IVB2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'IVB' -output result_IACA.xml > output_IVB2.2.txt 2>error_IVB2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'IVB' -output result_IACA.xml > output_IVB2.3.txt 2>error_IVB2.3.txt
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW2.1.txt 2>error_HSW2.1.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW2.2.txt 2>error_HSW2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW2.3.txt 2>error_HSW2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW3.0.txt 2>error_HSW3.0.txt
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'BDW' -output result_IACA.xml > output_BDW2.2.txt 2>error_BDW2.2.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'BDW' -output result_IACA.xml > output_BDW2.3.txt 2>error_BDW2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'BDW' -output result_IACA.xml > output_BDW3.0.txt 2>error_BDW3.0.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'SKL' -output result_IACA.xml > output_SKL2.3.txt 2>error_SKL2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'SKL' -output result_IACA.xml > output_SKL3.0.txt 2>error_SKL3.0.txt
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'SKX' -output result_IACA.xml > output_SKX2.3.txt 2>error_SKX2.3.txt
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'SKX' -output result_IACA.xml > output_SKX3.0.txt 2>error_SKX3.0.txt

View File

@@ -29,8 +29,6 @@ def main():
matchingLines = []
if iclass == 'INT':
matchingLines = [lineDict[('INTn:INTO:INT3:INT1.html', 'INT n')]]
elif iclass == 'IRETQ':
matchingLines = [lineDict[('IRET:IRETD.html', 'IRET')]]
if iclass == 'MOV':
matchingLines = [lineDict[('MOV.html', 'MOV')]]
elif iclass == 'MOV_CR':
@@ -48,6 +46,8 @@ def main():
matchingLines = [lineDict[('CMPS:CMPSB:CMPSW:CMPSD:CMPSQ.html', 'CMPSD')]]
else:
matchingLines = [lineDict[('CMPSD.html', 'CMPSD')]]
elif iclass in ['IRETW', 'IRETD', 'IRETQ']:
matchingLines = [lineDict[('IRET:IRETD:IRETQ.html', 'IRET')]]
elif iclass in ['MOVQ', 'VMOVQ']:
if 'GPR' in iform:
matchingLines = [lineDict[('MOVD:MOVQ.html', 'MOVQ')]]
@@ -133,15 +133,15 @@ def main():
exit(1)
instrNode.attrib['url'] = 'uops.info/html-instr/' + canonicalizeInstrString(instrNode.attrib['string']) + '.html'
if matchingLines:
if matchingLines:
instrNode.attrib['summary'] = str(matchingLines[0][2])
instrNode.attrib['url-ref'] = 'felixcloutier.com/x86/' + matchingLines[0][0]
with open(args.output, "w") as f:
rough_string = ET.tostring(root, 'utf-8')
reparsed = minidom.parseString(rough_string)
f.write('\n'.join([line for line in reparsed.toprettyxml(indent=' '*2).split('\n') if line.strip()]))
if __name__ == "__main__":
main()

View File

@@ -13,8 +13,10 @@ def main():
parser.add_argument('inp2')
parser.add_argument('arch2')
parser.add_argument('-TP', action='store_true')
parser.add_argument('-TPMaxDiff', type=float, default=.0) # if the diff. between two TP measurements is not larger than TPMaxDiff, they are treated as equal
parser.add_argument('-lat', action='store_true')
parser.add_argument('-ports', action='store_true')
parser.add_argument('-printDiff', action='store_true')
args = parser.parse_args()
root1 = ET.parse(args.inp1).getroot()
@@ -30,7 +32,7 @@ def main():
for instrStr in sorted(instrNodeDict1):
instrNode1 = instrNodeDict1[instrStr]
if not instrStr in instrNodeDict2:
print('No matching entry found for ' + instrStr)
if args.printDiff: print('No matching entry found for ' + instrStr)
continue
instrNode2 = instrNodeDict2[instrStr]
for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'):
@@ -39,9 +41,9 @@ def main():
tp1 = min(map(float, [mNode1.attrib.get('TP_unrolled', sys.maxsize), mNode1.attrib.get('TP_loop', sys.maxsize), mNode1.attrib.get('TP', sys.maxsize)]))
tp2 = min(map(float, [mNode2.attrib.get('TP_unrolled', sys.maxsize), mNode2.attrib.get('TP_loop', sys.maxsize), mNode2.attrib.get('TP', sys.maxsize)]))
if tp1 != tp2:
if abs(tp1 - tp2) > args.TPMaxDiff:
tpDiff += 1
print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2))
if args.printDiff: print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2))
if args.lat:
for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')):
@@ -49,22 +51,22 @@ def main():
latStr2 = ET.tostring(latNode2, encoding='utf-8').decode().strip()
if latNode1.attrib != latNode2.attrib:
latDiff += 1
print(instrStr)
print(' ' + latStr1)
print(' ' + latStr2)
if args.printDiff: print(instrStr)
if args.printDiff: print(' ' + latStr1)
if args.printDiff: print(' ' + latStr2)
if args.ports:
p1 = mNode1.attrib.get('ports', '')
p2 = mNode2.attrib.get('ports', '')
if p1 != p2:
portsDiff += 1
print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2)
if args.printDiff: print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2)
if not args.TP and not args.lat and not args.ports:
xmlStr1 = ET.tostring(mNode1, encoding='utf-8').decode().strip()
xmlStr2 = ET.tostring(mNode2, encoding='utf-8').decode().strip()
if xmlStr1 != xmlStr2:
if args.printDiff and xmlStr1 != xmlStr2:
print('-------------------------------')
print(instrStr)
print(xmlStr1)

8
tools/cpuBench/compare_all.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/bin/sh
# Run compareXML.py once per microarchitecture, comparing a reference
# instructions XML file against result_<arch>_measured.xml in the current
# directory.
#
# Usage: compare_all.sh [REFERENCE_XML]
#   REFERENCE_XML  XML file passed to compareXML.py as the first input
#                  (default: $HOME/code/html/instructions.xml)
#
# Throughput, latency and port usage are compared (-TP -lat -ports);
# throughput values that differ by at most 0.02 are treated as equal
# (-TPMaxDiff 0.02).

# Resolve the reference file before enabling tracing so that the trace
# shows only the compareXML.py invocations.
ref_xml="${1:-$HOME/code/html/instructions.xml}"

set -x
for arch in CON WOL NHM WSM SNB IVB HSW BDW SKL KBL CFL SKX CNL CLX ICL TGL RKL ADL-P ADL-E BNL AMT GLM GLP TRM ZEN+ ZEN2 ZEN3 ZEN4
do
    # Quote every expansion so paths containing spaces or glob characters
    # are passed through as single arguments.
    ./compareXML.py "$ref_xml" "$arch" "result_${arch}_measured.xml" "$arch" -TP -lat -ports -TPMaxDiff 0.02
done

View File

@@ -567,9 +567,12 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e:
print('Error: ' + e.output.decode())
logging.warning('Error: ' + e.output.decode())
htmlReports.append('<pre>' + e.output.decode() + '</pre>')
return None
htmlReports.append('<pre>' + iacaOut + '</pre>')
if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
print('IACA error')
return None
@@ -588,8 +591,6 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
if instrPortsCol:
instrUopsOnBlockedPorts += float(instrPortsCol[0])
htmlReports.append('<pre>' + iacaOut + '</pre>')
if allUopsOnBlockedPorts < blockInstrRep-.5:
# something went wrong; fewer uops on ports than blockInstrRep
# happens, e.g., on SKX for ports {0, 1} if AVX-512 is active
@@ -744,7 +745,8 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports):
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e:
print('Error: ' + e.output.decode())
logging.warning('Error: ' + e.output.decode())
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
return None
if debugOutput:
@@ -1092,18 +1094,18 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e:
logging.warning('Error: ' + e.output.decode())
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
if minTP != sys.maxsize:
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
else:
return None
htmlReports.append('<pre>' + iaca_out + '</pre>\n')
if not iaca_out or ' ! ' in iaca_out or ' X ' in iaca_out or ' 0X ' in iaca_out or not 'Total Num Of Uops' in iaca_out:
print('IACA error')
return None
htmlReports.append('<pre>' + iaca_out + '</pre>\n')
cycles = float(iaca_out.split('\n')[3].split()[2])
cycles = cycles/ic
minTP = min(minTP, cycles)
@@ -2483,17 +2485,17 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e:
print('Error: ' + e.output.decode())
logging.warning('Error: ' + e.output.decode())
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
return None
htmlReports.append('<pre>' + iaca_lat + '</pre>\n')
if '!' in iaca_lat or not 'Latency' in iaca_lat:
print('IACA error')
return None
latency = iaca_lat.split('\n')[3].split()[1]
htmlReports.append('<pre>' + iaca_lat + '</pre>\n')
return latency
else:
if instrNode.attrib['iclass'] in ['CALL_NEAR', 'CALL_NEAR_MEMv', 'CLZERO', 'JMP', 'JMP_MEMv', 'MOVDIR64B', 'RET_NEAR', 'RET_NEAR_IMMw', 'RDMSR', 'WRMSR',
@@ -3006,7 +3008,7 @@ def main():
parser = argparse.ArgumentParser(description='CPU Benchmarks')
parser.add_argument("-iaca", help="IACA command line; if not specified, perf. ctrs. are used")
parser.add_argument("-input", help="Instructions XML file", required=True)
parser.add_argument("-output", help="Output XML file", default='result.xml')
parser.add_argument("-output", help="Output XML file")
parser.add_argument("-arch", help="Architecture, Supported: [NHM, ...]")
parser.add_argument("-noPretty", help="Disable pretty printing XML file", action='store_true')
parser.add_argument("-noPorts", help="Don't measure port usage", action='store_true')
@@ -3506,7 +3508,7 @@ def main():
print('Could not solve LP for ' + instrNode.attrib['string'] + ':')
print(err)
with open(args.output, "w") as f:
with open(args.output or 'result_'+arch+(('_IACA_' + iacaVersion) if useIACA else '_measured')+'.xml' , "w") as f:
reparsed = XMLRoot
if not args.noPretty:
rough_string = ET.tostring(XMLRoot, 'utf-8')