mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2025-12-13 10:10:04 +01:00
update
This commit is contained in:
@@ -2,24 +2,24 @@
|
||||
|
||||
set -x
|
||||
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input "$2" -arch 'NHM' > output_NHM2.1.txt 2>error_NHM2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'NHM' > output_NHM2.2.txt 2>error_NHM2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'WSM' > output_WSM2.1.txt 2>error_WSM2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'WSM' > output_WSM2.2.txt 2>error_WSM2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'SNB' > output_SNB2.1.txt 2>error_SNB2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'SNB' > output_SNB2.2.txt 2>error_SNB2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'SNB' > output_SNB2.3.txt 2>error_SNB2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'IVB' > output_IVB2.1.txt 2>error_IVB2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'IVB' > output_IVB2.2.txt 2>error_IVB2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'IVB' > output_IVB2.3.txt 2>error_IVB2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result.xml -arch 'HSW' > output_HSW2.1.txt 2>error_HSW2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'HSW' > output_HSW2.2.txt 2>error_HSW2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'HSW' > output_HSW2.3.txt 2>error_HSW2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'HSW' > output_HSW3.0.txt 2>error_HSW3.0.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result.xml -arch 'BDW' > output_BDW2.2.txt 2>error_BDW2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'BDW' > output_BDW2.3.txt 2>error_BDW2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'BDW' > output_BDW3.0.txt 2>error_BDW3.0.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'SKL' > output_SKL2.3.txt 2>error_SKL2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'SKL' > output_SKL3.0.txt 2>error_SKL3.0.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result.xml -arch 'SKX' > output_SKX2.3.txt 2>error_SKX2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result.xml -arch 'SKX' > output_SKX3.0.txt 2>error_SKX3.0.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input "$2" -arch 'NHM' -output result_IACA.xml > output_NHM2.1.txt 2>error_NHM2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'NHM' -output result_IACA.xml > output_NHM2.2.txt 2>error_NHM2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'WSM' -output result_IACA.xml > output_WSM2.1.txt 2>error_WSM2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'WSM' -output result_IACA.xml > output_WSM2.2.txt 2>error_WSM2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'SNB' -output result_IACA.xml > output_SNB2.1.txt 2>error_SNB2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'SNB' -output result_IACA.xml > output_SNB2.2.txt 2>error_SNB2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'SNB' -output result_IACA.xml > output_SNB2.3.txt 2>error_SNB2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'IVB' -output result_IACA.xml > output_IVB2.1.txt 2>error_IVB2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'IVB' -output result_IACA.xml > output_IVB2.2.txt 2>error_IVB2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'IVB' -output result_IACA.xml > output_IVB2.3.txt 2>error_IVB2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.1/bin/iaca.sh" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW2.1.txt 2>error_HSW2.1.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW2.2.txt 2>error_HSW2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW2.3.txt 2>error_HSW2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'HSW' -output result_IACA.xml > output_HSW3.0.txt 2>error_HSW3.0.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.2/bin/iaca.sh" -input result_IACA.xml -arch 'BDW' -output result_IACA.xml > output_BDW2.2.txt 2>error_BDW2.2.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'BDW' -output result_IACA.xml > output_BDW2.3.txt 2>error_BDW2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'BDW' -output result_IACA.xml > output_BDW3.0.txt 2>error_BDW3.0.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'SKL' -output result_IACA.xml > output_SKL2.3.txt 2>error_SKL2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'SKL' -output result_IACA.xml > output_SKL3.0.txt 2>error_SKL3.0.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-2.3/bin/iaca.sh" -input result_IACA.xml -arch 'SKX' -output result_IACA.xml > output_SKX2.3.txt 2>error_SKX2.3.txt
|
||||
./cpuBench.py -iaca "$1/iaca-version-3.0/iaca" -input result_IACA.xml -arch 'SKX' -output result_IACA.xml > output_SKX3.0.txt 2>error_SKX3.0.txt
|
||||
|
||||
@@ -29,8 +29,6 @@ def main():
|
||||
matchingLines = []
|
||||
if iclass == 'INT':
|
||||
matchingLines = [lineDict[('INTn:INTO:INT3:INT1.html', 'INT n')]]
|
||||
elif iclass == 'IRETQ':
|
||||
matchingLines = [lineDict[('IRET:IRETD.html', 'IRET')]]
|
||||
if iclass == 'MOV':
|
||||
matchingLines = [lineDict[('MOV.html', 'MOV')]]
|
||||
elif iclass == 'MOV_CR':
|
||||
@@ -48,6 +46,8 @@ def main():
|
||||
matchingLines = [lineDict[('CMPS:CMPSB:CMPSW:CMPSD:CMPSQ.html', 'CMPSD')]]
|
||||
else:
|
||||
matchingLines = [lineDict[('CMPSD.html', 'CMPSD')]]
|
||||
elif iclass in ['IRETW', 'IRETD', 'IRETQ']:
|
||||
matchingLines = [lineDict[('IRET:IRETD:IRETQ.html', 'IRET')]]
|
||||
elif iclass in ['MOVQ', 'VMOVQ']:
|
||||
if 'GPR' in iform:
|
||||
matchingLines = [lineDict[('MOVD:MOVQ.html', 'MOVQ')]]
|
||||
@@ -133,15 +133,15 @@ def main():
|
||||
exit(1)
|
||||
|
||||
instrNode.attrib['url'] = 'uops.info/html-instr/' + canonicalizeInstrString(instrNode.attrib['string']) + '.html'
|
||||
if matchingLines:
|
||||
if matchingLines:
|
||||
instrNode.attrib['summary'] = str(matchingLines[0][2])
|
||||
instrNode.attrib['url-ref'] = 'felixcloutier.com/x86/' + matchingLines[0][0]
|
||||
|
||||
|
||||
with open(args.output, "w") as f:
|
||||
rough_string = ET.tostring(root, 'utf-8')
|
||||
reparsed = minidom.parseString(rough_string)
|
||||
f.write('\n'.join([line for line in reparsed.toprettyxml(indent=' '*2).split('\n') if line.strip()]))
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -13,8 +13,10 @@ def main():
|
||||
parser.add_argument('inp2')
|
||||
parser.add_argument('arch2')
|
||||
parser.add_argument('-TP', action='store_true')
|
||||
parser.add_argument('-TPMaxDiff', type=float, default=.0) # if the diff. between two TP measurements is not larger than TPMaxDiff, they are treated as equal
|
||||
parser.add_argument('-lat', action='store_true')
|
||||
parser.add_argument('-ports', action='store_true')
|
||||
parser.add_argument('-printDiff', action='store_true')
|
||||
args = parser.parse_args()
|
||||
|
||||
root1 = ET.parse(args.inp1).getroot()
|
||||
@@ -30,7 +32,7 @@ def main():
|
||||
for instrStr in sorted(instrNodeDict1):
|
||||
instrNode1 = instrNodeDict1[instrStr]
|
||||
if not instrStr in instrNodeDict2:
|
||||
print('No matching entry found for ' + instrStr)
|
||||
if args.printDiff: print('No matching entry found for ' + instrStr)
|
||||
continue
|
||||
instrNode2 = instrNodeDict2[instrStr]
|
||||
for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'):
|
||||
@@ -39,9 +41,9 @@ def main():
|
||||
tp1 = min(map(float, [mNode1.attrib.get('TP_unrolled', sys.maxsize), mNode1.attrib.get('TP_loop', sys.maxsize), mNode1.attrib.get('TP', sys.maxsize)]))
|
||||
tp2 = min(map(float, [mNode2.attrib.get('TP_unrolled', sys.maxsize), mNode2.attrib.get('TP_loop', sys.maxsize), mNode2.attrib.get('TP', sys.maxsize)]))
|
||||
|
||||
if tp1 != tp2:
|
||||
if abs(tp1 - tp2) > args.TPMaxDiff:
|
||||
tpDiff += 1
|
||||
print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2))
|
||||
if args.printDiff: print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2))
|
||||
|
||||
if args.lat:
|
||||
for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')):
|
||||
@@ -49,22 +51,22 @@ def main():
|
||||
latStr2 = ET.tostring(latNode2, encoding='utf-8').decode().strip()
|
||||
if latNode1.attrib != latNode2.attrib:
|
||||
latDiff += 1
|
||||
print(instrStr)
|
||||
print(' ' + latStr1)
|
||||
print(' ' + latStr2)
|
||||
if args.printDiff: print(instrStr)
|
||||
if args.printDiff: print(' ' + latStr1)
|
||||
if args.printDiff: print(' ' + latStr2)
|
||||
|
||||
if args.ports:
|
||||
p1 = mNode1.attrib.get('ports', '')
|
||||
p2 = mNode2.attrib.get('ports', '')
|
||||
if p1 != p2:
|
||||
portsDiff += 1
|
||||
print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2)
|
||||
if args.printDiff: print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2)
|
||||
|
||||
if not args.TP and not args.lat and not args.ports:
|
||||
xmlStr1 = ET.tostring(mNode1, encoding='utf-8').decode().strip()
|
||||
xmlStr2 = ET.tostring(mNode2, encoding='utf-8').decode().strip()
|
||||
|
||||
if xmlStr1 != xmlStr2:
|
||||
if args.printDiff and xmlStr1 != xmlStr2:
|
||||
print('-------------------------------')
|
||||
print(instrStr)
|
||||
print(xmlStr1)
|
||||
|
||||
8
tools/cpuBench/compare_all.sh
Executable file
8
tools/cpuBench/compare_all.sh
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/bin/sh
#
# Run compareXML.py for every listed microarchitecture, comparing a reference
# instructions XML file against the per-architecture result file
# result_<arch>_measured.xml in the current directory.
#
# Usage: ./compare_all.sh [REFERENCE_XML]
#
#   REFERENCE_XML  XML file used as the first input of every comparison.
#                  Defaults to $HOME/code/html/instructions.xml, so calling
#                  the script without arguments behaves as before.
#
# Each comparison enables the throughput (-TP), latency (-lat) and port usage
# (-ports) checks. Throughput values that differ by no more than 0.02 are
# treated as equal (-TPMaxDiff 0.02).
#
# A failing comparison does not stop the loop; the remaining architectures
# are still processed.

# Echo every command before it is executed so the output shows which
# architecture each result belongs to.
set -x

reference_xml="${1:-$HOME/code/html/instructions.xml}"

for arch in CON WOL NHM WSM SNB IVB HSW BDW SKL KBL CFL SKX CNL CLX ICL TGL RKL ADL-P ADL-E BNL AMT GLM GLP TRM ZEN+ ZEN2 ZEN3 ZEN4
do
    # The same architecture name is used for both inputs: the reference file
    # and the measured result file are compared like for like.
    ./compareXML.py "$reference_xml" "$arch" "result_${arch}_measured.xml" "$arch" -TP -lat -ports -TPMaxDiff 0.02
done
|
||||
@@ -567,9 +567,12 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
||||
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
||||
iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||
except subprocess.CalledProcessError as e:
|
||||
print('Error: ' + e.output.decode())
|
||||
logging.warning('Error: ' + e.output.decode())
|
||||
htmlReports.append('<pre>' + e.output.decode() + '</pre>')
|
||||
return None
|
||||
|
||||
htmlReports.append('<pre>' + iacaOut + '</pre>')
|
||||
|
||||
if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
|
||||
print('IACA error')
|
||||
return None
|
||||
@@ -588,8 +591,6 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
||||
if instrPortsCol:
|
||||
instrUopsOnBlockedPorts += float(instrPortsCol[0])
|
||||
|
||||
htmlReports.append('<pre>' + iacaOut + '</pre>')
|
||||
|
||||
if allUopsOnBlockedPorts < blockInstrRep-.5:
|
||||
# something went wrong; fewer uops on ports than blockInstrRep
|
||||
# happens, e.g., on SKX for ports {0, 1} if AVX-512 is active
|
||||
@@ -744,7 +745,8 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports):
|
||||
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
||||
iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||
except subprocess.CalledProcessError as e:
|
||||
print('Error: ' + e.output.decode())
|
||||
logging.warning('Error: ' + e.output.decode())
|
||||
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
|
||||
return None
|
||||
|
||||
if debugOutput:
|
||||
@@ -1092,18 +1094,18 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
||||
iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||
except subprocess.CalledProcessError as e:
|
||||
logging.warning('Error: ' + e.output.decode())
|
||||
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
|
||||
if minTP != sys.maxsize:
|
||||
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
|
||||
continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
|
||||
else:
|
||||
return None
|
||||
|
||||
htmlReports.append('<pre>' + iaca_out + '</pre>\n')
|
||||
|
||||
if not iaca_out or ' ! ' in iaca_out or ' X ' in iaca_out or ' 0X ' in iaca_out or not 'Total Num Of Uops' in iaca_out:
|
||||
print('IACA error')
|
||||
return None
|
||||
|
||||
htmlReports.append('<pre>' + iaca_out + '</pre>\n')
|
||||
|
||||
cycles = float(iaca_out.split('\n')[3].split()[2])
|
||||
cycles = cycles/ic
|
||||
minTP = min(minTP, cycles)
|
||||
@@ -2483,17 +2485,17 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
|
||||
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
||||
iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||
except subprocess.CalledProcessError as e:
|
||||
print('Error: ' + e.output.decode())
|
||||
logging.warning('Error: ' + e.output.decode())
|
||||
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
|
||||
return None
|
||||
|
||||
htmlReports.append('<pre>' + iaca_lat + '</pre>\n')
|
||||
|
||||
if '!' in iaca_lat or not 'Latency' in iaca_lat:
|
||||
print('IACA error')
|
||||
return None
|
||||
|
||||
latency = iaca_lat.split('\n')[3].split()[1]
|
||||
|
||||
htmlReports.append('<pre>' + iaca_lat + '</pre>\n')
|
||||
|
||||
return latency
|
||||
else:
|
||||
if instrNode.attrib['iclass'] in ['CALL_NEAR', 'CALL_NEAR_MEMv', 'CLZERO', 'JMP', 'JMP_MEMv', 'MOVDIR64B', 'RET_NEAR', 'RET_NEAR_IMMw', 'RDMSR', 'WRMSR',
|
||||
@@ -3006,7 +3008,7 @@ def main():
|
||||
parser = argparse.ArgumentParser(description='CPU Benchmarks')
|
||||
parser.add_argument("-iaca", help="IACA command line; if not specified, perf. ctrs. are used")
|
||||
parser.add_argument("-input", help="Instructions XML file", required=True)
|
||||
parser.add_argument("-output", help="Output XML file", default='result.xml')
|
||||
parser.add_argument("-output", help="Output XML file")
|
||||
parser.add_argument("-arch", help="Architecture, Supported: [NHM, ...]")
|
||||
parser.add_argument("-noPretty", help="Disable pretty printing XML file", action='store_true')
|
||||
parser.add_argument("-noPorts", help="Don't measure port usage", action='store_true')
|
||||
@@ -3506,7 +3508,7 @@ def main():
|
||||
print('Could not solve LP for ' + instrNode.attrib['string'] + ':')
|
||||
print(err)
|
||||
|
||||
with open(args.output, "w") as f:
|
||||
with open(args.output or 'result_'+arch+(('_IACA_' + iacaVersion) if useIACA else '_measured')+'.xml' , "w") as f:
|
||||
reparsed = XMLRoot
|
||||
if not args.noPretty:
|
||||
rough_string = ET.tostring(XMLRoot, 'utf-8')
|
||||
|
||||
Reference in New Issue
Block a user