mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2026-01-05 03:50:06 +01:00
fix for AMD doc
This commit is contained in:
@@ -52,6 +52,9 @@ def main():
|
|||||||
lat = row[9]
|
lat = row[9]
|
||||||
tp = row[10]
|
tp = row[10]
|
||||||
|
|
||||||
|
if (ops is None) and (unit is None) and (lat is None) and (tp is None):
|
||||||
|
continue
|
||||||
|
|
||||||
de = DocEntry(mnemonic, operands, ops, unit, lat, tp)
|
de = DocEntry(mnemonic, operands, ops, unit, lat, tp)
|
||||||
docEntrySet.add(de)
|
docEntrySet.add(de)
|
||||||
mnemonicMap.setdefault(mnemonic, []).append(de)
|
mnemonicMap.setdefault(mnemonic, []).append(de)
|
||||||
@@ -74,9 +77,10 @@ def main():
|
|||||||
|
|
||||||
xmlToDocDict = dict()
|
xmlToDocDict = dict()
|
||||||
|
|
||||||
for de in sorted(docEntrySet):
|
for de in docEntrySet:
|
||||||
if de.mnemonic not in iclassAsmDict:
|
if de.mnemonic not in iclassAsmDict:
|
||||||
print('no XML entry found for ' + str(de))
|
print('no XML entry found for ' + str(de))
|
||||||
|
continue
|
||||||
|
|
||||||
xmlFound = False
|
xmlFound = False
|
||||||
for instrNode in iclassAsmDict[de.mnemonic]:
|
for instrNode in iclassAsmDict[de.mnemonic]:
|
||||||
|
|||||||
@@ -536,9 +536,9 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
||||||
iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
|
iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
print('Error: ' + e.output)
|
print('Error: ' + e.output.decode())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
|
if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
|
||||||
@@ -696,9 +696,9 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports):
|
|||||||
createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, getInstrInstanceFromNode(instrNode, useDistinctRegs=True).asm)
|
createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, getInstrInstanceFromNode(instrNode, useDistinctRegs=True).asm)
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
||||||
iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
|
iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
print('Error: ' + e.output)
|
print('Error: ' + e.output.decode())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if debugOutput:
|
if debugOutput:
|
||||||
@@ -1025,11 +1025,11 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, instrStr)
|
createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, instrStr)
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
||||||
iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
|
iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
logging.warn('Error: ' + e.output)
|
logging.warn('Error: ' + e.output.decode())
|
||||||
if minTP != sys.maxsize:
|
if minTP != sys.maxsize:
|
||||||
htmlReports.append('<pre>' + e.output + '</pre>\n')
|
htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
|
||||||
continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
|
continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
@@ -1038,8 +1038,6 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
print('IACA error')
|
print('IACA error')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
print(instrNode.attrib['iform'] + ' - throughput')
|
|
||||||
|
|
||||||
htmlReports.append('<pre>' + iaca_out + '</pre>\n')
|
htmlReports.append('<pre>' + iaca_out + '</pre>\n')
|
||||||
|
|
||||||
cycles = float(iaca_out.split('\n')[3].split()[2])
|
cycles = float(iaca_out.split('\n')[3].split()[2])
|
||||||
@@ -1051,7 +1049,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
minTP_single = min(minTP_single, cycles)
|
minTP_single = min(minTP_single, cycles)
|
||||||
|
|
||||||
unfused_uops_line = iaca_out.split('\n')[-2]
|
unfused_uops_line = iaca_out.split('\n')[-2]
|
||||||
unfused_uops = int(unfused_uops_line.split()[4])/ic
|
unfused_uops = int(unfused_uops_line.split()[4])//ic
|
||||||
|
|
||||||
ports_line = iaca_out.split('\n')[-3]
|
ports_line = iaca_out.split('\n')[-3]
|
||||||
fused_uops = '^' in ports_line.split()[1]
|
fused_uops = '^' in ports_line.split()[1]
|
||||||
@@ -2367,9 +2365,9 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
|
|||||||
if iacaVersion == '2.1':
|
if iacaVersion == '2.1':
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
|
||||||
iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
|
iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
print('Error: ' + e.output)
|
print('Error: ' + e.output.decode())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if '!' in iaca_lat or not 'Latency' in iaca_lat:
|
if '!' in iaca_lat or not 'Latency' in iaca_lat:
|
||||||
@@ -2896,7 +2894,7 @@ def main():
|
|||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
versionString = e.output
|
versionString = e.output
|
||||||
global iacaVersion
|
global iacaVersion
|
||||||
iacaVersion = re.search('\d\.\d', versionString).group(0)
|
iacaVersion = re.search('\d\.\d', versionString.decode()).group(0)
|
||||||
global iacaCMDLine
|
global iacaCMDLine
|
||||||
iacaCMDLine = [args.iaca, '-reduceout', '-arch', arch]
|
iacaCMDLine = [args.iaca, '-reduceout', '-arch', arch]
|
||||||
if iacaVersion == '2.1':
|
if iacaVersion == '2.1':
|
||||||
@@ -2981,8 +2979,10 @@ def main():
|
|||||||
tpResult = getThroughputAndUops(instrNode, True, False, htmlReports)
|
tpResult = getThroughputAndUops(instrNode, True, False, htmlReports)
|
||||||
print(instrNode.attrib['string'] + " - tp: " + str(tpResult))
|
print(instrNode.attrib['string'] + " - tp: " + str(tpResult))
|
||||||
|
|
||||||
if tpResult:
|
if tpResult is None:
|
||||||
tpDict[instrNode] = tpResult
|
continue
|
||||||
|
|
||||||
|
tpDict[instrNode] = tpResult
|
||||||
|
|
||||||
if hasCommonReg:
|
if hasCommonReg:
|
||||||
htmlReports.append('<hr><h2 id="sameReg">With the same register for for different operands</h2>\n')
|
htmlReports.append('<hr><h2 id="sameReg">With the same register for for different operands</h2>\n')
|
||||||
@@ -2999,7 +2999,7 @@ def main():
|
|||||||
tpDictIndexedAddr[instrNode] = tpResultIndexed
|
tpDictIndexedAddr[instrNode] = tpResultIndexed
|
||||||
|
|
||||||
# Macro-Fusion
|
# Macro-Fusion
|
||||||
if tpResult.fused_uops == 1 and (instrNode.find('./operand[@type="flags"][@w="1"]') is not None):
|
if (not useIACA) and tpResult.fused_uops == 1 and (instrNode.find('./operand[@type="flags"][@w="1"]') is not None):
|
||||||
htmlReports.append('<hr><h2 id="macroFusion">Tests for macro-fusion</h2>\n')
|
htmlReports.append('<hr><h2 id="macroFusion">Tests for macro-fusion</h2>\n')
|
||||||
fusibleInstrList = []
|
fusibleInstrList = []
|
||||||
for brInstr in condBrInstr:
|
for brInstr in condBrInstr:
|
||||||
|
|||||||
Reference in New Issue
Block a user