diff --git a/tools/cpuBench/addAMDDocToXML.py b/tools/cpuBench/addAMDDocToXML.py index 212c39d..d6247d4 100755 --- a/tools/cpuBench/addAMDDocToXML.py +++ b/tools/cpuBench/addAMDDocToXML.py @@ -52,6 +52,9 @@ def main(): lat = row[9] tp = row[10] + if (ops is None) and (unit is None) and (lat is None) and (tp is None): + continue + de = DocEntry(mnemonic, operands, ops, unit, lat, tp) docEntrySet.add(de) mnemonicMap.setdefault(mnemonic, []).append(de) @@ -74,9 +77,10 @@ def main(): xmlToDocDict = dict() - for de in sorted(docEntrySet): + for de in docEntrySet: if de.mnemonic not in iclassAsmDict: print('no XML entry found for ' + str(de)) + continue xmlFound = False for instrNode in iclassAsmDict[de.mnemonic]: diff --git a/tools/cpuBench/cpuBench.py b/tools/cpuBench/cpuBench.py index 7e38826..2d19b10 100755 --- a/tools/cpuBench/cpuBench.py +++ b/tools/cpuBench/cpuBench.py @@ -536,9 +536,9 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr try: subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) - iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) + iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode() except subprocess.CalledProcessError as e: - print('Error: ' + e.output) + print('Error: ' + e.output.decode()) return None if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut: @@ -696,9 +696,9 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports): createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, getInstrInstanceFromNode(instrNode, useDistinctRegs=True).asm) try: subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) - iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) + iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode() except subprocess.CalledProcessError as e: - print('Error: ' + e.output) + print('Error: ' + e.output.decode()) return None if debugOutput: @@ -1025,11 +1025,11 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, instrStr) try: subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) - iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) + iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode() except subprocess.CalledProcessError as e: - logging.warn('Error: ' + e.output) + logging.warn('Error: ' + e.output.decode()) if minTP != sys.maxsize: - htmlReports.append('
' + e.output + '
\n') + htmlReports.append('
' + e.output.decode() + '
\n') continue # on SNB, IACA 2.2 crashes on only some (larger) inputs else: return None @@ -1038,8 +1038,6 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports print('IACA error') return None - print(instrNode.attrib['iform'] + ' - throughput') - htmlReports.append('
' + iaca_out + '
\n') cycles = float(iaca_out.split('\n')[3].split()[2]) @@ -1051,7 +1049,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports minTP_single = min(minTP_single, cycles) unfused_uops_line = iaca_out.split('\n')[-2] - unfused_uops = int(unfused_uops_line.split()[4])/ic + unfused_uops = int(unfused_uops_line.split()[4])//ic ports_line = iaca_out.split('\n')[-3] fused_uops = '^' in ports_line.split()[1] @@ -2367,9 +2365,9 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports): if iacaVersion == '2.1': try: subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) - iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) + iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode() except subprocess.CalledProcessError as e: - print('Error: ' + e.output) + print('Error: ' + e.output.decode()) return None if '!' in iaca_lat or not 'Latency' in iaca_lat: @@ -2896,7 +2894,7 @@ def main(): except subprocess.CalledProcessError as e: versionString = e.output global iacaVersion - iacaVersion = re.search('\d\.\d', versionString).group(0) + iacaVersion = re.search('\d\.\d', versionString.decode()).group(0) global iacaCMDLine iacaCMDLine = [args.iaca, '-reduceout', '-arch', arch] if iacaVersion == '2.1': @@ -2981,8 +2979,10 @@ def main(): tpResult = getThroughputAndUops(instrNode, True, False, htmlReports) print(instrNode.attrib['string'] + " - tp: " + str(tpResult)) - if tpResult: - tpDict[instrNode] = tpResult + if tpResult is None: + continue + + tpDict[instrNode] = tpResult if hasCommonReg: htmlReports.append('

With the same register for for different operands

\n') @@ -2999,7 +2999,7 @@ def main(): tpDictIndexedAddr[instrNode] = tpResultIndexed # Macro-Fusion - if tpResult.fused_uops == 1 and (instrNode.find('./operand[@type="flags"][@w="1"]') is not None): + if (not useIACA) and tpResult.fused_uops == 1 and (instrNode.find('./operand[@type="flags"][@w="1"]') is not None): htmlReports.append('

Tests for macro-fusion

\n') fusibleInstrList = [] for brInstr in condBrInstr: