fix for AMD doc

This commit is contained in:
Andreas Abel
2021-03-26 01:13:10 +01:00
parent 77b42f0b71
commit 0d35f82778
2 changed files with 21 additions and 17 deletions

View File

@@ -52,6 +52,9 @@ def main():
lat = row[9] lat = row[9]
tp = row[10] tp = row[10]
if (ops is None) and (unit is None) and (lat is None) and (tp is None):
continue
de = DocEntry(mnemonic, operands, ops, unit, lat, tp) de = DocEntry(mnemonic, operands, ops, unit, lat, tp)
docEntrySet.add(de) docEntrySet.add(de)
mnemonicMap.setdefault(mnemonic, []).append(de) mnemonicMap.setdefault(mnemonic, []).append(de)
@@ -74,9 +77,10 @@ def main():
xmlToDocDict = dict() xmlToDocDict = dict()
for de in sorted(docEntrySet): for de in docEntrySet:
if de.mnemonic not in iclassAsmDict: if de.mnemonic not in iclassAsmDict:
print('no XML entry found for ' + str(de)) print('no XML entry found for ' + str(de))
continue
xmlFound = False xmlFound = False
for instrNode in iclassAsmDict[de.mnemonic]: for instrNode in iclassAsmDict[de.mnemonic]:

View File

@@ -536,9 +536,9 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
try: try:
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print('Error: ' + e.output) print('Error: ' + e.output.decode())
return None return None
if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut: if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
@@ -696,9 +696,9 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports):
createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, getInstrInstanceFromNode(instrNode, useDistinctRegs=True).asm) createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, getInstrInstanceFromNode(instrNode, useDistinctRegs=True).asm)
try: try:
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print('Error: ' + e.output) print('Error: ' + e.output.decode())
return None return None
if debugOutput: if debugOutput:
@@ -1025,11 +1025,11 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, instrStr) createIacaAsmFile("/tmp/ramdisk/asm.s", "", 0, instrStr)
try: try:
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
logging.warn('Error: ' + e.output) logging.warn('Error: ' + e.output.decode())
if minTP != sys.maxsize: if minTP != sys.maxsize:
htmlReports.append('<pre>' + e.output + '</pre>\n') htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
continue # on SNB, IACA 2.2 crashes on only some (larger) inputs continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
else: else:
return None return None
@@ -1038,8 +1038,6 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
print('IACA error') print('IACA error')
return None return None
print(instrNode.attrib['iform'] + ' - throughput')
htmlReports.append('<pre>' + iaca_out + '</pre>\n') htmlReports.append('<pre>' + iaca_out + '</pre>\n')
cycles = float(iaca_out.split('\n')[3].split()[2]) cycles = float(iaca_out.split('\n')[3].split()[2])
@@ -1051,7 +1049,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
minTP_single = min(minTP_single, cycles) minTP_single = min(minTP_single, cycles)
unfused_uops_line = iaca_out.split('\n')[-2] unfused_uops_line = iaca_out.split('\n')[-2]
unfused_uops = int(unfused_uops_line.split()[4])/ic unfused_uops = int(unfused_uops_line.split()[4])//ic
ports_line = iaca_out.split('\n')[-3] ports_line = iaca_out.split('\n')[-3]
fused_uops = '^' in ports_line.split()[1] fused_uops = '^' in ports_line.split()[1]
@@ -2367,9 +2365,9 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
if iacaVersion == '2.1': if iacaVersion == '2.1':
try: try:
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print('Error: ' + e.output) print('Error: ' + e.output.decode())
return None return None
if '!' in iaca_lat or not 'Latency' in iaca_lat: if '!' in iaca_lat or not 'Latency' in iaca_lat:
@@ -2896,7 +2894,7 @@ def main():
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
versionString = e.output versionString = e.output
global iacaVersion global iacaVersion
iacaVersion = re.search('\d\.\d', versionString).group(0) iacaVersion = re.search('\d\.\d', versionString.decode()).group(0)
global iacaCMDLine global iacaCMDLine
iacaCMDLine = [args.iaca, '-reduceout', '-arch', arch] iacaCMDLine = [args.iaca, '-reduceout', '-arch', arch]
if iacaVersion == '2.1': if iacaVersion == '2.1':
@@ -2981,8 +2979,10 @@ def main():
tpResult = getThroughputAndUops(instrNode, True, False, htmlReports) tpResult = getThroughputAndUops(instrNode, True, False, htmlReports)
print(instrNode.attrib['string'] + " - tp: " + str(tpResult)) print(instrNode.attrib['string'] + " - tp: " + str(tpResult))
if tpResult: if tpResult is None:
tpDict[instrNode] = tpResult continue
tpDict[instrNode] = tpResult
if hasCommonReg: if hasCommonReg:
htmlReports.append('<hr><h2 id="sameReg">With the same register for for different operands</h2>\n') htmlReports.append('<hr><h2 id="sameReg">With the same register for for different operands</h2>\n')
@@ -2999,7 +2999,7 @@ def main():
tpDictIndexedAddr[instrNode] = tpResultIndexed tpDictIndexedAddr[instrNode] = tpResultIndexed
# Macro-Fusion # Macro-Fusion
if tpResult.fused_uops == 1 and (instrNode.find('./operand[@type="flags"][@w="1"]') is not None): if (not useIACA) and tpResult.fused_uops == 1 and (instrNode.find('./operand[@type="flags"][@w="1"]') is not None):
htmlReports.append('<hr><h2 id="macroFusion">Tests for macro-fusion</h2>\n') htmlReports.append('<hr><h2 id="macroFusion">Tests for macro-fusion</h2>\n')
fusibleInstrList = [] fusibleInstrList = []
for brInstr in condBrInstr: for brInstr in condBrInstr: