mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2025-12-15 19:10:08 +01:00
Initial support for Arrow Lake
This commit is contained in:
@@ -7,8 +7,6 @@ import sys
|
|||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
|
||||||
from x64_lib import *
|
|
||||||
|
|
||||||
PFC_START_ASM = '.quad 0xE0B513B1C2813F04'
|
PFC_START_ASM = '.quad 0xE0B513B1C2813F04'
|
||||||
PFC_STOP_ASM = '.quad 0xF0B513B1C2813F04'
|
PFC_STOP_ASM = '.quad 0xF0B513B1C2813F04'
|
||||||
|
|
||||||
@@ -49,7 +47,7 @@ def assemble(code, objFile, asmFile='/tmp/ramdisk/asm.s'):
|
|||||||
if ('same type of prefix used twice' in e.output.decode()) and ('REX64' in code):
|
if ('same type of prefix used twice' in e.output.decode()) and ('REX64' in code):
|
||||||
return assemble(code.replace('REX64 ', ''), objFile, asmFile)
|
return assemble(code.replace('REX64 ', ''), objFile, asmFile)
|
||||||
elif "register type mismatch for `lsl'" in e.output.decode():
|
elif "register type mismatch for `lsl'" in e.output.decode():
|
||||||
code, n = re.subn(r'(LSL \S*, )(\S*?);', lambda m: f'{m.group(1)}{regToSize(m.group(2),16)};', code)
|
code, n = re.subn(r'(LSL \S*, )E?(\S*?)(D?);', lambda m: f'{m.group(1)}{m.group(2)}{m.group(3).replace("D", "W")};', code)
|
||||||
if n > 0:
|
if n > 0:
|
||||||
return assemble(code, objFile, asmFile)
|
return assemble(code, objFile, asmFile)
|
||||||
print(f"Error (assemble): {str(e)}", file=sys.stderr)
|
print(f"Error (assemble): {str(e)}", file=sys.stderr)
|
||||||
|
|||||||
@@ -224,7 +224,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
|
|||||||
elif arch in ['NHM', 'WSM', 'BNL', 'GLM', 'GLP']: evt = 'UOPS_RETIRED.ANY'
|
elif arch in ['NHM', 'WSM', 'BNL', 'GLM', 'GLP']: evt = 'UOPS_RETIRED.ANY'
|
||||||
elif arch in ['SNB', 'SLM', 'AMT', 'ADL-E', 'MTL-E']: evt = 'UOPS_RETIRED.ALL'
|
elif arch in ['SNB', 'SLM', 'AMT', 'ADL-E', 'MTL-E']: evt = 'UOPS_RETIRED.ALL'
|
||||||
elif arch in ['HSW']: evt = 'UOPS_EXECUTED.CORE'
|
elif arch in ['HSW']: evt = 'UOPS_EXECUTED.CORE'
|
||||||
elif arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: evt = 'UOPS_EXECUTED.THREAD'
|
elif arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P', 'ARL-P']: evt = 'UOPS_EXECUTED.THREAD'
|
||||||
elif arch in ['TRM']: evt = 'TOPDOWN_RETIRING.ALL'
|
elif arch in ['TRM']: evt = 'TOPDOWN_RETIRING.ALL'
|
||||||
localHtmlReports.append('<li>' + evt + ': ' + str(value) + '</li>\n')
|
localHtmlReports.append('<li>' + evt + ': ' + str(value) + '</li>\n')
|
||||||
localHtmlReports.append('</ul>\n</li>')
|
localHtmlReports.append('</ul>\n</li>')
|
||||||
@@ -279,17 +279,18 @@ def getEventConfig(event):
|
|||||||
if arch in ['BNL', 'SLM', 'AMT']: return 'C2.10' # UOPS_RETIRED.ANY
|
if arch in ['BNL', 'SLM', 'AMT']: return 'C2.10' # UOPS_RETIRED.ANY
|
||||||
if arch in ['HSW']: return 'B1.02' # UOPS_EXECUTED.CORE; note: may undercount due to erratum HSD30
|
if arch in ['HSW']: return 'B1.02' # UOPS_EXECUTED.CORE; note: may undercount due to erratum HSD30
|
||||||
if arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return 'B1.01' # UOPS_EXECUTED.THREAD
|
if arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return 'B1.01' # UOPS_EXECUTED.THREAD
|
||||||
|
if arch in ['ARL-P']: return 'B1.01.CTR=3' # UOPS_EXECUTED.THREAD
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '0C1.00'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '0C1.00'
|
||||||
if event == 'RETIRE_SLOTS':
|
if event == 'RETIRE_SLOTS':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return 'C2.02'
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P', 'ARL-P']: return 'C2.02'
|
||||||
if event == 'UOPS_MITE':
|
if event == 'UOPS_MITE':
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return '79.04'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P', 'ARL-P']: return '79.04'
|
||||||
if event == 'UOPS_MITE>=1':
|
if event == 'UOPS_MITE>=1':
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return '79.04.CMSK=1'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P', 'ARL-P']: return '79.04.CMSK=1'
|
||||||
if event == 'UOPS_MS':
|
if event == 'UOPS_MS':
|
||||||
if arch in ['NHM', 'WSM']: return 'D1.02'
|
if arch in ['NHM', 'WSM']: return 'D1.02'
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return '79.30'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return '79.30'
|
||||||
if arch in ['ADL-P', 'EMR', 'MTL-P']: return '79.20'
|
if arch in ['ADL-P', 'EMR', 'MTL-P', 'ARL-P']: return '79.20'
|
||||||
if arch in ['SLM', 'AMT', 'GLM', 'GLP', 'TRM', 'ADL-E', 'MTL-E']: return 'C2.01'
|
if arch in ['SLM', 'AMT', 'GLM', 'GLP', 'TRM', 'ADL-E', 'MTL-E']: return 'C2.01'
|
||||||
if arch in ['BNL']: return 'A9.01' # undocumented, but seems to work
|
if arch in ['BNL']: return 'A9.01' # undocumented, but seems to work
|
||||||
if event == 'UOPS_PORT_0':
|
if event == 'UOPS_PORT_0':
|
||||||
@@ -341,13 +342,37 @@ def getEventConfig(event):
|
|||||||
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.20.CMSK=2'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.20.CMSK=2'
|
||||||
if event == 'UOPS_PORT_23A':
|
if event == 'UOPS_PORT_23A':
|
||||||
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.04'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.04'
|
||||||
|
if event == 'UOPS_DISPATCHED.INT_EU_ALL':
|
||||||
|
if arch in ['ARL-P']: return 'B2.01.CTR=2'
|
||||||
|
if event == 'UOPS_DISPATCHED.ALU':
|
||||||
|
if arch in ['ARL-P']: return 'B2.02.CTR=2'
|
||||||
|
if event == 'UOPS_DISPATCHED.LD':
|
||||||
|
if arch in ['ARL-P']: return 'B2.04'
|
||||||
|
if event == 'UOPS_DISPATCHED.SLOW':
|
||||||
|
if arch in ['ARL-P']: return 'B2.08'
|
||||||
|
if event == 'UOPS_DISPATCHED.STD':
|
||||||
|
if arch in ['ARL-P']: return 'B2.10'
|
||||||
|
if event == 'UOPS_DISPATCHED.SHIFT':
|
||||||
|
if arch in ['ARL-P']: return 'B2.20'
|
||||||
|
if event == 'UOPS_DISPATCHED.JMP':
|
||||||
|
if arch in ['ARL-P']: return 'B2.40'
|
||||||
|
if event == 'UOPS_DISPATCHED.STA':
|
||||||
|
if arch in ['ARL-P']: return 'B2.80'
|
||||||
|
if event == 'UOPS_DISPATCHED.V0':
|
||||||
|
if arch in ['ARL-P']: return 'B3.01'
|
||||||
|
if event == 'UOPS_DISPATCHED.V1':
|
||||||
|
if arch in ['ARL-P']: return 'B3.02'
|
||||||
|
if event == 'UOPS_DISPATCHED.V2':
|
||||||
|
if arch in ['ARL-P']: return 'B3.04'
|
||||||
|
if event == 'UOPS_DISPATCHED.V3':
|
||||||
|
if arch in ['ARL-P']: return 'B3.08'
|
||||||
if event == 'DIV_CYCLES':
|
if event == 'DIV_CYCLES':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return '14.01' # undocumented on HSW, but seems to work
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return '14.01' # undocumented on HSW, but seems to work
|
||||||
if arch in ['ICL', 'TGL', 'RKL']: return '14.09'
|
if arch in ['ICL', 'TGL', 'RKL']: return '14.09'
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '0D3.00'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '0D3.00'
|
||||||
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B0.09.CMSK=1'
|
if arch in ['ADL-P', 'EMR', 'MTL-P', 'ARL-P']: return 'B0.09.CMSK=1'
|
||||||
if event == 'ILD_STALL.LCP':
|
if event == 'ILD_STALL.LCP':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return '87.01'
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P', 'ARL-P']: return '87.01'
|
||||||
if event == 'INST_DECODED.DEC0':
|
if event == 'INST_DECODED.DEC0':
|
||||||
if arch in ['NHM', 'WSM']: return '18.01'
|
if arch in ['NHM', 'WSM']: return '18.01'
|
||||||
if event == 'FpuPipeAssignment.Total0':
|
if event == 'FpuPipeAssignment.Total0':
|
||||||
@@ -407,7 +432,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
|
|||||||
commonReg = None
|
commonReg = None
|
||||||
if not useDistinctRegs:
|
if not useDistinctRegs:
|
||||||
commonRegs = findCommonRegisters(instrNode)
|
commonRegs = findCommonRegisters(instrNode)
|
||||||
commonRegs -= set(doNotWriteRegs)|set(doNotReadRegs)|globalDoNotWriteRegs|(memRegs if hasMemOperand else set())
|
commonRegs -= set(map(getCanonicalReg, set(doNotWriteRegs)|set(doNotReadRegs)|globalDoNotWriteRegs|(memRegs if hasMemOperand else set())))
|
||||||
if commonRegs:
|
if commonRegs:
|
||||||
commonReg = sortRegs(commonRegs)[0]
|
commonReg = sortRegs(commonRegs)[0]
|
||||||
|
|
||||||
@@ -543,7 +568,7 @@ def createIacaAsmFile(fileName, prefixInstr, prefixRep, instr):
|
|||||||
writeFile(fileName, asm)
|
writeFile(fileName, asm)
|
||||||
|
|
||||||
|
|
||||||
def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstrRep, blockedPorts, config, htmlReports):
|
def getUopsOnBlockedPorts(instrNode, blockInstrNode, blockInstrRep, blockedPorts, config, htmlReports):
|
||||||
instrInstance = config.independentInstrs[0]
|
instrInstance = config.independentInstrs[0]
|
||||||
instr = instrInstance.asm
|
instr = instrInstance.asm
|
||||||
readRegs = instrInstance.readRegs
|
readRegs = instrInstance.readRegs
|
||||||
@@ -600,6 +625,8 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
|||||||
if arch in ['NHM', 'WSM']:
|
if arch in ['NHM', 'WSM']:
|
||||||
# Needed for workaround for broken port 5 counter
|
# Needed for workaround for broken port 5 counter
|
||||||
events = ['UOPS_PORT_'+str(p) for p in range(0,6)] + ['UOPS']
|
events = ['UOPS_PORT_'+str(p) for p in range(0,6)] + ['UOPS']
|
||||||
|
elif arch in ['ARL-P']:
|
||||||
|
events = ['UOPS_DISPATCHED.V0', 'UOPS_DISPATCHED.V1', 'UOPS_DISPATCHED.V2', 'UOPS_DISPATCHED.V3']
|
||||||
else:
|
else:
|
||||||
events = ['UOPS_PORT_'+str(p) for p in blockedPorts]
|
events = ['UOPS_PORT_'+str(p) for p in blockedPorts]
|
||||||
|
|
||||||
@@ -637,11 +664,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
|||||||
measurementResult['UOPS_PORT_5'] = measurementResult['UOPS_PORT_5B']
|
measurementResult['UOPS_PORT_5'] = measurementResult['UOPS_PORT_5B']
|
||||||
del measurementResult['UOPS_PORT_5B']
|
del measurementResult['UOPS_PORT_5B']
|
||||||
|
|
||||||
if isIntelCPU():
|
ports_dict = {getPortNameFromEventName(p): i for p, i in measurementResult.items() if getPortNameFromEventName(p) is not None}
|
||||||
ports_dict = {p[10:]: i for p, i in measurementResult.items() if p.startswith('UOPS_PORT')}
|
|
||||||
else:
|
|
||||||
ports_dict = {p[23:]: i for p, i in measurementResult.items() if 'FpuPipeAssignment.Total' in p}
|
|
||||||
|
|
||||||
if sum(ports_dict.values()) < blockInstrRep-.5:
|
if sum(ports_dict.values()) < blockInstrRep-.5:
|
||||||
# something went wrong; fewer uops on ports than blockInstrRep
|
# something went wrong; fewer uops on ports than blockInstrRep
|
||||||
# happens, e.g., on SKX for ports {0, 1} if AVX-512 is active
|
# happens, e.g., on SKX for ports {0, 1} if AVX-512 is active
|
||||||
@@ -650,6 +673,48 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
|||||||
return int(.2+sum([uops for p, uops in ports_dict.items() if p in blockedPorts])) - blockInstrRep
|
return int(.2+sum([uops for p, uops in ports_dict.items() if p in blockedPorts])) - blockInstrRep
|
||||||
|
|
||||||
|
|
||||||
|
# Example return value: {'ALU': 2, 'LD': 1, 'INT_OTHER': 2}
|
||||||
|
def getUopTypes(instrNode, config, lfenceUopTypeDict, htmlReports):
   """Count the dispatched uops of an instruction per uop type.

   The first independent instance of the instruction (config.independentInstrs[0]) is
   measured, followed by nops and an lfence, with the UOPS_DISPATCHED.* counters
   configured. If config.preInstrCode is set, that code is measured on its own and its
   counts are subtracted.

   The returned dict is keyed by the event name without the 'UOPS_DISPATCHED.' prefix.
   The 'INT_EU_ALL' entry is replaced by a derived 'INT_OTHER' entry, i.e., INT_EU_ALL
   minus ALU, SLOW, SHIFT and JMP. If lfenceUopTypeDict is non-empty, its per-type counts
   are subtracted from the result.

   Returns an empty dict (after printing a diagnostic) if 'INT_OTHER' would be negative.
   Raises RuntimeError if the current arch is not 'ARL-P'.
   Progress markup is appended to htmlReports.
   """
   htmlReports.append('<hr><h3>With lfence (to avoid incorrect counts due to replays)</h3>')

   if arch not in ['ARL-P']:
      raise RuntimeError(f"getUopTypes() does not support {arch}")

   events = ['UOPS_DISPATCHED.INT_EU_ALL', 'UOPS_DISPATCHED.ALU', 'UOPS_DISPATCHED.LD', 'UOPS_DISPATCHED.SLOW', 'UOPS_DISPATCHED.STD',
             'UOPS_DISPATCHED.SHIFT', 'UOPS_DISPATCHED.JMP', 'UOPS_DISPATCHED.STA']
   configurePFCs(events)

   firstInstance = config.independentInstrs[0]
   init = firstInstance.regMemInit + config.init

   def measure(code):
      # Runs one experiment and wraps its report entries in their own <ul> element
      htmlReports.append('<ul>\n')
      counts = runExperiment(instrNode, code, init=init, unrollCount=100, basicMode=True, htmlReports=htmlReports)
      htmlReports.append('</ul>\n')
      return counts

   # Without the nops, the INT_EU_ALL counter undercounts on ARL in some cases, e.g., 'RCR AL, 0;'
   measurementResult = measure(f'{config.preInstrCode}; {firstInstance.asm}; 10*|nop|; lfence')

   if config.preInstrCode:
      # Remove the contribution of the code that had to be placed in front of the instruction
      preInstrResult = measure(config.preInstrCode)
      for ev in events:
         measurementResult[ev] -= preInstrResult[ev]

   uopTypeDict = {}
   for evtName, count in measurementResult.items():
      if evtName in events:
         uopTypeDict[evtName.replace('UOPS_DISPATCHED.', '')] = int(count + .2)

   # INT_EU_ALL is only used to derive the integer uops not covered by a more specific counter
   intAll = uopTypeDict.pop('INT_EU_ALL')
   uopTypeDict['INT_OTHER'] = intAll - sum(uopTypeDict[t] for t in ('ALU', 'SLOW', 'SHIFT', 'JMP'))
   if uopTypeDict['INT_OTHER'] < 0:
      print((f"unexpected uopTypeDict {config.preInstrCode} {firstInstance.asm} {measurementResult}"))
      return {}

   if lfenceUopTypeDict:
      # Subtract the uops that belong to the trailing lfence itself
      uopTypeDict = {t: n - lfenceUopTypeDict[t] for t, n in uopTypeDict.items()}

   return uopTypeDict
|
||||||
|
|
||||||
|
|
||||||
# Takes an instrNode and returns a list [instrI, instrI', ...] s.t. instrI(')* are the results of
|
# Takes an instrNode and returns a list [instrI, instrI', ...] s.t. instrI(')* are the results of
|
||||||
# calls to getInstrInstanceFromNode for instrNode and there are no read-after-writes of the same regs/memory locations. The length of the list is limited by maxTPRep.
|
# calls to getInstrInstanceFromNode for instrNode and there are no read-after-writes of the same regs/memory locations. The length of the list is limited by maxTPRep.
|
||||||
def getIndependentInstructions(instrNode, useDistinctRegs, useIndexedAddr, doNotReadRegs=None, doNotWriteRegs=None, initialOffset=0, immediate=2):
|
def getIndependentInstructions(instrNode, useDistinctRegs, useIndexedAddr, doNotReadRegs=None, doNotWriteRegs=None, initialOffset=0, immediate=2):
|
||||||
@@ -1057,6 +1122,16 @@ def fancyRound(cycles):
|
|||||||
return round(cycles, 2)
|
return round(cycles, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def getPortNameFromEventName(evtName: str) -> 'str | None':
   """Return the port name encoded in a per-port counter event name.

   Recognized event families and the part that is returned:
     'UOPS_PORT_<port>'               -> '<port>'  (e.g., 'UOPS_PORT_23A' -> '23A')
     'UOPS_DISPATCHED.V<port>'        -> '<port>'  (e.g., 'UOPS_DISPATCHED.V1' -> '1')
     'FpuPipeAssignment.Total<port>'  -> '<port>'  (e.g., 'FpuPipeAssignment.Total0' -> '0')

   Returns None for every other event name (e.g., 'UOPS' or 'UOPS_DISPATCHED.ALU'), so
   callers can use the result to filter out events that are not per-port counters.
   """
   # (prefix identifying the event family, number of additional separator characters to skip)
   portEventPrefixes = (
      ('UOPS_PORT', 1),  # the port name follows the '_' after the prefix
      ('UOPS_DISPATCHED.V', 0),
      ('FpuPipeAssignment.Total', 0),
   )
   for prefix, nSeparatorChars in portEventPrefixes:
      if evtName.startswith(prefix):
         return evtName[len(prefix) + nSeparatorChars:]
   return None
|
||||||
|
|
||||||
|
|
||||||
TPResult = namedtuple('TPResult', ['TP', 'TP_loop', 'TP_noLoop', 'TP_noDepBreaking_noLoop', 'TP_single', 'uops', 'fused_uops', 'uops_MITE', 'uops_MS', 'divCycles',
|
TPResult = namedtuple('TPResult', ['TP', 'TP_loop', 'TP_noLoop', 'TP_noDepBreaking_noLoop', 'TP_single', 'uops', 'fused_uops', 'uops_MITE', 'uops_MS', 'divCycles',
|
||||||
'ILD_stalls', 'complexDec', 'nAvailableSimpleDecoders', 'config', 'unblocked_ports', 'all_used_ports'])
|
'ILD_stalls', 'complexDec', 'nAvailableSimpleDecoders', 'config', 'unblocked_ports', 'all_used_ports'])
|
||||||
|
|
||||||
@@ -1138,10 +1213,10 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
else:
|
else:
|
||||||
divCycles = 0
|
divCycles = 0
|
||||||
|
|
||||||
return TPResult(minTP, minTP, minTP, minTP_noDepBreaking_noLoop, minTP_single, unfused_uops, fused_uops, None, None, divCycles, 0, False, None, config,
|
return TPResult(TP=minTP, TP_loop=minTP, TP_noLoop=minTP, TP_noDepBreaking_noLoop=minTP_noDepBreaking_noLoop, TP_single=minTP_single, uops=unfused_uops,
|
||||||
ports_dict, all_used_ports)
|
fused_uops=fused_uops, uops_MITE=None, uops_MS=None, divCycles=divCycles, ILD_stalls=0, complexDec=False, nAvailableSimpleDecoders=None,
|
||||||
|
config=config, unblocked_ports=ports_dict, all_used_ports=all_used_ports)
|
||||||
else:
|
else:
|
||||||
hasMemWriteOperand = len(instrNode.findall('./operand[@type="mem"][@r="1"][@w="1"]'))>0
|
|
||||||
uops = None
|
uops = None
|
||||||
uopsFused = None
|
uopsFused = None
|
||||||
uopsMITE = None
|
uopsMITE = None
|
||||||
@@ -1249,8 +1324,8 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
if not useDepBreakingInstrs:
|
if not useDepBreakingInstrs:
|
||||||
minTP_noDepBreaking_noLoop = min(minTP_noDepBreaking_noLoop, cycles)
|
minTP_noDepBreaking_noLoop = min(minTP_noDepBreaking_noLoop, cycles)
|
||||||
for p, i in result.items():
|
for p, i in result.items():
|
||||||
if (i/ic > .1) and (('UOPS_PORT' in p) or ('FpuPipeAssignment.Total' in p)):
|
if (i/ic > .1) and (getPortNameFromEventName(p) is not None):
|
||||||
all_used_ports.add(p[10:] if ('UOPS_PORT' in p) else p[23:])
|
all_used_ports.add(getPortNameFromEventName(p))
|
||||||
else:
|
else:
|
||||||
minTP_loop = min(minTP_loop, cycles)
|
minTP_loop = min(minTP_loop, cycles)
|
||||||
|
|
||||||
@@ -1258,11 +1333,9 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
minConfig = config
|
minConfig = config
|
||||||
minTP_single = min(minTP_single, cycles)
|
minTP_single = min(minTP_single, cycles)
|
||||||
|
|
||||||
if isIntelCPU():
|
if not isAMDCPU() or not instrNode.attrib['extension'] == 'BASE':
|
||||||
ports_dict = {p[10:]: i for p, i in result.items() if 'UOPS_PORT' in p}
|
# We ignore BASE instructions for AMD, as they sometimes wrongly count floating point uops
|
||||||
elif isAMDCPU() and not instrNode.attrib['extension'] == 'BASE':
|
ports_dict = {getPortNameFromEventName(p): i for p, i in result.items() if getPortNameFromEventName(p) is not None}
|
||||||
# We ignore BASE instructions, as they sometimes wrongly count floating point uops
|
|
||||||
ports_dict = {p[23:]: i for p, i in result.items() if 'FpuPipeAssignment.Total' in p}
|
|
||||||
|
|
||||||
uops = int(result['UOPS']+.2)
|
uops = int(result['UOPS']+.2)
|
||||||
if 'RETIRE_SLOTS' in result:
|
if 'RETIRE_SLOTS' in result:
|
||||||
@@ -1300,8 +1373,9 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
htmlReports.append('</div>')
|
htmlReports.append('</div>')
|
||||||
|
|
||||||
if minTP < sys.maxsize:
|
if minTP < sys.maxsize:
|
||||||
return TPResult(minTP, minTP_loop, minTP_noLoop, minTP_noDepBreaking_noLoop, minTP_single, uops, uopsFused, uopsMITE, uopsMS, divCycles, ILD_stalls,
|
return TPResult(TP=minTP, TP_loop=minTP_loop, TP_noLoop=minTP_noLoop, TP_noDepBreaking_noLoop=minTP_noDepBreaking_noLoop, TP_single=minTP_single,
|
||||||
complexDec, nAvailableSimpleDecoders, minConfig, ports_dict, all_used_ports)
|
uops=uops, fused_uops=uopsFused, uops_MITE=uopsMITE, uops_MS=uopsMS, divCycles=divCycles, ILD_stalls=ILD_stalls, complexDec=complexDec,
|
||||||
|
nAvailableSimpleDecoders=nAvailableSimpleDecoders, config=minConfig, unblocked_ports=ports_dict, all_used_ports=all_used_ports)
|
||||||
|
|
||||||
|
|
||||||
def canMacroFuse(flagInstrNode, branchInstrNode, htmlReports):
|
def canMacroFuse(flagInstrNode, branchInstrNode, htmlReports):
|
||||||
@@ -1359,7 +1433,7 @@ def getBasicLatencies(instrNodeList):
|
|||||||
for flag in STATUSFLAGS_noAF:
|
for flag in STATUSFLAGS_noAF:
|
||||||
testSetResult = runExperiment(None, 'TEST AL, AL; SET' + flag[0] + ' AL')
|
testSetResult = runExperiment(None, 'TEST AL, AL; SET' + flag[0] + ' AL')
|
||||||
# we additionally test with a nop, as the result may be higher than the actual latency (e.g., on ADL-P), probably due to non-optimal port assignments
|
# we additionally test with a nop, as the result may be higher than the actual latency (e.g., on ADL-P), probably due to non-optimal port assignments
|
||||||
testSetResultNop = runExperiment(None, 'TEST AL, AL; SET' + flag[0] + ' AL; NOP')
|
testSetResultNop = runExperiment(None, 'TEST AL, AL; NOP; SET' + flag[0] + ' AL;')
|
||||||
testSetCycles = min(int(testSetResult['Core cycles'] + .2), int(testSetResultNop['Core cycles'] + .2))
|
testSetCycles = min(int(testSetResult['Core cycles'] + .2), int(testSetResultNop['Core cycles'] + .2))
|
||||||
|
|
||||||
if testSetCycles == 2:
|
if testSetCycles == 2:
|
||||||
@@ -3110,7 +3184,9 @@ def main():
|
|||||||
else:
|
else:
|
||||||
configurePFCs(['UOPS', 'RETIRE_SLOTS', 'UOPS_MITE', 'UOPS_MS', 'UOPS_PORT_0', 'UOPS_PORT_1', 'UOPS_PORT_2', 'UOPS_PORT_3', 'UOPS_PORT_4',
|
configurePFCs(['UOPS', 'RETIRE_SLOTS', 'UOPS_MITE', 'UOPS_MS', 'UOPS_PORT_0', 'UOPS_PORT_1', 'UOPS_PORT_2', 'UOPS_PORT_3', 'UOPS_PORT_4',
|
||||||
'UOPS_PORT_5', 'UOPS_PORT_6', 'UOPS_PORT_7', 'UOPS_PORT_23', 'UOPS_PORT_49', 'UOPS_PORT_78', 'UOPS_PORT_5B', 'UOPS_PORT_5B>=2',
|
'UOPS_PORT_5', 'UOPS_PORT_6', 'UOPS_PORT_7', 'UOPS_PORT_23', 'UOPS_PORT_49', 'UOPS_PORT_78', 'UOPS_PORT_5B', 'UOPS_PORT_5B>=2',
|
||||||
'UOPS_PORT_23A', 'DIV_CYCLES', 'ILD_STALL.LCP', 'INST_DECODED.DEC0', 'UOPS_MITE>=1'])
|
'UOPS_PORT_23A', 'UOPS_DISPATCHED.INT_EU_ALL', 'UOPS_DISPATCHED.ALU', 'UOPS_DISPATCHED.LOAD', 'UOPS_DISPATCHED.SLOW',
|
||||||
|
'UOPS_DISPATCHED.STD', 'UOPS_DISPATCHED.SHIFT', 'UOPS_DISPATCHED.JMP', 'UOPS_DISPATCHED.STA', 'UOPS_DISPATCHED.V0',
|
||||||
|
'UOPS_DISPATCHED.V1', 'UOPS_DISPATCHED.V2', 'UOPS_DISPATCHED.V3', 'DIV_CYCLES', 'ILD_STALL.LCP', 'INST_DECODED.DEC0', 'UOPS_MITE>=1'])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
subprocess.check_output('mkdir -p /tmp/ramdisk; sudo mount -t tmpfs -o size=100M none /tmp/ramdisk/', shell=True)
|
subprocess.check_output('mkdir -p /tmp/ramdisk; sudo mount -t tmpfs -o size=100M none /tmp/ramdisk/', shell=True)
|
||||||
@@ -3255,6 +3331,9 @@ def main():
|
|||||||
portCombinationsResultDict = {}
|
portCombinationsResultDict = {}
|
||||||
portCombinationsResultDictSameReg = {}
|
portCombinationsResultDictSameReg = {}
|
||||||
portCombinationsResultDictIndexedAddr = {}
|
portCombinationsResultDictIndexedAddr = {}
|
||||||
|
uopTypeResultDict = {}
|
||||||
|
uopTypeResultDictSameReg = {}
|
||||||
|
uopTypeResultDictIndexedAddr = {}
|
||||||
|
|
||||||
if not args.noPorts:
|
if not args.noPorts:
|
||||||
for instr, tpResult in tpDict.items():
|
for instr, tpResult in tpDict.items():
|
||||||
@@ -3374,7 +3453,11 @@ def main():
|
|||||||
|
|
||||||
sortedPortCombinationsNonAVX = sorted(blockingInstructionsDictNonAVX.keys(), key=lambda x:(len(x), sorted(x)))
|
sortedPortCombinationsNonAVX = sorted(blockingInstructionsDictNonAVX.keys(), key=lambda x:(len(x), sorted(x)))
|
||||||
sortedPortCombinationsNonSSE = sorted(blockingInstructionsDictNonSSE.keys(), key=lambda x:(len(x), sorted(x)))
|
sortedPortCombinationsNonSSE = sorted(blockingInstructionsDictNonSSE.keys(), key=lambda x:(len(x), sorted(x)))
|
||||||
print('sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX))
|
print('sortedPortCombinationsNonAVX: ' + str(sortedPortCombinationsNonAVX))
|
||||||
|
print('sortedPortCombinationsNonSSE: ' + str(sortedPortCombinationsNonSSE))
|
||||||
|
|
||||||
|
if arch in ['ARL-P']:
|
||||||
|
lfenceUopTypeDict = getUopTypes(instrNodeDict['LFENCE'], TPConfig(independentInstrs=[InstrInstance(None, '', [], [], {}, [])]), None, [])
|
||||||
|
|
||||||
for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (len(tpDict[x].config.preInstrNodes), x.attrib['string']))):
|
for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (len(tpDict[x].config.preInstrNodes), x.attrib['string']))):
|
||||||
#if not 'CVTPD2PI' in instrNode.attrib['string']: continue
|
#if not 'CVTPD2PI' in instrNode.attrib['string']: continue
|
||||||
@@ -3401,6 +3484,17 @@ def main():
|
|||||||
if not useIACA and tpResult.config.preInstrNodes:
|
if not useIACA and tpResult.config.preInstrNodes:
|
||||||
rem_uops -= sum(tpDict[instrNodeDict[preInstrNode.attrib['string']]].uops for preInstrNode in tpResult.config.preInstrNodes)
|
rem_uops -= sum(tpDict[instrNodeDict[preInstrNode.attrib['string']]].uops for preInstrNode in tpResult.config.preInstrNodes)
|
||||||
|
|
||||||
|
if arch in ['ARL-P']:
|
||||||
|
uopTypeDict = getUopTypes(instrNode, tpResult.config, lfenceUopTypeDict, htmlReports)
|
||||||
|
print(f"{instrNode.attrib['string']}: {uopTypeDict}")
|
||||||
|
if not useDistinctRegs:
|
||||||
|
uopTypeResultDictSameReg[instrNode] = uopTypeDict
|
||||||
|
elif useIndexedAddr:
|
||||||
|
uopTypeResultDictIndexedAddr[instrNode] = uopTypeDict
|
||||||
|
else:
|
||||||
|
uopTypeResultDict[instrNode] = uopTypeDict
|
||||||
|
rem_uops -= sum(uopTypeDict.values())
|
||||||
|
|
||||||
used_ports = tpResult.all_used_ports
|
used_ports = tpResult.all_used_ports
|
||||||
if debugOutput: print(instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports))
|
if debugOutput: print(instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports))
|
||||||
|
|
||||||
@@ -3421,6 +3515,7 @@ def main():
|
|||||||
if used_ports.issubset(combination):
|
if used_ports.issubset(combination):
|
||||||
uopsCombinationList = [(combination, 1)]
|
uopsCombinationList = [(combination, 1)]
|
||||||
htmlReports.append('<hr>Port usage: 1*' + ('p' if isIntelCPU() else 'FP') + ''.join(str(p) for p in combination))
|
htmlReports.append('<hr>Port usage: 1*' + ('p' if isIntelCPU() else 'FP') + ''.join(str(p) for p in combination))
|
||||||
|
rem_uops = 0
|
||||||
break
|
break
|
||||||
elif (rem_uops > 0) and (arch not in ['ZEN+', 'ZEN2']):
|
elif (rem_uops > 0) and (arch not in ['ZEN+', 'ZEN2']):
|
||||||
for combination in sortedPortCombinations:
|
for combination in sortedPortCombinations:
|
||||||
@@ -3445,7 +3540,7 @@ def main():
|
|||||||
nPortsInComb = sum(len(str(x)) for x in combination)
|
nPortsInComb = sum(len(str(x)) for x in combination)
|
||||||
blockInstrRep = max(2 * nPortsInComb * max(1,int(tpDict[instrNode].TP_single)), nPortsInComb * tpDict[instrNode].uops, 10)
|
blockInstrRep = max(2 * nPortsInComb * max(1,int(tpDict[instrNode].TP_single)), nPortsInComb * tpDict[instrNode].uops, 10)
|
||||||
blockInstrRep = min(blockInstrRep, 100)
|
blockInstrRep = min(blockInstrRep, 100)
|
||||||
uopsOnBlockedPorts = getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockingInstrs[combination], blockInstrRep, combination, tpResult.config, htmlReports)
|
uopsOnBlockedPorts = getUopsOnBlockedPorts(instrNode, blockingInstrs[combination], blockInstrRep, combination, tpResult.config, htmlReports)
|
||||||
if uopsOnBlockedPorts is None:
|
if uopsOnBlockedPorts is None:
|
||||||
#print('no uops on blocked ports: ' + str(combination))
|
#print('no uops on blocked ports: ' + str(combination))
|
||||||
continue
|
continue
|
||||||
@@ -3474,6 +3569,9 @@ def main():
|
|||||||
rem_uops -= uopsOnBlockedPorts
|
rem_uops -= uopsOnBlockedPorts
|
||||||
if rem_uops <= 0: break
|
if rem_uops <= 0: break
|
||||||
|
|
||||||
|
if arch in ['ARL-P'] and rem_uops > 0:
|
||||||
|
uopTypeDict['UNKNOWN'] = rem_uops
|
||||||
|
|
||||||
# on ICL, some combinations (e.g. {4,9}) are treated as one port (49) above, as there is only a single counter for both ports
|
# on ICL, some combinations (e.g. {4,9}) are treated as one port (49) above, as there is only a single counter for both ports
|
||||||
# we split these combinations now, as, e.g., the call to getTP_LP requires them to be separate
|
# we split these combinations now, as, e.g., the call to getTP_LP requires them to be separate
|
||||||
uopsCombinationList = [(frozenset(''.join(comb)), uops) for comb, uops in uopsCombinationList]
|
uopsCombinationList = [(frozenset(''.join(comb)), uops) for comb, uops in uopsCombinationList]
|
||||||
@@ -3499,18 +3597,18 @@ def main():
|
|||||||
else:
|
else:
|
||||||
resultNode = archNode.find('./measurement')
|
resultNode = archNode.find('./measurement')
|
||||||
|
|
||||||
applicableResults = [(tpDict[instrNode], portCombinationsResultDict.get(instrNode, None), '')]
|
applicableResults = [(tpDict[instrNode], portCombinationsResultDict.get(instrNode), uopTypeResultDict.get(instrNode, {}), '')]
|
||||||
for otherTPDict, otherPCDict, suffix in [(tpDictSameReg, portCombinationsResultDictSameReg, '_same_reg'),
|
for otherTPDict, otherPCDict, otherUopTypeDict, suffix in [(tpDictSameReg, portCombinationsResultDictSameReg, uopTypeResultDictSameReg, '_same_reg'),
|
||||||
(tpDictIndexedAddr, portCombinationsResultDictIndexedAddr, '_indexed')]:
|
(tpDictIndexedAddr, portCombinationsResultDictIndexedAddr, uopTypeResultDictIndexedAddr, '_indexed')]:
|
||||||
if instrNode in otherTPDict:
|
if instrNode in otherTPDict:
|
||||||
t1 = tpDict[instrNode]
|
t1, p1, u1, _ = applicableResults[0]
|
||||||
t2 = otherTPDict[instrNode]
|
t2 = otherTPDict[instrNode]
|
||||||
p1 = portCombinationsResultDict.get(instrNode, None)
|
p2 = otherPCDict.get(instrNode)
|
||||||
p2 = otherPCDict.get(instrNode, None)
|
u2 = otherUopTypeDict.get(instrNode, {})
|
||||||
if (t1.uops != t2.uops or t1.fused_uops != t2.fused_uops or t1.uops_MITE != t2.uops_MITE or ((p2 is not None) and (p1 != p2))):
|
if (t1.uops != t2.uops or t1.fused_uops != t2.fused_uops or t1.uops_MITE != t2.uops_MITE or ((p2 is not None) and (p1 != p2)) or (u1 != u2)):
|
||||||
applicableResults.append((t2, p2, suffix))
|
applicableResults.append((t2, p2, u2, suffix))
|
||||||
|
|
||||||
for tpResult, portUsageList, suffix in applicableResults:
|
for tpResult, portUsageList, uopTypeDict, suffix in applicableResults:
|
||||||
uops = tpResult.uops
|
uops = tpResult.uops
|
||||||
uopsFused = tpResult.fused_uops
|
uopsFused = tpResult.fused_uops
|
||||||
uopsMITE = tpResult.uops_MITE
|
uopsMITE = tpResult.uops_MITE
|
||||||
@@ -3553,15 +3651,49 @@ def main():
|
|||||||
divCycles = tpResult.divCycles
|
divCycles = tpResult.divCycles
|
||||||
if divCycles: resultNode.attrib['div_cycles'+suffix] = str(divCycles)
|
if divCycles: resultNode.attrib['div_cycles'+suffix] = str(divCycles)
|
||||||
|
|
||||||
portPrefix = ('p' if isIntelCPU() else 'FP')
|
|
||||||
computePortStr = lambda lst: '+'.join(str(uops)+'*'+portPrefix+''.join(p for p in sorted(c)) for c, uops in sorted(lst, key=lambda x: sorted(x[0])))
|
def computePortStr(lst):
|
||||||
if portUsageList:
|
portPrefix = ''
|
||||||
resultNode.attrib['ports'+suffix] = computePortStr(portUsageList)
|
if isIntelCPU() and not arch in ['ARL-P']:
|
||||||
try:
|
portPrefix = 'p'
|
||||||
resultNode.attrib['TP_ports'+suffix] = "%.2f" % getTP_LP(portUsageList)
|
elif arch in ['ARL-P']:
|
||||||
except ValueError as err:
|
portPrefix = 'V'
|
||||||
print('Could not solve LP for ' + instrNode.attrib['string'] + ':')
|
elif isAMDCPU():
|
||||||
print(err)
|
portPrefix = 'FP'
|
||||||
|
elements = []
|
||||||
|
for c, uops in sorted(lst, key=lambda x: sorted(x[0])):
|
||||||
|
elements.append(f"{uops}*{portPrefix}{''.join(p for p in sorted(c))}")
|
||||||
|
return '+'.join(elements)
|
||||||
|
|
||||||
|
uopTypePortMapping = {
|
||||||
|
'ARL-P': {'ALU': {'I0', 'I1', 'I2', 'I3', 'I4', 'I5'},
|
||||||
|
'JMP': {'I0', 'I1', 'I2'},
|
||||||
|
'MUL': {'I3', 'I4', 'I5'},
|
||||||
|
'SHIFT': {'I3', 'I4', 'I5'},
|
||||||
|
'SLOW': {'I3'},
|
||||||
|
'LD': {'M0', 'M1', 'M2'},
|
||||||
|
'STA': {'M3', 'M4', 'M5'},
|
||||||
|
'STD': {'D0', 'D1'},
|
||||||
|
'INT_OTHER': {},
|
||||||
|
'UNKNOWN': {},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
portUsageForLP = list(portUsageList or [])
|
||||||
|
uopTypeStrList = []
|
||||||
|
for t, n in sorted(uopTypeDict.items()):
|
||||||
|
if n > 0:
|
||||||
|
uopTypeStrList.append(f'{n}*{t}')
|
||||||
|
portUsageForLP.append((frozenset(uopTypePortMapping[arch][t]), n))
|
||||||
|
|
||||||
|
portStr = '+'.join(uopTypeStrList + ([computePortStr(portUsageList)] if portUsageList else []))
|
||||||
|
if portStr:
|
||||||
|
resultNode.attrib['ports'+suffix] = portStr
|
||||||
|
if (not uopTypeDict.get('INT_OTHER')) and (not uopTypeDict.get('UNKNOWN')):
|
||||||
|
try:
|
||||||
|
resultNode.attrib['TP_ports'+suffix] = "%.2f" % getTP_LP(portUsageForLP)
|
||||||
|
except ValueError as err:
|
||||||
|
print('Could not solve LP for ' + instrNode.attrib['string'] + ':')
|
||||||
|
print(err)
|
||||||
|
|
||||||
with open(args.output or 'result_'+arch+(('_IACA_' + iacaVersion) if useIACA else '_measured')+'.xml' , "w") as f:
|
with open(args.output or 'result_'+arch+(('_IACA_' + iacaVersion) if useIACA else '_measured')+'.xml' , "w") as f:
|
||||||
reparsed = XMLRoot
|
reparsed = XMLRoot
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ import xml.etree.ElementTree as ET
|
|||||||
import argparse
|
import argparse
|
||||||
from utils import *
|
from utils import *
|
||||||
|
|
||||||
def getLink(instrNode, text, arch, tool, linkType, anchor=None):
|
def getLink(instrNode, text, arch, tool, linkType, baseDir, anchor=None):
|
||||||
url = '/tmp/html-' + linkType + '/' + arch + '/' + canonicalizeInstrString(instrNode.attrib['string']) + '-' + tool + '.html'
|
url = baseDir + '/html-' + linkType + '/' + arch + '/' + canonicalizeInstrString(instrNode.attrib['string']) + '-' + tool + '.html'
|
||||||
if anchor: url += '#' + anchor
|
if anchor: url += '#' + anchor
|
||||||
return '<a href="' + url + '">' + text + '</a>'
|
return '<a href="' + url + '">' + text + '</a>'
|
||||||
|
|
||||||
@@ -13,6 +13,7 @@ def main():
|
|||||||
parser = argparse.ArgumentParser(description='Generates a basic HTML table with the results for a microarchitecture')
|
parser = argparse.ArgumentParser(description='Generates a basic HTML table with the results for a microarchitecture')
|
||||||
parser.add_argument("-input", help="Input XML file", default='result.xml')
|
parser.add_argument("-input", help="Input XML file", default='result.xml')
|
||||||
parser.add_argument("-arch", help="Consider only this architecture")
|
parser.add_argument("-arch", help="Consider only this architecture")
|
||||||
|
parser.add_argument("-base_dir", help="Directory containing HTML files with details", default='/tmp')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
root = ET.parse(args.input)
|
root = ET.parse(args.input)
|
||||||
@@ -64,7 +65,7 @@ def main():
|
|||||||
latTableEntry = getLatencyTableEntry(resultNode)
|
latTableEntry = getLatencyTableEntry(resultNode)
|
||||||
if latTableEntry is not None:
|
if latTableEntry is not None:
|
||||||
lat = str(latTableEntry[0])
|
lat = str(latTableEntry[0])
|
||||||
f.write(' <td align="right">' + getLink(XMLInstr, lat, args.arch, 'Measurements', 'lat') + '</td>\n')
|
f.write(' <td align="right">' + getLink(XMLInstr, lat, args.arch, 'Measurements', 'lat', args.base_dir) + '</td>\n')
|
||||||
|
|
||||||
TPPorts = float(resultNode.attrib.get('TP_ports', float("inf")))
|
TPPorts = float(resultNode.attrib.get('TP_ports', float("inf")))
|
||||||
TPPortsStr = ("{:.2f}".format(TPPorts) if TPPorts < float("inf") else '')
|
TPPortsStr = ("{:.2f}".format(TPPorts) if TPPorts < float("inf") else '')
|
||||||
@@ -84,10 +85,10 @@ def main():
|
|||||||
color = ' bgcolor="orange"'
|
color = ' bgcolor="orange"'
|
||||||
TPDiff += 1
|
TPDiff += 1
|
||||||
|
|
||||||
f.write(' <td align="right"' + color + '>' + getLink(XMLInstr, TPMeasuredStr, args.arch, 'Measurements', 'tp') + '</td>\n')
|
f.write(' <td align="right"' + color + '>' + getLink(XMLInstr, TPMeasuredStr, args.arch, 'Measurements', 'tp', args.base_dir) + '</td>\n')
|
||||||
|
|
||||||
f.write(' <td align="right">' + resultNode.attrib.get('uops', '') + '</td>\n')
|
f.write(' <td align="right">' + resultNode.attrib.get('uops', '') + '</td>\n')
|
||||||
f.write(' <td>' + getLink(XMLInstr, resultNode.attrib.get('ports', ''), args.arch, 'Measurements', 'ports') + '</td>\n')
|
f.write(' <td>' + getLink(XMLInstr, resultNode.attrib.get('ports', ''), args.arch, 'Measurements', 'ports', args.base_dir) + '</td>\n')
|
||||||
f.write(' <tr>\n')
|
f.write(' <tr>\n')
|
||||||
|
|
||||||
f.write('</table>\n')
|
f.write('</table>\n')
|
||||||
|
|||||||
Reference in New Issue
Block a user