mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2025-12-16 11:30:07 +01:00
support for Cascade Lake
This commit is contained in:
@@ -204,7 +204,10 @@ def micro_arch(cpu):
|
|||||||
if (vi.displ_family, vi.displ_model) in [(0x06, 0x4E), (0x06, 0x5E)]:
|
if (vi.displ_family, vi.displ_model) in [(0x06, 0x4E), (0x06, 0x5E)]:
|
||||||
return 'SKL'
|
return 'SKL'
|
||||||
if (vi.displ_family, vi.displ_model) in [(0x06, 0x55)]:
|
if (vi.displ_family, vi.displ_model) in [(0x06, 0x55)]:
|
||||||
return 'SKX'
|
if vi.stepping <= 0x4:
|
||||||
|
return 'SKX'
|
||||||
|
else:
|
||||||
|
return 'CLX'
|
||||||
if (vi.displ_family, vi.displ_model) in [(0x06, 0x8E), (0x06, 0x9E)]:
|
if (vi.displ_family, vi.displ_model) in [(0x06, 0x8E), (0x06, 0x9E)]:
|
||||||
# ToDo: not sure if this is correct
|
# ToDo: not sure if this is correct
|
||||||
if vi.stepping <= 0x9:
|
if vi.stepping <= 0x9:
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from xml.dom import minidom
|
from xml.dom import minidom
|
||||||
import argparse
|
import argparse
|
||||||
|
import sys
|
||||||
|
|
||||||
# Shows the differences between two XML files for a specific microarchitecture
|
# Shows the differences between two XML files for a specific microarchitecture
|
||||||
def main():
|
def main():
|
||||||
@@ -34,11 +35,12 @@ def main():
|
|||||||
for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'):
|
for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'):
|
||||||
for mNode2 in instrNode2.findall('./architecture[@name="' + args.arch2 + '"]/measurement'):
|
for mNode2 in instrNode2.findall('./architecture[@name="' + args.arch2 + '"]/measurement'):
|
||||||
if args.TP:
|
if args.TP:
|
||||||
tp1 = mNode1.attrib['TP']
|
tp1 = min(map(float, [mNode1.attrib.get('TP_unrolled', sys.maxsize), mNode1.attrib.get('TP_loop', sys.maxsize), mNode1.attrib.get('TP', sys.maxsize)]))
|
||||||
tp2 = mNode2.attrib['TP']
|
tp2 = min(map(float, [mNode2.attrib.get('TP_unrolled', sys.maxsize), mNode2.attrib.get('TP_loop', sys.maxsize), mNode2.attrib.get('TP', sys.maxsize)]))
|
||||||
|
|
||||||
if tp1 != tp2:
|
if tp1 != tp2:
|
||||||
tpDiff += 1
|
tpDiff += 1
|
||||||
print instrStr + ' - TP1: ' + tp1 + ' - TP2: ' + tp2
|
print instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2)
|
||||||
|
|
||||||
if args.lat:
|
if args.lat:
|
||||||
for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')):
|
for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')):
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
|
|||||||
# we use vzeroall instead of just vzeroupper to make sure that XMM14 is 0 for VSIB addressing
|
# we use vzeroall instead of just vzeroupper to make sure that XMM14 is 0 for VSIB addressing
|
||||||
init += ['VZEROALL']
|
init += ['VZEROALL']
|
||||||
|
|
||||||
if not 'DIV' in instrNode.attrib['iclass'] and not 'SQRT' in instrNode.attrib['iclass']:
|
if not isDivOrSqrtInstr(instrNode):
|
||||||
for opNode in instrNode.findall('./operand[@r="1"]'):
|
for opNode in instrNode.findall('./operand[@r="1"]'):
|
||||||
opIdx = int(opNode.attrib['idx'])
|
opIdx = int(opNode.attrib['idx'])
|
||||||
xtype = opNode.attrib.get('xtype', '')
|
xtype = opNode.attrib.get('xtype', '')
|
||||||
@@ -120,14 +120,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
|
|||||||
else:
|
else:
|
||||||
init += ['MOVUPD ' + reg + ', [R14]']
|
init += ['MOVUPD ' + reg + ', [R14]']
|
||||||
elif regPrefix in ['XMM', 'YMM', 'ZMM'] and isAVXInstr(instrNode):
|
elif regPrefix in ['XMM', 'YMM', 'ZMM'] and isAVXInstr(instrNode):
|
||||||
# some AVX instr. (e.g. VORPS, VAESDEC) incur a penalty (?) if a source was not written by an AVX instr. of a similar kind
|
init += ['VXORPS '+reg+', '+reg+', '+reg]
|
||||||
if reg not in globalDoNotWriteRegs:
|
|
||||||
for opNode2 in instrNode.findall('./operand[@w="1"]'):
|
|
||||||
if not opNode2.text == opNode.text: continue
|
|
||||||
init += [getInstrInstanceFromNode(instrNode, opRegDict={int(opNode2.attrib['idx']):reg}, computeRegMemInit=False).asm]
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
init += ['VXORPS '+reg+', '+reg+', '+reg]
|
|
||||||
elif 'MM' in regPrefix:
|
elif 'MM' in regPrefix:
|
||||||
init += ['PXOR '+reg+', '+reg]
|
init += ['PXOR '+reg+', '+reg]
|
||||||
elif opNode.attrib['type'] == 'mem':
|
elif opNode.attrib['type'] == 'mem':
|
||||||
@@ -202,7 +195,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
|
|||||||
if arch in ['CON', 'WOL']: evt = 'RS_UOPS_DISPATCHED'
|
if arch in ['CON', 'WOL']: evt = 'RS_UOPS_DISPATCHED'
|
||||||
elif arch in ['NHM', 'WSM']: evt = 'UOPS_RETIRED.ANY'
|
elif arch in ['NHM', 'WSM']: evt = 'UOPS_RETIRED.ANY'
|
||||||
elif arch in ['SNB', 'IVB', 'HSW', 'BDW']: evt = 'UOPS_RETIRED.ALL'
|
elif arch in ['SNB', 'IVB', 'HSW', 'BDW']: evt = 'UOPS_RETIRED.ALL'
|
||||||
elif arch in ['SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: evt = 'UOPS_EXECUTED.THREAD'
|
elif arch in ['SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: evt = 'UOPS_EXECUTED.THREAD'
|
||||||
localHtmlReports.append('<li>' + evt + ': ' + str(value) + '</li>\n')
|
localHtmlReports.append('<li>' + evt + ': ' + str(value) + '</li>\n')
|
||||||
localHtmlReports.append('</ul>\n</li>')
|
localHtmlReports.append('</ul>\n</li>')
|
||||||
|
|
||||||
@@ -253,47 +246,47 @@ def getEventConfig(event):
|
|||||||
if event == 'UOPS':
|
if event == 'UOPS':
|
||||||
if arch in ['CON', 'WOL']: return 'A0.00' # RS_UOPS_DISPATCHED
|
if arch in ['CON', 'WOL']: return 'A0.00' # RS_UOPS_DISPATCHED
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW']: return 'C2.01' # UOPS_RETIRED.ALL
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW']: return 'C2.01' # UOPS_RETIRED.ALL
|
||||||
if arch in ['SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return 'B1.01' # UOPS_EXECUTED.THREAD
|
if arch in ['SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return 'B1.01' # UOPS_EXECUTED.THREAD
|
||||||
if arch in ['ZEN+', 'ZEN2']: return '0C1.00'
|
if arch in ['ZEN+', 'ZEN2']: return '0C1.00'
|
||||||
if event == 'RETIRE_SLOTS':
|
if event == 'RETIRE_SLOTS':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return 'C2.02'
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return 'C2.02'
|
||||||
if event == 'UOPS_MITE':
|
if event == 'UOPS_MITE':
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return '79.04'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return '79.04'
|
||||||
if event == 'UOPS_MS':
|
if event == 'UOPS_MS':
|
||||||
if arch in ['NHM', 'WSM']: return 'D1.02'
|
if arch in ['NHM', 'WSM']: return 'D1.02'
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return '79.30'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return '79.30'
|
||||||
if event == 'UOPS_PORT0':
|
if event == 'UOPS_PORT0':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.01.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.01.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.01'
|
if arch in ['NHM', 'WSM']: return 'B1.01'
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return 'A1.01'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return 'A1.01'
|
||||||
if event == 'UOPS_PORT1':
|
if event == 'UOPS_PORT1':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.02.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.02.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.02'
|
if arch in ['NHM', 'WSM']: return 'B1.02'
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return 'A1.02'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return 'A1.02'
|
||||||
if event == 'UOPS_PORT2':
|
if event == 'UOPS_PORT2':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.04.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.04.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.04'
|
if arch in ['NHM', 'WSM']: return 'B1.04'
|
||||||
if arch in ['SNB', 'IVB']: return 'A1.0C'
|
if arch in ['SNB', 'IVB']: return 'A1.0C'
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL']: return 'A1.04'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return 'A1.04'
|
||||||
if event == 'UOPS_PORT3':
|
if event == 'UOPS_PORT3':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.08.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.08.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.08'
|
if arch in ['NHM', 'WSM']: return 'B1.08'
|
||||||
if arch in ['SNB', 'IVB']: return 'A1.30'
|
if arch in ['SNB', 'IVB']: return 'A1.30'
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL']: return 'A1.08'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return 'A1.08'
|
||||||
if event == 'UOPS_PORT4':
|
if event == 'UOPS_PORT4':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.10.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.10.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.10'
|
if arch in ['NHM', 'WSM']: return 'B1.10'
|
||||||
if arch in ['SNB', 'IVB']: return 'A1.40'
|
if arch in ['SNB', 'IVB']: return 'A1.40'
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL']: return 'A1.10'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return 'A1.10'
|
||||||
if event == 'UOPS_PORT5':
|
if event == 'UOPS_PORT5':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.20.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.20.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.20'
|
if arch in ['NHM', 'WSM']: return 'B1.20'
|
||||||
if arch in ['SNB', 'IVB']: return 'A1.80'
|
if arch in ['SNB', 'IVB']: return 'A1.80'
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return 'A1.20'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return 'A1.20'
|
||||||
if event == 'UOPS_PORT6':
|
if event == 'UOPS_PORT6':
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return 'A1.40'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return 'A1.40'
|
||||||
if event == 'UOPS_PORT7':
|
if event == 'UOPS_PORT7':
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL']: return 'A1.80'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return 'A1.80'
|
||||||
if event == 'UOPS_PORT23':
|
if event == 'UOPS_PORT23':
|
||||||
if arch in ['ICL']: return 'A1.04'
|
if arch in ['ICL']: return 'A1.04'
|
||||||
if event == 'UOPS_PORT49':
|
if event == 'UOPS_PORT49':
|
||||||
@@ -301,11 +294,11 @@ def getEventConfig(event):
|
|||||||
if event == 'UOPS_PORT78':
|
if event == 'UOPS_PORT78':
|
||||||
if arch in ['ICL']: return 'A1.80'
|
if arch in ['ICL']: return 'A1.80'
|
||||||
if event == 'DIV_CYCLES':
|
if event == 'DIV_CYCLES':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL']: return '14.01.CMSK=1' # undocumented on HSW, but seems to work
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return '14.01.CMSK=1' # undocumented on HSW, but seems to work
|
||||||
if arch in ['ICL']: return '14.09.CMSK=1'
|
if arch in ['ICL']: return '14.09.CMSK=1'
|
||||||
if arch in ['ZEN+', 'ZEN2']: return '0D3.00'
|
if arch in ['ZEN+', 'ZEN2']: return '0D3.00'
|
||||||
if event == 'ILD_STALL.LCP':
|
if event == 'ILD_STALL.LCP':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL']: return '87.01'
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX']: return '87.01'
|
||||||
if event == 'INST_DECODED.DEC0':
|
if event == 'INST_DECODED.DEC0':
|
||||||
if arch in ['NHM', 'WSM']: return '18.01'
|
if arch in ['NHM', 'WSM']: return '18.01'
|
||||||
if event == 'FpuPipeAssignment.Total0':
|
if event == 'FpuPipeAssignment.Total0':
|
||||||
@@ -355,6 +348,13 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
|
|||||||
elif operandNode.attrib['type'] == "mem" and 'base' in operandNode.attrib:
|
elif operandNode.attrib['type'] == "mem" and 'base' in operandNode.attrib:
|
||||||
readRegs.add(operandNode.attrib['base'])
|
readRegs.add(operandNode.attrib['base'])
|
||||||
|
|
||||||
|
commonReg = None
|
||||||
|
if not useDistinctRegs:
|
||||||
|
commonRegs = findCommonRegisters(instrNode)
|
||||||
|
commonRegs -= set(doNotWriteRegs)|set(doNotReadRegs)|globalDoNotWriteRegs
|
||||||
|
if commonRegs:
|
||||||
|
commonReg = sortRegs(commonRegs)[0]
|
||||||
|
|
||||||
asm = instrNode.attrib['asm']
|
asm = instrNode.attrib['asm']
|
||||||
|
|
||||||
first = True
|
first = True
|
||||||
@@ -376,23 +376,23 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
|
|||||||
else:
|
else:
|
||||||
regsList = operandNode.text.split(',')
|
regsList = operandNode.text.split(',')
|
||||||
|
|
||||||
if len(regsList) > 1:
|
reg = None
|
||||||
ignoreRegs = set()
|
if commonReg:
|
||||||
if operandNode.attrib.get('w', '0') == '1':
|
for reg2 in regsList:
|
||||||
ignoreRegs |= set(doNotWriteRegs)|globalDoNotWriteRegs|(set(opRegDict.values()) if useDistinctRegs else set(doNotReadRegs))
|
if getCanonicalReg(reg2) == commonReg:
|
||||||
if operandNode.attrib.get('r', '0') == '1':
|
reg = reg2
|
||||||
ignoreRegs |= set(doNotReadRegs)|(writtenRegs|readRegs|set(opRegDict.values()) if useDistinctRegs else set(doNotWriteRegs)|globalDoNotWriteRegs)
|
break
|
||||||
regsList = filter(lambda x: not any(y in ignoreRegs for y in getSubRegs(x)) and not (x in [z for y in ignoreRegs for z in getSubRegs(y)]), regsList)
|
if reg is None:
|
||||||
if not regsList:
|
if len(regsList) > 1:
|
||||||
return None;
|
ignoreRegs = set()
|
||||||
|
if operandNode.attrib.get('w', '0') == '1':
|
||||||
reg = sortRegs(regsList)[0];
|
ignoreRegs |= set(doNotWriteRegs)|globalDoNotWriteRegs|set(opRegDict.values())
|
||||||
if not useDistinctRegs:
|
if operandNode.attrib.get('r', '0') == '1':
|
||||||
for oReg in opRegDict.values():
|
ignoreRegs |= set(doNotReadRegs)|writtenRegs|readRegs|set(opRegDict.values())
|
||||||
for reg2 in regsList:
|
regsList = filter(lambda x: not any(y in ignoreRegs for y in getSubRegs(x)) and not (x in [z for y in ignoreRegs for z in getSubRegs(y)]), regsList)
|
||||||
if getCanonicalReg(oReg) == getCanonicalReg(reg2):
|
if not regsList:
|
||||||
reg = reg2
|
return None;
|
||||||
break
|
reg = sortRegs(regsList)[0]
|
||||||
|
|
||||||
opRegDict[opI] = reg
|
opRegDict[opI] = reg
|
||||||
if operandNode.attrib.get('w', '0') == '1':
|
if operandNode.attrib.get('w', '0') == '1':
|
||||||
@@ -626,14 +626,18 @@ def hasCommonRegister(instrNode):
|
|||||||
return False
|
return False
|
||||||
if instrNode.find('./operand[@type="reg"][@suppressed="1"]') is not None:
|
if instrNode.find('./operand[@type="reg"][@suppressed="1"]') is not None:
|
||||||
return False
|
return False
|
||||||
|
return len(findCommonRegisters(instrNode)) > 0
|
||||||
|
|
||||||
|
def findCommonRegisters(instrNode):
|
||||||
for opNode1 in instrNode.findall('./operand[@type="reg"]'):
|
for opNode1 in instrNode.findall('./operand[@type="reg"]'):
|
||||||
regs1 = set(map(getCanonicalReg, opNode1.text.split(",")))
|
regs1 = set(map(getCanonicalReg, opNode1.text.split(",")))
|
||||||
for opNode2 in instrNode.findall('./operand[@type="reg"]'):
|
for opNode2 in instrNode.findall('./operand[@type="reg"]'):
|
||||||
if opNode1 == opNode2: continue
|
if opNode1 == opNode2: continue
|
||||||
regs2 = set(map(getCanonicalReg, opNode2.text.split(",")))
|
regs2 = set(map(getCanonicalReg, opNode2.text.split(",")))
|
||||||
if regs1.intersection(regs2):
|
intersection = regs1.intersection(regs2)
|
||||||
return True
|
if intersection:
|
||||||
return False
|
return intersection
|
||||||
|
return set()
|
||||||
|
|
||||||
def hasExplicitNonVSIBMemOperand(instrNode):
|
def hasExplicitNonVSIBMemOperand(instrNode):
|
||||||
for opNode in instrNode.findall('./operand[@type="mem"]'):
|
for opNode in instrNode.findall('./operand[@type="mem"]'):
|
||||||
@@ -673,12 +677,12 @@ class TPConfig:
|
|||||||
self.note = note
|
self.note = note
|
||||||
|
|
||||||
def getTPConfigs(instrNode, useDistinctRegs=True, useIndexedAddr=False, computeIndepAndDepBreakingInstrs=True):
|
def getTPConfigs(instrNode, useDistinctRegs=True, useIndexedAddr=False, computeIndepAndDepBreakingInstrs=True):
|
||||||
|
if isDivOrSqrtInstr(instrNode):
|
||||||
|
return getTPConfigsForDiv(instrNode)
|
||||||
|
|
||||||
iform = instrNode.attrib['iform']
|
iform = instrNode.attrib['iform']
|
||||||
iclass = instrNode.attrib['iclass']
|
iclass = instrNode.attrib['iclass']
|
||||||
|
|
||||||
if 'DIV' in iclass or 'SQRT' in iclass:
|
|
||||||
return getTPConfigsForDiv(instrNode)
|
|
||||||
|
|
||||||
independentInstrs = []
|
independentInstrs = []
|
||||||
depBreakingInstrs = ''
|
depBreakingInstrs = ''
|
||||||
if computeIndepAndDepBreakingInstrs:
|
if computeIndepAndDepBreakingInstrs:
|
||||||
@@ -880,7 +884,7 @@ def getTPConfigsForDiv(instrNode):
|
|||||||
config.init += ['VMOVUP' + dataType + ' ' + divisorReg + ', [R14]']
|
config.init += ['VMOVUP' + dataType + ' ' + divisorReg + ', [R14]']
|
||||||
|
|
||||||
config.independentInstrs = [getInstrInstanceFromNode(instrNode, opRegDict={1:regType+str(reg), (nOperands-1):dividendReg, nOperands:divisorReg}) for reg in range(2, 10)]
|
config.independentInstrs = [getInstrInstanceFromNode(instrNode, opRegDict={1:regType+str(reg), (nOperands-1):dividendReg, nOperands:divisorReg}) for reg in range(2, 10)]
|
||||||
elif instrNode.attrib['iclass'] in ['SQRTSS', 'SQRTPS', 'SQRTSD', 'SQRTPD', 'RSQRTSS', 'RSQRTPS', 'RCPSS', 'RCPPS', 'VSQRTSS', 'VSQRTPS', 'VSQRTSD', 'VSQRTPD','VRSQRTSS', 'VRSQRTPS', 'VRCPSS', 'VRCPPS', 'VRSQRT14SS', 'VRSQRT14SD', 'VRSQRT14PS', 'VRSQRT14PD']:
|
elif instrNode.attrib['iclass'] in ['SQRTSS', 'SQRTPS', 'SQRTSD', 'SQRTPD', 'RSQRTSS', 'RSQRTPS', 'VSQRTSS', 'VSQRTPS', 'VSQRTSD', 'VSQRTPD','VRSQRTSS', 'VRSQRTPS', 'VRSQRT14SS', 'VRSQRT14SD', 'VRSQRT14PS', 'VRSQRT14PD']:
|
||||||
dataType = instrNode.attrib['iclass'][-1]
|
dataType = instrNode.attrib['iclass'][-1]
|
||||||
|
|
||||||
if dataType == 'S':
|
if dataType == 'S':
|
||||||
@@ -931,7 +935,8 @@ def fancyRound(cycles):
|
|||||||
return round(cycles, 2)
|
return round(cycles, 2)
|
||||||
|
|
||||||
|
|
||||||
TPResult = namedtuple('TPResult', ['TP', 'TP_noDepBreaking_noLoop', 'TP_single', 'uops', 'fused_uops', 'uops_MITE', 'uops_MS', 'divCycles', 'ILD_stalls', 'dec0', 'config', 'unblocked_ports'])
|
TPResult = namedtuple('TPResult', ['TP', 'TP_loop', 'TP_noLoop', 'TP_noDepBreaking_noLoop', 'TP_single', 'uops', 'fused_uops', 'uops_MITE', 'uops_MS', 'divCycles',
|
||||||
|
'ILD_stalls', 'dec0', 'config', 'unblocked_ports'])
|
||||||
|
|
||||||
# returns TPResult
|
# returns TPResult
|
||||||
# port usages are averages (when no ports are blocked by other instructions)
|
# port usages are averages (when no ports are blocked by other instructions)
|
||||||
@@ -939,6 +944,8 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
configs = getTPConfigs(instrNode, useDistinctRegs, useIndexedAddr)
|
configs = getTPConfigs(instrNode, useDistinctRegs, useIndexedAddr)
|
||||||
|
|
||||||
minTP = sys.maxint
|
minTP = sys.maxint
|
||||||
|
minTP_loop = sys.maxint
|
||||||
|
minTP_noLoop = sys.maxint
|
||||||
minTP_noDepBreaking_noLoop = sys.maxint
|
minTP_noDepBreaking_noLoop = sys.maxint
|
||||||
minTP_single = sys.maxint
|
minTP_single = sys.maxint
|
||||||
|
|
||||||
@@ -1007,7 +1014,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
else:
|
else:
|
||||||
divCycles = 0
|
divCycles = 0
|
||||||
|
|
||||||
return TPResult(minTP, minTP_noDepBreaking_noLoop, minTP_single, unfused_uops, fused_uops, None, None, divCycles, 0, False, config, ports_dict)
|
return TPResult(minTP, minTP, minTP, minTP_noDepBreaking_noLoop, minTP_single, unfused_uops, fused_uops, None, None, divCycles, 0, False, config, ports_dict)
|
||||||
else:
|
else:
|
||||||
hasMemWriteOperand = len(instrNode.findall('./operand[@type="mem"][@r="1"][@w="1"]'))>0
|
hasMemWriteOperand = len(instrNode.findall('./operand[@type="mem"][@r="1"][@w="1"]'))>0
|
||||||
uops = None
|
uops = None
|
||||||
@@ -1016,17 +1023,21 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
uopsMS = None
|
uopsMS = None
|
||||||
divCycles = None
|
divCycles = None
|
||||||
ILD_stalls = None
|
ILD_stalls = None
|
||||||
dec0 = False
|
dec0 = None
|
||||||
ports_dict = {}
|
ports_dict = {}
|
||||||
for config in configs:
|
for config in configs:
|
||||||
if config.note: htmlReports.append('<h2>' + config.note + '</h2>\n')
|
if config.note: htmlReports.append('<h2>' + config.note + '</h2>\n')
|
||||||
|
|
||||||
instrIList = config.independentInstrs
|
instrIList = config.independentInstrs
|
||||||
for ic in sorted(set([1, min(4, len(instrIList)), min(8, len(instrIList)), len(instrIList)])):
|
for ic in sorted(set([1, min(4, len(instrIList)), min(8, len(instrIList)), len(instrIList)])):
|
||||||
|
if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
|
||||||
|
|
||||||
if len(instrIList) > 1: htmlReports.append('<h3 style="margin-left: 25px">With ' + str(ic) + ' independent instruction' + ('s' if ic>1 else '') + '</h3>\n')
|
if len(instrIList) > 1: htmlReports.append('<h3 style="margin-left: 25px">With ' + str(ic) + ' independent instruction' + ('s' if ic>1 else '') + '</h3>\n')
|
||||||
htmlReports.append('<div style="margin-left: 50px">\n')
|
htmlReports.append('<div style="margin-left: 50px">\n')
|
||||||
|
|
||||||
for useDepBreakingInstrs in ([False, True] if config.depBreakingInstrs else [False]):
|
for useDepBreakingInstrs in ([False, True] if config.depBreakingInstrs else [False]):
|
||||||
|
if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
|
||||||
|
|
||||||
if useDepBreakingInstrs:
|
if useDepBreakingInstrs:
|
||||||
instrStr = ';'.join([config.depBreakingInstrs+';'+config.preInstrCode+';'+i.asm for i in instrIList[0:ic]])
|
instrStr = ';'.join([config.depBreakingInstrs+';'+config.preInstrCode+';'+i.asm for i in instrIList[0:ic]])
|
||||||
htmlReports.append('<h4>With additional dependency-breaking instructions</h4>\n')
|
htmlReports.append('<h4>With additional dependency-breaking instructions</h4>\n')
|
||||||
@@ -1036,17 +1047,21 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
init = list(chain.from_iterable(i.regMemInit for i in instrIList[0:ic])) + config.init
|
init = list(chain.from_iterable(i.regMemInit for i in instrIList[0:ic])) + config.init
|
||||||
|
|
||||||
for repType in ['unrollOnly', 'loopSmall', 'loopBig']:
|
for repType in ['unrollOnly', 'loopSmall', 'loopBig']:
|
||||||
if minTP < sys.maxint and minTP > 100: continue
|
if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
|
||||||
|
|
||||||
if repType == 'unrollOnly':
|
if repType == 'unrollOnly':
|
||||||
unrollCount = int(round(500/ic+49, -2)) # should still fit in the icache
|
unrollCount = int(round(500/ic+49, -2)) # should still fit in the icache
|
||||||
if instrNode.attrib['iclass'] in ['WBINVD']: unrollCount /= 10;
|
if instrNode.attrib['iclass'] in ['CPUID', 'RDRAND', 'RDSEED', 'WBINVD'] or instrNode.attrib['category'] in ['IO', 'IOSTRINGOP']:
|
||||||
|
unrollCount = 10
|
||||||
loopCount = 0
|
loopCount = 0
|
||||||
else:
|
else:
|
||||||
# we test with a small loop body so that uops may be delivered from the loop stream detector (LSD)
|
# we test with a small loop body so that uops may be delivered from the loop stream detector (LSD)
|
||||||
# we also test with a larger loop body to minimize potential overhead from the loop itself
|
# we also test with a larger loop body to minimize potential overhead from the loop itself
|
||||||
loopCount = 100;
|
loopCount = 100
|
||||||
unrollCount = max(1, int(round(10.0/ic)))
|
unrollCount = max(1, int(round(10.0/ic)))
|
||||||
|
if minTP < sys.maxint and minTP > 100:
|
||||||
|
unrollCount = 1
|
||||||
|
loopCount = 10
|
||||||
if repType == 'loopBig':
|
if repType == 'loopBig':
|
||||||
unrollCount *= 10
|
unrollCount *= 10
|
||||||
|
|
||||||
@@ -1062,16 +1077,21 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
|
|
||||||
cycles = fancyRound(result['Core cycles']/ic)
|
cycles = fancyRound(result['Core cycles']/ic)
|
||||||
|
|
||||||
invalid = False
|
#invalid = False
|
||||||
if any('PORT' in e for e in result):
|
#if any('PORT' in e for e in result):
|
||||||
maxPortUops = max(v/(len(e)-9) for e,v in result.items() if e.startswith('UOPS_PORT') and not '4' in e)
|
# maxPortUops = max(v/(len(e)-9) for e,v in result.items() if e.startswith('UOPS_PORT') and not '4' in e)
|
||||||
if maxPortUops * .98 > result['Core cycles']:
|
# if maxPortUops * .98 > result['Core cycles']:
|
||||||
print 'More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles'])
|
# print 'More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles'])
|
||||||
#invalid = True
|
# #invalid = True
|
||||||
|
|
||||||
if not invalid:
|
#if not invalid:
|
||||||
minTP = min(minTP, cycles)
|
minTP = min(minTP, cycles)
|
||||||
if not useDepBreakingInstrs and repType == 'unrollOnly': minTP_noDepBreaking_noLoop = min(minTP_noDepBreaking_noLoop, cycles)
|
if repType == 'unrollOnly':
|
||||||
|
minTP_noLoop = min(minTP_noLoop, cycles)
|
||||||
|
if not useDepBreakingInstrs:
|
||||||
|
minTP_noDepBreaking_noLoop = min(minTP_noDepBreaking_noLoop, cycles)
|
||||||
|
else:
|
||||||
|
minTP_loop = min(minTP_loop, cycles)
|
||||||
|
|
||||||
if ic == 1 and (minTP == sys.maxint or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
|
if ic == 1 and (minTP == sys.maxint or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
|
||||||
minTP_single = min(minTP_single, cycles)
|
minTP_single = min(minTP_single, cycles)
|
||||||
@@ -1106,7 +1126,8 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
htmlReports.append('</div>')
|
htmlReports.append('</div>')
|
||||||
|
|
||||||
if minTP < sys.maxint:
|
if minTP < sys.maxint:
|
||||||
return TPResult(minTP, minTP_noDepBreaking_noLoop, minTP_single, uops, uopsFused, uopsMITE, uopsMS, divCycles, ILD_stalls, dec0, minConfig, ports_dict)
|
return TPResult(minTP, minTP_loop, minTP_noLoop, minTP_noDepBreaking_noLoop, minTP_single, uops, uopsFused, uopsMITE, uopsMS, divCycles, ILD_stalls,
|
||||||
|
dec0, minConfig, ports_dict)
|
||||||
|
|
||||||
|
|
||||||
def canMacroFuse(flagInstrNode, branchInstrNode, htmlReports):
|
def canMacroFuse(flagInstrNode, branchInstrNode, htmlReports):
|
||||||
@@ -1521,8 +1542,8 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep):
|
|||||||
configList.append(config)
|
configList.append(config)
|
||||||
configList.isUpperBound = True
|
configList.isUpperBound = True
|
||||||
return configLists
|
return configLists
|
||||||
elif instrNode.attrib['iclass'] in ['SQRTSS', 'SQRTPS', 'SQRTSD', 'SQRTPD', 'RSQRTSS', 'RSQRTPS', 'RCPSS', 'RCPPS', 'VSQRTSS', 'VSQRTPS', 'VSQRTSD',
|
elif instrNode.attrib['iclass'] in ['SQRTSS', 'SQRTPS', 'SQRTSD', 'SQRTPD', 'RSQRTSS', 'RSQRTPS', 'VSQRTSS', 'VSQRTPS', 'VSQRTSD',
|
||||||
'VSQRTPD','VRSQRTSS', 'VRSQRTPS', 'VRSQRT14PD', 'VRSQRT14PS', 'VRSQRT14SD', 'VRSQRT14SS', 'VRCPSS', 'VRCPPS']:
|
'VSQRTPD','VRSQRTSS', 'VRSQRTPS', 'VRSQRT14PD', 'VRSQRT14PS', 'VRSQRT14SD', 'VRSQRT14SS']:
|
||||||
dataType = instrNode.attrib['iclass'][-1]
|
dataType = instrNode.attrib['iclass'][-1]
|
||||||
|
|
||||||
if dataType == 'S':
|
if dataType == 'S':
|
||||||
@@ -1767,7 +1788,7 @@ LatResult = namedtuple('LatResult', ['minLat','maxLat','lat_sameReg','isUpperBou
|
|||||||
def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem, tpDict):
|
def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem, tpDict):
|
||||||
cRep = min(100, 2 + 2 * int(math.ceil(tpDict[instrNode].TP_single / 2))) # must be a multiple of 2
|
cRep = min(100, 2 + 2 * int(math.ceil(tpDict[instrNode].TP_single / 2))) # must be a multiple of 2
|
||||||
|
|
||||||
if 'DIV' in instrNode.attrib['iclass'] or 'SQRT' in instrNode.attrib['iclass']:
|
if isDivOrSqrtInstr(instrNode):
|
||||||
if not useDistinctRegs: return None
|
if not useDistinctRegs: return None
|
||||||
if targetNode.attrib['type'] == 'flags': return None
|
if targetNode.attrib['type'] == 'flags': return None
|
||||||
if addrMem == 'mem': return None
|
if addrMem == 'mem': return None
|
||||||
@@ -2188,15 +2209,14 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
|
|||||||
inputOpnds.append(opNode)
|
inputOpnds.append(opNode)
|
||||||
if opNode.attrib.get('w', '0') == '1':
|
if opNode.attrib.get('w', '0') == '1':
|
||||||
outputOpnds.append(opNode)
|
outputOpnds.append(opNode)
|
||||||
if opNode.attrib.get('r', '0') == '1':
|
if opNode.attrib.get('r', '0') == '0':
|
||||||
continue
|
if opNode.attrib['type'] == 'mem':
|
||||||
if opNode.attrib['type'] == 'mem':
|
inputOpnds.append(opNode) # address of memory write
|
||||||
inputOpnds.append(opNode) # address of memory write
|
elif opNode.attrib.get('conditionalWrite', '0') == '1':
|
||||||
elif opNode.attrib['type'] == 'reg':
|
|
||||||
if opNode.attrib.get('conditionalWrite', '0') == '1':
|
|
||||||
inputOpnds.append(opNode)
|
|
||||||
elif opNode.attrib.get('width', '') in ['8', '16'] and opNode.text.split(',')[0] in GPRegs:
|
|
||||||
inputOpnds.append(opNode)
|
inputOpnds.append(opNode)
|
||||||
|
elif opNode.attrib['type'] == 'reg':
|
||||||
|
if opNode.attrib.get('width', '') in ['8', '16'] and opNode.text.split(',')[0] in GPRegs:
|
||||||
|
inputOpnds.append(opNode)
|
||||||
|
|
||||||
archNode = instrNode.find('./architecture[@name="' + arch + '"]')
|
archNode = instrNode.find('./architecture[@name="' + arch + '"]')
|
||||||
measurementNode = archNode.find('./measurement')
|
measurementNode = archNode.find('./measurement')
|
||||||
@@ -2270,12 +2290,31 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
|
|||||||
newlatConfig.notes.append('with ' + reg + '=' + regVal)
|
newlatConfig.notes.append('with ' + reg + '=' + regVal)
|
||||||
latConfigList.latConfigs.append(newlatConfig)
|
latConfigList.latConfigs.append(newlatConfig)
|
||||||
|
|
||||||
|
# some SSE/AVX instr. (e.g., VORPS (on SKL, CLX), VAESDEC) incur a penalty (?) if a source was not written by an instr. of a similar kind,
|
||||||
|
# some other instructions (e.g., VPDPWSSD on ICL) incur a penalty if the source was written by an instr. of the same kind;
|
||||||
|
# therefore, we create configurations for both scenarios
|
||||||
|
if (isSSEInstr(instrNode) or isAVXInstr(instrNode)) and not isDivOrSqrtInstr(instrNode):
|
||||||
|
for latConfig in list(latConfigList.latConfigs):
|
||||||
|
regInit = []
|
||||||
|
for opNode in instrNode.findall('./operand[@r="1"][@type="reg"]'):
|
||||||
|
reg = latConfig.instrI.opRegDict[int(opNode.attrib['idx'])]
|
||||||
|
regPrefix = re.sub('\d', '', reg)
|
||||||
|
if (regPrefix in ['XMM', 'YMM', 'ZMM']) and (reg not in globalDoNotWriteRegs):
|
||||||
|
for opNode2 in instrNode.findall('./operand[@w="1"][@type="reg"]'):
|
||||||
|
if opNode2.text != opNode.text: continue
|
||||||
|
regInit += [getInstrInstanceFromNode(instrNode, opRegDict={int(opNode2.attrib['idx']):reg}, computeRegMemInit=False).asm]
|
||||||
|
break
|
||||||
|
if regInit:
|
||||||
|
newlatConfig = copy.deepcopy(latConfig)
|
||||||
|
newlatConfig.instrI.regMemInit.extend(regInit)
|
||||||
|
newlatConfig.notes.append('source registers initialized by an instruction of the same kind')
|
||||||
|
latConfigList.latConfigs.append(newlatConfig)
|
||||||
|
|
||||||
# Create a copy of each experiment with dependency-breaking instructions for all dependencies other than the dependency from opNode2 to
|
# Create a copy of each experiment with dependency-breaking instructions for all dependencies other than the dependency from opNode2 to
|
||||||
# opNode1 if there aren't sufficiently many fill instructions in the chain
|
# opNode1 if there aren't sufficiently many fill instructions in the chain
|
||||||
if (not 'DIV' in instrNode.attrib['iclass'] and not 'SQRT' in instrNode.attrib['iclass'] and
|
if (not isDivOrSqrtInstr(instrNode) and not 'GATHER' in instrNode.attrib['category'] and not 'SCATTER' in instrNode.attrib['category']):
|
||||||
not 'GATHER' in instrNode.attrib['category'] and not 'SCATTER' in instrNode.attrib['category']):
|
|
||||||
for latConfig in list(latConfigList.latConfigs):
|
for latConfig in list(latConfigList.latConfigs):
|
||||||
if latConfig.chainLatency > tpDict[instrNode].TP_single:
|
if not isAVXInstr(instrNode) and latConfig.chainLatency > tpDict[instrNode].TP_single:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
depBreakingInstrs = getDependencyBreakingInstrs(instrNode, latConfig.instrI.opRegDict)
|
depBreakingInstrs = getDependencyBreakingInstrs(instrNode, latConfig.instrI.opRegDict)
|
||||||
@@ -2429,10 +2468,12 @@ def isSSEInstr(instrNode):
|
|||||||
extension = instrNode.attrib['extension']
|
extension = instrNode.attrib['extension']
|
||||||
return 'SSE' in extension or extension in ['AES']
|
return 'SSE' in extension or extension in ['AES']
|
||||||
|
|
||||||
|
|
||||||
# Returns True if the instruction node carries a 'vex' or an 'evex' attribute.
# Only the presence of the attribute key is checked, not its value.
def isAVXInstr(instrNode):
    attributes = instrNode.attrib
    return ('vex' in attributes) or ('evex' in attributes)
|
||||||
|
|
||||||
|
# Returns True if the 'iclass' attribute of the instruction node contains the
# substring 'DIV' or 'SQRT'. This is a substring match, so it also covers
# iclass values such as 'IDIV' or 'VSQRTPS'.
# The 'iclass' attribute is accessed by subscript, so it must be present.
def isDivOrSqrtInstr(instrNode):
    iclass = instrNode.attrib['iclass']
    return ('DIV' in iclass) or ('SQRT' in iclass)
|
||||||
|
|
||||||
|
|
||||||
def writeHtmlFile(folder, instrNode, title, body):
|
def writeHtmlFile(folder, instrNode, title, body):
|
||||||
filename = canonicalizeInstrString(instrNode.attrib['string'])
|
filename = canonicalizeInstrString(instrNode.attrib['string'])
|
||||||
@@ -2471,7 +2512,7 @@ def filterInstructions(XMLRoot):
|
|||||||
# Not supported by assembler
|
# Not supported by assembler
|
||||||
if XMLInstr.attrib['iclass'] == 'NOP' and len(XMLInstr.findall('operand')) > 1:
|
if XMLInstr.attrib['iclass'] == 'NOP' and len(XMLInstr.findall('operand')) > 1:
|
||||||
instrSet.discard(XMLInstr)
|
instrSet.discard(XMLInstr)
|
||||||
if extension in ['WBNOINVD']: instrSet.discard(XMLInstr)
|
if extension in ['MCOMMIT', 'WBNOINVD']: instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
# Only supported by VIA
|
# Only supported by VIA
|
||||||
if 'VIA_' in extension:
|
if 'VIA_' in extension:
|
||||||
@@ -2572,7 +2613,7 @@ def filterInstructions(XMLRoot):
|
|||||||
if extension == 'RDTSCP' and not cpuid.get_bit(edx8_1, 27): instrSet.discard(XMLInstr)
|
if extension == 'RDTSCP' and not cpuid.get_bit(edx8_1, 27): instrSet.discard(XMLInstr)
|
||||||
if extension == '3DNOW' and not cpuid.get_bit(edx8_1, 31): instrSet.discard(XMLInstr)
|
if extension == '3DNOW' and not cpuid.get_bit(edx8_1, 31): instrSet.discard(XMLInstr)
|
||||||
if extension == 'CLZERO' and not cpuid.get_bit(ebx8_8, 0): instrSet.discard(XMLInstr)
|
if extension == 'CLZERO' and not cpuid.get_bit(ebx8_8, 0): instrSet.discard(XMLInstr)
|
||||||
if extension == 'MCOMMIT' and not cpuid.get_bit(ebx8_8, 8): instrSet.discard(XMLInstr)
|
#if extension == 'MCOMMIT' and not cpuid.get_bit(ebx8_8, 8): instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
# Virtualization instructions
|
# Virtualization instructions
|
||||||
if extension in ['SVM', 'VMFUNC', 'VTX']: instrSet.discard(XMLInstr)
|
if extension in ['SVM', 'VMFUNC', 'VTX']: instrSet.discard(XMLInstr)
|
||||||
@@ -2674,7 +2715,7 @@ def main():
|
|||||||
# preInstr has been measured
|
# preInstr has been measured
|
||||||
instrRequiringPreInstr = []
|
instrRequiringPreInstr = []
|
||||||
if not useIACA:
|
if not useIACA:
|
||||||
instrRequiringPreInstr = [x for x in instrNodeList if 'DIV' in x.attrib['iclass'] or 'SQRT' in x.attrib['iclass'] or getPreInstr(x)[0]]
|
instrRequiringPreInstr = [x for x in instrNodeList if isDivOrSqrtInstr(x) or getPreInstr(x)[0]]
|
||||||
instrNodeList.sort(key=lambda x: (x in instrRequiringPreInstr, x.attrib['string']))
|
instrNodeList.sort(key=lambda x: (x in instrRequiringPreInstr, x.attrib['string']))
|
||||||
|
|
||||||
condBrInstr = [i for i in instrNodeList if i.attrib['category'] == 'COND_BR' and i.attrib['isa-set'] == 'I86' and not 'LOOP' in i.attrib['iclass']]
|
condBrInstr = [i for i in instrNodeList if i.attrib['category'] == 'COND_BR' and i.attrib['isa-set'] == 'I86' and not 'LOOP' in i.attrib['iclass']]
|
||||||
@@ -2708,7 +2749,7 @@ def main():
|
|||||||
tpDictNoInteriteration = {instrNodeDict[k.attrib['string']]:v for k,v in pTpDictNoInteriteration.items()}
|
tpDictNoInteriteration = {instrNodeDict[k.attrib['string']]:v for k,v in pTpDictNoInteriteration.items()}
|
||||||
else:
|
else:
|
||||||
for i, instrNode in enumerate(instrNodeList):
|
for i, instrNode in enumerate(instrNodeList):
|
||||||
#if not 'MOV_NOREX' in instrNode.attrib['string']: continue
|
#if not 'VPDPWSSD (XMM, K, XMM, XMM)' in instrNode.attrib['string']: continue
|
||||||
print 'Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
|
print 'Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
|
||||||
|
|
||||||
htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Throughput and Uops' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
|
htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Throughput and Uops' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
|
||||||
@@ -2729,7 +2770,8 @@ def main():
|
|||||||
htmlReports.append('<hr><h2 id="sameReg">With the same register for for different operands</h2>\n')
|
htmlReports.append('<hr><h2 id="sameReg">With the same register for for different operands</h2>\n')
|
||||||
tpResultSR = getThroughputAndUops(instrNode, False, False, htmlReports)
|
tpResultSR = getThroughputAndUops(instrNode, False, False, htmlReports)
|
||||||
if tpResultSR and (tpResult.uops != tpResultSR.uops or tpResult.fused_uops != tpResultSR.fused_uops or tpResult.uops_MITE != tpResultSR.uops_MITE
|
if tpResultSR and (tpResult.uops != tpResultSR.uops or tpResult.fused_uops != tpResultSR.fused_uops or tpResult.uops_MITE != tpResultSR.uops_MITE
|
||||||
or abs(tpResult.TP-tpResultSR.TP) > .05):
|
or abs(sum(tpResult.unblocked_ports.values()) - sum(tpResultSR.unblocked_ports.values())) > .8
|
||||||
|
or tpResultSR.TP_single < .95 * tpResult.TP_single):
|
||||||
tpDictSameReg[instrNode] = tpResultSR
|
tpDictSameReg[instrNode] = tpResultSR
|
||||||
|
|
||||||
if hasExplMemOp:
|
if hasExplMemOp:
|
||||||
@@ -2773,7 +2815,7 @@ def main():
|
|||||||
latencyDict = {instrNodeDict[k.attrib['string']]:v for k,v in pickle.load(f).items()}
|
latencyDict = {instrNodeDict[k.attrib['string']]:v for k,v in pickle.load(f).items()}
|
||||||
elif not useIACA or iacaVersion == '2.1':
|
elif not useIACA or iacaVersion == '2.1':
|
||||||
for i, instrNode in enumerate(instrNodeList):
|
for i, instrNode in enumerate(instrNodeList):
|
||||||
#if not 'ADC' in instrNode.attrib['string']: continue
|
#if not 'AES' in instrNode.attrib['string']: continue
|
||||||
print 'Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
|
print 'Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
|
||||||
|
|
||||||
htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Latency' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
|
htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Latency' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
|
||||||
@@ -3056,7 +3098,8 @@ def main():
|
|||||||
if useIACA and instrNode in latencyDict:
|
if useIACA and instrNode in latencyDict:
|
||||||
resultNode.attrib['latency'] = str(latencyDict[instrNode])
|
resultNode.attrib['latency'] = str(latencyDict[instrNode])
|
||||||
|
|
||||||
resultNode.attrib['TP'+suffix] = "%.2f" % tpResult.TP
|
resultNode.attrib['TP_unrolled'+suffix] = "%.2f" % tpResult.TP_noLoop
|
||||||
|
resultNode.attrib['TP_loop'+suffix] = "%.2f" % tpResult.TP_loop
|
||||||
if instrNode in tpDictNoInteriteration:
|
if instrNode in tpDictNoInteriteration:
|
||||||
resultNode.attrib['TP_no_interiteration'] = "%.2f" % tpDictNoInteriteration[instrNode]
|
resultNode.attrib['TP_no_interiteration'] = "%.2f" % tpDictNoInteriteration[instrNode]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user