diff --git a/tools/cpuBench/cpuBench.py b/tools/cpuBench/cpuBench.py index 83122fa..70d6ce2 100755 --- a/tools/cpuBench/cpuBench.py +++ b/tools/cpuBench/cpuBench.py @@ -414,7 +414,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None, if 'R' in agen: address.append('RIP') if 'B' in agen: address.append('R14') - if 'I' in agen: address.append('2*R13') + if 'I' in agen: address.append('2*R14') if 'D' in agen: address.append('8') asm += ' [' + '+'.join(address) + ']' @@ -1708,13 +1708,13 @@ class LatConfigList: LatResult = namedtuple('LatResult', ['minLat','maxLat','lat_sameReg','isUpperBound']) -def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_mem, tpDict): +def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem, tpDict): cRep = min(100, 2 + 2 * int(math.ceil(tpDict[instrNode].TP_single / 2))) # must be a multiple of 2 if 'DIV' in instrNode.attrib['iclass'] or 'SQRT' in instrNode.attrib['iclass']: if not useDistinctRegs: return None if targetNode.attrib['type'] == 'flags': return None - if addr_mem == 'mem': return None + if addrMem == 'mem': return None if startNode.attrib.get('opmask', '') == '1' or targetNode.attrib.get('opmask', '') == '1': return None if instrNode.attrib.get('mask', '') == '1' and (startNode == targetNode): return None return getDivLatConfigLists(instrNode, startNode, targetNode, cRep) @@ -1943,7 +1943,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_me chainInstr = 'TEST ' + targetNode.attrib['memory-prefix'] + ' [' + getAddrReg(instrNode, targetNode) + '], 1' configList.isUpperBound = True configList.append(LatConfig(instrI, chainInstrs=chainInstr, chainLatency=1)) - elif startNode.attrib['type'] == 'mem': + elif startNode.attrib['type'] in ['agen', 'mem']: ################# # mem -> ... ################# @@ -1952,7 +1952,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_me return None addrReg = getAddrReg(instrNode, startNode) - memWidth = int(startNode.attrib['width']) + memWidth = int(startNode.attrib.get('width', 0)) if targetNode.attrib['type'] == 'reg': ################# @@ -1972,7 +1972,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_me if reg in GPRegs: instrI = getInstrInstanceFromNode(instrNode, [addrReg, 'R12'], [addrReg, 'R12'], useDistinctRegs, {targetNodeIdx:reg}) - if addr_mem == 'addr': + if addrMem == 'addr': # addr -> reg chainInstrs = 'MOVSX ' + regTo64(reg) + ', ' + regToSize(reg, min(32, regSize)) + ';' chainInstrs += 'XOR {}, {};'.format(addrReg, regTo64(reg)) * cRep + ('TEST R13, R13;' if instrReadsFlags else '') # cRep is a multiple of 2 @@ -1991,7 +1991,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_me elif 'MM' in reg: instrI = getInstrInstanceFromNode(instrNode, ['R12'], ['R12'], useDistinctRegs, {targetNodeIdx:reg}) - if addr_mem == 'addr': + if addrMem == 'addr': # addr -> reg configList.isUpperBound = True chainInstrs = 'MOVQ R12, {};'.format(getCanonicalReg(reg)) @@ -2000,7 +2000,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_me chainInstrs += 'XOR {}, {};'.format(addrReg, 'R12') * cRep + ('TEST R13, R13;' if instrReadsFlags else '') # cRep is a multiple of 2 chainLatency = 1 + basicLatency['XOR'] * cRep configList.append(LatConfig(instrI, chainInstrs=chainInstrs, chainLatency=chainLatency)) - elif addr_mem == 'addr_VSIB': + elif addrMem == 'addr_VSIB': # addr_VSIB -> reg configList.isUpperBound = True chainInstrs = 'VANDPD {0}14, {0}14, {0}{1};'.format(startNode.attrib['VSIB'], reg[3:]) * cRep @@ -2020,7 +2020,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_me instrI = getInstrInstanceFromNode(instrNode, [addrReg, 'R12'], [addrReg, 'R12'], useDistinctRegs) - if addr_mem == 'addr': + if addrMem == 'addr': # addr -> flag chainInstr = 'CMOV' + flag[0] + ' ' + addrReg + ', ' + addrReg chainLatency = basicLatency['CMOV' + flag[0]] @@ -2045,7 +2045,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addr_me if startNode == targetNode: instrI = getInstrInstanceFromNode(instrNode, [addrReg, 'R12'], [addrReg, 'R12'], useDistinctRegs=useDistinctRegs) - if addr_mem == 'addr': + if addrMem == 'addr': # addr -> mem configList.isUpperBound = True chainInstrs = 'MOV ' + regToSize('R12', min(64, memWidth)) + ', [' + addrReg + '];' @@ -2137,13 +2137,19 @@ def getLatencies(instrNode, instrNodeList, tpDict, htmlReports): opNode2Idx = int(opNode2.attrib['idx']) latencyNode = None - for addr_mem in (['addr', 'mem']+(['addr_VSIB'] if 'VSIB' in opNode1.attrib else []) if opNode1.attrib['type']=='mem' else ['']): + addrMemList = [''] + if opNode1.attrib['type']=='mem': + addrMemList = ['addr', 'mem']+(['addr_VSIB'] if 'VSIB' in opNode1.attrib else []) + elif opNode1.attrib['type']=='agen' and ('B' in instrNode.attrib['agen'] or 'I' in instrNode.attrib['agen']): + addrMemList = ['addr'] + + for addrMem in addrMemList: minLatDistinctRegs = 0 maxLatDistinctRegs = 0 configI = 0 for useDistinctRegs in ([True, False] if hasCommonRegister(instrNode) else [True]): - latConfigLists = getLatConfigLists(instrNode, opNode1, opNode2, useDistinctRegs, addr_mem, tpDict) + latConfigLists = getLatConfigLists(instrNode, opNode1, opNode2, useDistinctRegs, addrMem, tpDict) if latConfigLists is None: continue minLat = sys.maxint @@ -2303,7 +2309,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, htmlReports): latencyNode.attrib['start_op'] = str(opNode1.attrib['idx']) latencyNode.attrib['target_op'] = str(opNode2.attrib['idx']) - suffix = ('_'+addr_mem if addr_mem else '') + ('_same_reg' if not useDistinctRegs else '') + suffix = ('_'+addrMem if addrMem else '') + ('_same_reg' if not useDistinctRegs else '') if minLat == maxLat: latencyNode.attrib['cycles'+suffix] = str(minLat) if minLatIsUpperBound: @@ -2316,7 +2322,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, htmlReports): if maxLatIsUpperBound: latencyNode.attrib['max_cycles'+suffix+'_is_upper_bound'] = '1' - summaryLine = latencyNodeToStr(latencyNode, not useDistinctRegs, addr_mem) + summaryLine = latencyNodeToStr(latencyNode, not useDistinctRegs, addrMem) h2ID = 'lat' + str(opNode1Idx) + '->' + str(opNode2Idx) + suffix htmlHead.append('

' + summaryLine + '

') @@ -2658,6 +2664,7 @@ def main(): latencyDict = {instrNodeDict[k.attrib['string']]:v for k,v in pickle.load(f).items()} elif not useIACA or iacaVersion == '2.1': for i, instrNode in enumerate(instrNodeList): + #if not 'LEA' in instrNode.attrib['string']: continue print 'Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')' htmlReports = ['

' + instrNode.attrib['string'] + ' - Latency' + (' (IACA '+iacaVersion+')' if useIACA else '') + '

\n
\n']