This commit is contained in:
Andreas Abel
2021-03-13 21:04:52 +01:00
parent ca7f63370b
commit 313aa5ee30
19 changed files with 275 additions and 268 deletions

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Andreas Abel
# Copyright (C) 2021 Andreas Abel
#
# This file was modified from https://github.com/flababah/cpuid.py
#
@@ -408,7 +408,7 @@ def get_cache_info(cpu):
parameters.append('Physical Line partitions (P): ' + str(P))
parameters.append('Ways of associativity (W): ' + str(W))
parameters.append('Number of Sets (S): ' + str(S))
parameters.append('Cache Size: ' + str(W*P*L*S/1024) + ' kB')
parameters.append('Cache Size: ' + str(W*P*L*S//1024) + ' kB')
if get_bit(d, 0): parameters.append('WBINVD/INVD is not guaranteed to act upon lower level caches of non-originating threads sharing this cache')
else: parameters.append('WBINVD/INVD from threads sharing this cache acts upon lower level caches for threads sharing this cache')
@@ -447,7 +447,7 @@ def get_cache_info(cpu):
cacheInfo['L1D'] = {
'lineSize': L1DcLineSize,
'nSets': L1DcSize*1024/L1DcAssoc/L1DcLineSize,
'nSets': L1DcSize*1024//L1DcAssoc//L1DcLineSize,
'assoc': L1DcAssoc
}
@@ -463,7 +463,7 @@ def get_cache_info(cpu):
cacheInfo['L1I'] = {
'lineSize': L1IcLineSize,
'nSets': L1IcSize*1024/L1IcAssoc/L1IcLineSize,
'nSets': L1IcSize*1024//L1IcAssoc//L1IcLineSize,
'assoc': L1IcAssoc
}
@@ -484,7 +484,7 @@ def get_cache_info(cpu):
elif c_15_12 == 0xC: L2Assoc = 64
elif c_15_12 == 0xD: L2Assoc = 96
elif c_15_12 == 0xE: L2Assoc = 128
elif c_15_12 == 0x2: L2Assoc = L2Size*1024/L2LineSize
elif c_15_12 == 0x2: L2Assoc = L2Size*1024//L2LineSize
log.info(' L2LineSize: ' + str(L2LineSize) + ' B')
log.info(' L2LinesPerTag: ' + str(L2LinesPerTag))
@@ -493,7 +493,7 @@ def get_cache_info(cpu):
cacheInfo['L2'] = {
'lineSize': L2LineSize,
'nSets': L2Size*1024/L2Assoc/L2LineSize,
'nSets': L2Size*1024//L2Assoc//L2LineSize,
'assoc': L2Assoc
}
@@ -519,11 +519,11 @@ def get_cache_info(cpu):
log.info(' L3LineSize: ' + str(L3LineSize) + ' B')
log.info(' L3LinesPerTag: ' + str(L3LinesPerTag))
log.info(' L3Assoc: ' + str(L3Assoc))
log.info(' L3Size: ' + str(L3Size/1024) + ' MB')
log.info(' L3Size: ' + str(L3Size//1024) + ' MB')
cacheInfo['L3'] = {
'lineSize': L3LineSize,
'nSets': L3Size*1024/L3Assoc/L3LineSize,
'nSets': L3Size*1024//L3Assoc//L3LineSize,
'assoc': L3Assoc
}
@@ -551,13 +551,13 @@ if __name__ == "__main__":
yield (eax, regs)
eax += 1
print " ".join(x.ljust(8) for x in ("CPUID", "A", "B", "C", "D")).strip()
print(' '.join(x.ljust(8) for x in ('CPUID', 'A', 'B', 'C', 'D')).strip())
for eax, regs in valid_inputs():
print "%08x" % eax, " ".join("%08x" % reg for reg in regs)
print('%08x' % eax, ' '.join('%08x' % reg for reg in regs))
print ''
print get_basic_info(cpuid)
print('')
print(get_basic_info(cpuid))
print '\nCache information:'
print('\nCache information:')
get_cache_info(cpuid)

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
from itertools import count
from collections import namedtuple, OrderedDict
@@ -73,7 +74,7 @@ def main():
_, nbDict = getAgesOfBlocks(blocks, args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice,
clearHL=(not args.noClearHL), wbinvd=(not args.noWbinvd), returnNbResults=True, maxAge=args.maxAge,
nMeasurements=args.nMeasurements, agg=args.agg)
for event in sorted(e for e in nbDict.values()[0][0].keys() if 'HIT' in e or 'MISS' in e):
for event in sorted(e for e in list(nbDict.values())[0][0].keys() if 'HIT' in e or 'MISS' in e):
traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks]
title = 'Access Sequence: ' + (args.seq_init + ' ' + args.seq).replace('?','').strip() + ' <n fresh blocks> <block>?'
html.append(getPlotlyGraphDiv(title, '# of fresh blocks', event, traces))
@@ -82,7 +83,7 @@ def main():
with open(args.output ,'w') as f:
f.write('\n'.join(html))
print 'Graph written to ' + args.output
print('Graph written to ' + args.output)
if __name__ == "__main__":

View File

@@ -1,6 +1,6 @@
#!/usr/bin/python
import argparse
#!/usr/bin/env python3
import argparse
from cacheLib import *
import logging
@@ -16,11 +16,11 @@ def main():
cpuidInfo = getCpuidCacheInfo()
print ''
print getCacheInfo(1)
print getCacheInfo(2)
print('')
print(getCacheInfo(1))
print(getCacheInfo(2))
if 'L3' in cpuidInfo:
print getCacheInfo(3)
print(getCacheInfo(3))
if __name__ == "__main__":

View File

@@ -1,4 +1,3 @@
#!/usr/bin/python
from itertools import count
from collections import namedtuple
@@ -79,11 +78,11 @@ class CacheInfo:
def __str__(self):
    """Return a multi-line, human-readable summary of this cache level.

    The summary lists size, associativity, line size, number of sets and way
    size. Size and way size are divided by 1024 with floor division so that
    they are rendered as whole numbers of kB under Python 3 (true division
    would render e.g. '32.0'). The set count and way size are labelled
    '(per slice)' when ``self.nSlices`` is not None. The CBox and slice
    counts are only filled in when the respective attribute is not None;
    otherwise an empty string is joined in their place.
    """
    perSlice = ' (per slice)' if self.nSlices is not None else ''
    return '\n'.join(['L' + str(self.level) + ':',
                      ' Size: ' + str(self.size//1024) + ' kB',
                      ' Associativity: ' + str(self.assoc),
                      ' Line Size: ' + str(self.lineSize) + ' B',
                      ' Number of sets' + perSlice + ': ' + str(self.nSets),
                      ' Way size' + perSlice + ': ' + str(self.waySize//1024) + ' kB',
                      (' Number of CBoxes: ' + str(self.nCboxes) if self.nCboxes is not None else ''),
                      (' Number of slices: ' + str(self.nSlices) if self.nSlices is not None else '')])
@@ -134,13 +133,13 @@ def getCacheInfo(level):
assoc = cpuidInfo['assoc']
nSets = cpuidInfo['nSets']
stride = 2**((lineSize*nSets/getNCBoxUnits())-1).bit_length() # smallest power of two larger than lineSize*nSets/nCBoxUnits
stride = 2**((lineSize*nSets//getNCBoxUnits())-1).bit_length() # smallest power of two larger than lineSize*nSets/nCBoxUnits
ms = findMaximalNonEvictingL3SetInCBox(0, stride, assoc, 0)
log.debug('Maximal non-evicting L3 set: ' + str(len(ms)) + ' ' + str(ms))
nCboxes = getNCBoxUnits()
nSlices = nCboxes * int(math.ceil(float(len(ms))/assoc))
getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets/nSlices, nSlices, nCboxes)
getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets//nSlices, nSlices, nCboxes)
return getCacheInfo.L3CacheInfo
else:
raise ValueError('invalid level')
@@ -376,9 +375,9 @@ def getAddresses(level, wayID, cacheSetList, cBox=1, cSlice=0):
L3SetToWayIDMap[cBox][cSlice][L3Set][i] = addr
if not wayID in L3SetToWayIDMap[cBox][cSlice][L3Set]:
if getCacheInfo(3).nSlices == getNCBoxUnits():
L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, L3SetToWayIDMap[cBox][cSlice][L3Set].values())))
L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
else:
L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, L3SetToWayIDMap[cBox][cSlice][L3Set].values())))
L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
addresses.append(L3SetToWayIDMap[cBox][cSlice][L3Set][wayID])
return addresses
@@ -404,16 +403,16 @@ def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False):
for s in cacheSetsStr.split(','):
if '-' in s:
first, last = s.split('-')[:2]
cacheSetList += range(int(first), int(last)+1)
cacheSetList += list(range(int(first), int(last)+1))
else:
cacheSetList.append(int(s))
else:
nSets = getCacheInfo(level).nSets
if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes):
nHLSets = getCacheInfo(level-1).nSets
cacheSetList = range(nHLSets, nSets)
cacheSetList = list(range(nHLSets, nSets))
else:
cacheSetList = range(0, nSets)
cacheSetList = list(range(0, nSets))
return cacheSetList
@@ -509,7 +508,7 @@ def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0,
def printNB(nb_result):
    """Print every entry of ``nb_result`` as ``<key>: <value>``, one per line.

    nb_result: a mapping; its keys are concatenated with ': ', so they must
    be strings. Values are converted with ``str``.
    """
    for name, value in nb_result.items():
        # print() function form: the Python 2 print statement is a syntax
        # error under Python 3.
        print(name + ': ' + str(value))
def hasL3Conflicts(addresses, clearHLAddrList, codeOffset):

View File

@@ -1,12 +1,12 @@
#!/usr/bin/python
from itertools import count, cycle, islice
from collections import namedtuple, OrderedDict
#!/usr/bin/env python3
import argparse
import sys
from itertools import count, cycle, islice
from collections import namedtuple, OrderedDict
from cacheLib import *
import cacheSim
from cacheLib import *
import logging
log = logging.getLogger(__name__)
@@ -37,7 +37,7 @@ def main():
policyClass = cacheSim.AllPolicies[args.sim]
seq = args.seq_init + (' ' + args.seq) * args.loop
hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets) / args.loop
print 'Hits: ' + str(hits)
print('Hits: ' + str(hits))
else:
nb = runCacheExperiment(args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL),
doNotUseOtherCBoxes=args.noUseOtherCBoxes, loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg)

View File

@@ -1,9 +1,7 @@
#!/usr/bin/python
import random
from itertools import count
from numpy import median
from cacheLib import *
import logging
@@ -85,7 +83,7 @@ class PLRUSim(ReplPolicySim):
def updateIndexBits(self, accIndex):
lastIdx = accIndex
for level in reversed(range(0, len(self.bits))):
curIdx = lastIdx/2
curIdx = lastIdx//2
self.bits[level][curIdx] = 1 - (lastIdx % 2)
lastIdx = curIdx
@@ -111,7 +109,7 @@ AllRandPLRUVariants = {
class LRU_PLRU4Sim(ReplPolicySim):
def __init__(self, assoc):
    """Create ``assoc // 4`` PLRUSim instances, each built as PLRUSim(4, linearInit=True).

    assoc: the associativity; it is floor-divided by 4 because ``range``
    only accepts integers under Python 3.
    """
    self.PLRUs = [PLRUSim(4, linearInit=True) for _ in range(0, assoc//4)]
    # Separate list over the same PLRUSim objects, kept in MRU-to-LRU order.
    self.PLRUOrdered = list(self.PLRUs)
def acc(self, block):
@@ -299,9 +297,9 @@ CommonPolicies = {
'SRRIP': AllDetQLRUVariants['QLRU_H00_M2_R0_U0_UMO'],
}
# Merged policy tables. Under Python 3, dict.items() returns a view that does
# not support '+', so each view is turned into a list before concatenation.
# On duplicate keys, the entry from the right-hand table wins.
AllDetPolicies = dict(list(CommonPolicies.items()) + list(AllDetQLRUVariants.items()))
AllRandPolicies = dict(list(AllRandQLRUVariants.items()) + list(AllRandPLRUVariants.items()))
AllPolicies = dict(list(AllDetPolicies.items()) + list(AllRandPolicies.items()))
def parseCacheSetsStrSim(cacheSetsStr):
@@ -312,7 +310,7 @@ def parseCacheSetsStrSim(cacheSetsStr):
for s in cacheSetsStr.split(','):
if '-' in s:
first, last = s.split('-')[:2]
cacheSetList += range(int(first), int(last)+1)
cacheSetList += list(range(int(first), int(last)+1))
else:
cacheSetList.append(int(s))
@@ -381,8 +379,8 @@ def getPermutations(policySimClass, assoc):
initAges = getAges(initBlocks, seq, policySimClass, assoc)
accSeqStr = 'Access sequence: <wbinvd> ' + seq
print accSeqStr
print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}'
print(accSeqStr)
print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}')
blocks = ['B' + str(i) for i in range(0, assoc)]
baseSeq = ' '.join(initBlocks + blocks)
@@ -390,8 +388,8 @@ def getPermutations(policySimClass, assoc):
ages = getAges(blocks, baseSeq, policySimClass, assoc)
accSeqStr = 'Access sequence: <wbinvd> ' + baseSeq
print accSeqStr
print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}'
print(accSeqStr)
print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}')
blocksSortedByAge = [a[0] for a in sorted(ages.items(), key=lambda x: -x[1])] # most recent block first
@@ -408,5 +406,5 @@ def getPermutations(policySimClass, assoc):
break
perm[assoc-permAge] = bi
print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))
print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)))

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import argparse
import sys
@@ -32,10 +33,10 @@ def main():
seq = re.sub('[?!]', '', ' '.join([args.seq_init, args.seq])).strip() + '?'
hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets)
if hits > 0:
print 'HIT'
print('HIT')
exit(1)
else:
print 'MISS'
print('MISS')
exit(0)
else:
setCount = len(parseCacheSetsStr(args.level, True, args.sets))
@@ -43,10 +44,10 @@ def main():
nb = runCacheExperiment(args.level, seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL),
loop=args.loop, wbinvd=(not args.noWbinvd))
if nb['L' + str(args.level) + '_HIT']/setCount > .5:
print 'HIT'
print('HIT')
exit(1)
else:
print 'MISS'
print('MISS')
exit(0)

View File

@@ -1,20 +1,20 @@
#!/usr/bin/python
from itertools import count
from collections import namedtuple, OrderedDict
#!/usr/bin/env python3
import argparse
import math
import os
import plotly.graph_objects as go
import re
import subprocess
import sys
from itertools import count
from collections import namedtuple, OrderedDict
from plotly.offline import plot
import plotly.graph_objects as go
import cacheSim
from cacheLib import *
from cacheGraph import *
import cacheSim
import logging
log = logging.getLogger(__name__)
@@ -36,8 +36,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
cBox=cBox, cSlice=cSlice)
accSeqStr = 'Access sequence: <wbinvd> ' + seq
print accSeqStr
print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}'
print(accSeqStr)
print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}')
event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent)
traces = [(b, [nb[event] for nb in nbDict[b]]) for b in initBlocks]
@@ -52,8 +52,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
cBox=cBox, cSlice=cSlice)
accSeqStr = 'Access sequence: <wbinvd> ' + baseSeq
print accSeqStr
print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}'
print(accSeqStr)
print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}')
event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent)
traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks]
@@ -77,7 +77,7 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
break
perm[assoc-permAge] = bi
print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))
print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)))
def main():

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import argparse
import random
import sys
@@ -23,7 +24,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s
seq = initSeq + ' '.join(seqPrefix)
actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements)
sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets)
print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)
print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim))
if sim != actual:
break
@@ -32,7 +33,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s
seq = initSeq + ' '.join(tmpPrefix)
actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements)
sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets)
print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)
print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim))
if sim != actual:
seqPrefix = tmpPrefix
@@ -115,7 +116,7 @@ def main():
for seq in seqList:
fullSeq = ((args.initSeq + ' ') if args.initSeq else '') + seq
print fullSeq
print(fullSeq)
html += ['<tr><td>' + fullSeq + '</td>']
actualHits = set([getActualHits(fullSeq, args.level, args.sets, cBox, args.slice, args.nMeasurements) for _ in range(0, args.rep)])
@@ -151,14 +152,14 @@ def main():
html += ['</tr>']
if not args.randPolicies and not args.best:
print 'Possible policies: ' + ', '.join(possiblePolicies)
print('Possible policies: ' + ', '.join(possiblePolicies))
if not possiblePolicies: break
if not args.randPolicies and args.findCtrEx:
print ''
print 'Counter example(s): '
print('')
print('Counter example(s):')
for p, ctrEx in counterExamples.items():
print ' ' + p + ': ' + ctrEx
print(' ' + p + ': ' + ctrEx)
html += ['</table>', '</body>', '</html>']
@@ -166,10 +167,10 @@ def main():
f.write('\n'.join(html))
if not args.randPolicies and not args.best:
print 'Possible policies: ' + ', '.join(possiblePolicies)
print('Possible policies: ' + ', '.join(possiblePolicies))
else:
for p, d in reversed(sorted(dists.items(), key=lambda d: d[1])):
print p + ': ' + str(d)
print(p + ': ' + str(d))
if __name__ == "__main__":

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import argparse
import random
@@ -31,7 +32,7 @@ def main():
nCBoxes = max(1, getNCBoxUnits())
nSlicesPerCBox = 1
if getCacheInfo(3).nSlices:
nSlicesPerCBox = getCacheInfo(3).nSlices / getCacheInfo(3).nCboxes
nSlicesPerCBox = getCacheInfo(3).nSlices // getCacheInfo(3).nCboxes
seqLength = (args.length if args.length is not None else assoc+1)
seq = ' '.join('B' + str(i) + '?' for i in range(0, seqLength))
@@ -42,7 +43,7 @@ def main():
html = ['<html>', '<head>', '<title>' + title + '</title>', '<script src="https://cdn.plot.ly/plotly-latest.min.js">', '</script>', '</head>', '<body>']
html += ['<h3>' + title + '</h3>']
setsForSlice = {cBox: {cSlice: range(0,nL3Sets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
setsForSlice = {cBox: {cSlice: list(range(0,nL3Sets)) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
L3HitsDict = {cBox: {cSlice: [[] for s in range(0, nL3Sets)] for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
prevOti = ''
@@ -69,11 +70,11 @@ def main():
nMeasurements=args.nMeasurements, agg='med')
if nb['L1_MISS'] < seqLength - .2:
print 'Hit in L1'
print('Hit in L1')
continue
if nb['L2_MISS'] < seqLength - .2:
print 'Hit in L2'
print('Hit in L2')
continue
L3Hits.append(nb['L3_HIT'])
@@ -121,7 +122,7 @@ def main():
with open(args.output ,'w') as f:
f.write('\n'.join(html))
print 'Output written to ' + args.output
print('Output written to ' + args.output)
if __name__ == "__main__":

View File

@@ -1,9 +1,9 @@
#!/usr/bin/python
#!/usr/bin/env python3
import argparse
import math
from plotly.offline import plot
import plotly.graph_objects as go
from plotly.offline import plot
from cacheLib import *
@@ -28,9 +28,9 @@ def main():
while pt <= args.endSize*1024:
tickvals.append(pt)
for x in ([int(math.pow(2, math.log(pt, 2) + i/16.0)) for i in range(0,16)] if pt < args.endSize*1024 else [pt]):
print x/1024
print(x//1024)
xValues.append(str(x))
addresses = range(0, x, args.stride)
addresses = list(range(0, x, args.stride))
nAddresses.append(len(addresses))
ec = getCodeForAddressLists([AddressList(addresses, False, False, False)], wbinvd=True)
nbDicts.append(runNanoBench(code=ec.code, init=ec.init, oneTimeInit=ec.oneTimeInit))
@@ -57,7 +57,7 @@ def main():
with open(args.output ,'w') as f:
f.write('\n'.join(html))
print 'Graph written to ' + args.output
print('Graph written to ' + args.output)
if __name__ == "__main__":
main()

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
from collections import namedtuple
import xml.etree.ElementTree as ET
from xml.dom import minidom
@@ -69,13 +70,13 @@ def main():
iclassAsmDict.setdefault(re.sub('{.*} ', '', asm), set()).add(instrNode)
#for x in set(op for de in docList for op in de.operands):
# print x
# print(x)
xmlToDocDict = dict()
for de in sorted(docEntrySet):
if de.mnemonic not in iclassAsmDict:
print 'no XML entry found for ' + str(de)
print('no XML entry found for ' + str(de))
xmlFound = False
for instrNode in iclassAsmDict[de.mnemonic]:
@@ -135,15 +136,15 @@ def main():
elif (set(de.operands) == {None}) and (set(xmlToDocDict[instrNode].operands) != {None}):
pass
else:
print 'duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de))
print('duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de)))
else:
xmlFound = True
xmlToDocDict[instrNode] = de
if not xmlFound:
print 'no matching XML entry found for ' + str(de)
print('no matching XML entry found for ' + str(de))
print 'Found data for ' + str(len(xmlToDocDict)) + ' instruction variants'
print('Found data for ' + str(len(xmlToDocDict)) + ' instruction variants')
for instrNode, de in xmlToDocDict.items():
archNode = instrNode.find('./architecture[@name="{}"]'.format(args.arch))

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
from collections import namedtuple
import xml.etree.ElementTree as ET
from xml.dom import minidom
@@ -43,9 +44,9 @@ def main():
matchingDEs.remove(de)
if len(matchingDEs) == 0:
print 'No matching iform: ' + iform
print('No matching iform: ' + iform)
elif len(matchingDEs) > 1:
print 'Multiple matching iforms: ' + iform
print('Multiple matching iforms: ' + iform)
else:
de = next(iter(matchingDEs))

View File

@@ -1,8 +1,9 @@
#!/usr/bin/python
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
import argparse
import re
import urllib
import urllib.request
from xml.dom import minidom
from utils import *
@@ -12,7 +13,7 @@ def main():
parser.add_argument("output", help="Output XML file")
args = parser.parse_args()
html = urllib.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*')
html = urllib.request.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*')
lines = re.findall('href="\./(.*?)">(.*?)</a>.*?</td><td>(.*?)</td>', html) # Example: ('ADC.html', 'ADC', 'Add with Carry'),
lineDict = {(line[0],line[1]):line for line in lines}
@@ -128,7 +129,7 @@ def main():
matchingLines.append(line)
if len(matchingLines) > 1:
print 'Duplicate link found for ' + iclass
print('Duplicate link found for ' + iclass)
exit(1)
instrNode.attrib['url'] = 'uops.info/html-instr/' + canonicalizeInstrString(instrNode.attrib['string']) + '.html'

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
import argparse
import sys
@@ -66,13 +67,13 @@ def main():
else:
portsDiff = True
nPortsDiff += 1
if args.verbose: print 'PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts)
if args.verbose: print('PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts))
else:
nPortsMeasurementOnly += 1
else:
if otherPorts:
nPortsOtherOnly += 1
if args.verbose: print 'PortsOtherOnly: ' + instrNode.attrib['string']
if args.verbose: print('PortsOtherOnly: ' + instrNode.attrib['string'])
otherUops = [v for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('uops') and v.replace('.','',1).isdigit()]
mUops = ([v for a,v in measurementNode.attrib.items() if a.startswith('uops') and not 'retire_slots' in a] if measurementNode is not None else [])
@@ -86,13 +87,13 @@ def main():
nUopsEqPortsDiff += int(portsDiff)
else:
nUopsDiff += 1
if args.verbose: print 'UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops)
if args.verbose: print('UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops))
else:
nUopsMeasurementOnly += 1
else:
if otherUops:
nUopsOtherOnly += 1
if args.verbose: print 'UopsOtherOnly: ' + instrNode.attrib['string']
if args.verbose: print('UopsOtherOnly: ' + instrNode.attrib['string'])
otherLatencies = [float(v) for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('latency') and v.replace('.','',1).isdigit()]
@@ -113,54 +114,54 @@ def main():
nLatUBClose += 1
else:
nLatUBIncorrect += 1
if args.verbose: print 'LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)
if args.verbose: print('LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies))
else:
nLatNoUB += 1
if maxLat in otherLatencies:
nLatNoUBMaxEq += 1
else:
nLatNoUBMaxDiff += 1
if args.verbose: print 'LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)
if args.verbose: print('LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies))
else:
nLatMeasurementOnly += 1
else:
if otherLatencies:
nLatOtherOnly += 1
if args.verbose: print 'LatOtherOnly: ' + instrNode.attrib['string']
if args.verbose: print('LatOtherOnly: ' + instrNode.attrib['string'])
print 'Ports:'
print ' Measurement data only: ' + str(nPortsMeasurementOnly)
print ' Other data only: ' + str(nPortsOtherOnly)
print ' Both: ' + str(nPortsBoth)
print ' Eq: ' + str(nPortsEq)
print ' Diff: ' + str(nPortsDiff)
print ''
print('Ports:')
print(' Measurement data only: ' + str(nPortsMeasurementOnly))
print(' Other data only: ' + str(nPortsOtherOnly))
print(' Both: ' + str(nPortsBoth))
print(' Eq: ' + str(nPortsEq))
print(' Diff: ' + str(nPortsDiff))
print('')
print 'Uops:'
print ' Measurement data only: ' + str(nUopsMeasurementOnly)
print ' Other data only: ' + str(nUopsOtherOnly)
print ' Both: ' + str(nUopsBoth)
print ' Eq: ' + str(nUopsEq)
print ' PortsEq: ' + str(nUopsEqPortsEq)
print ' PortsDiff: ' + str(nUopsEqPortsDiff)
print ' Diff: ' + str(nUopsDiff)
print ''
print('Uops:')
print(' Measurement data only: ' + str(nUopsMeasurementOnly))
print(' Other data only: ' + str(nUopsOtherOnly))
print(' Both: ' + str(nUopsBoth))
print(' Eq: ' + str(nUopsEq))
print(' PortsEq: ' + str(nUopsEqPortsEq))
print(' PortsDiff: ' + str(nUopsEqPortsDiff))
print(' Diff: ' + str(nUopsDiff))
print('')
print 'Latency:'
print ' Measurement data only: ' + str(nLatMeasurementOnly)
print ' Other data only: ' + str(nLatOtherOnly)
print ' Both: ' + str(nLatBoth)
print ' Exact: ' + str(nLatNoUB)
print ' Eq (Max): ' + str(nLatNoUBMaxEq)
print ' Diff (Max): ' + str(nLatNoUBMaxDiff)
print ' Upper Bound: ' + str(nLatUB)
print ' Correct: ' + str(nLatUBCorrect)
print ' Exact: ' + str(nLatUBExact)
print ' Close: ' + str(nLatUBClose)
print ' Incorrect: ' + str(nLatUBIncorrect)
print ''
print('Latency:')
print(' Measurement data only: ' + str(nLatMeasurementOnly))
print(' Other data only: ' + str(nLatOtherOnly))
print(' Both: ' + str(nLatBoth))
print(' Exact: ' + str(nLatNoUB))
print(' Eq (Max): ' + str(nLatNoUBMaxEq))
print(' Diff (Max): ' + str(nLatNoUBMaxDiff))
print(' Upper Bound: ' + str(nLatUB))
print(' Correct: ' + str(nLatUBCorrect))
print(' Exact: ' + str(nLatUBExact))
print(' Close: ' + str(nLatUBClose))
print(' Incorrect: ' + str(nLatUBIncorrect))
print('')
print 'Throughput:'
print('Throughput:')
for TP_m, TP_o in [('TP', 'TP'), ('TP_ports', 'TP'), ('TP', 'TP_ports'), ('TP_ports', 'TP_ports')]:
nTPMeasurementOnly = 0
nTPOtherOnly = 0
@@ -184,28 +185,28 @@ def main():
nTPEq += 1
else:
nTPDiff += 1
if args.verbose: print 'TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)
if args.verbose: print('TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs))
diff = min(abs(float(m)-float(o)) for o in otherTPs for m in mTPs)
if diff <= .1:
nTPClose += 1
else:
nTPNotClose += 1
if args.verbose: print 'TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)
if args.verbose: print('TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs))
else:
nTPMeasurementOnly += 1
else:
if otherTPs:
nTPOtherOnly += 1
if args.verbose: print 'TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string'])
if args.verbose: print('TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string']))
print ' {} (measurements) - {} (other):'.format(TP_m, TP_o)
print ' Measurement data only: ' + str(nTPMeasurementOnly)
print ' Other data only: ' + str(nTPOtherOnly)
print ' Both: ' + str(nTPBoth)
print ' Eq: ' + str(nTPEq)
print ' Diff: ' + str(nTPDiff)
print ' Close: ' + str(nTPClose)
print ' NotClose: ' + str(nTPNotClose)
print(' {} (measurements) - {} (other):'.format(TP_m, TP_o))
print(' Measurement data only: ' + str(nTPMeasurementOnly))
print(' Other data only: ' + str(nTPOtherOnly))
print(' Both: ' + str(nTPBoth))
print(' Eq: ' + str(nTPEq))
print(' Diff: ' + str(nTPDiff))
print(' Close: ' + str(nTPClose))
print(' NotClose: ' + str(nTPNotClose))
if __name__ == "__main__":
main()

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
from xml.dom import minidom
import argparse
@@ -29,7 +30,7 @@ def main():
for instrStr in sorted(instrNodeDict1):
instrNode1 = instrNodeDict1[instrStr]
if not instrStr in instrNodeDict2:
print 'No matching entry found for ' + instrStr
print('No matching entry found for ' + instrStr)
continue
instrNode2 = instrNodeDict2[instrStr]
for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'):
@@ -40,44 +41,43 @@ def main():
if tp1 != tp2:
tpDiff += 1
print instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2)
print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2))
if args.lat:
for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')):
latStr1 = ET.tostring(latNode1, encoding='utf-8').strip()
latStr2 = ET.tostring(latNode2, encoding='utf-8').strip()
latStr1 = ET.tostring(latNode1, encoding='utf-8').decode().strip()
latStr2 = ET.tostring(latNode2, encoding='utf-8').decode().strip()
if latStr1 != latStr2:
latDiff += 1
print instrStr
print ' ' + latStr1
print ' ' + latStr2
print(' ' + latStr1)
print(' ' + latStr2)
if args.ports:
p1 = mNode1.attrib.get('ports', '')
p2 = mNode2.attrib.get('ports', '')
if p1 != p2:
portsDiff += 1
print instrStr + ' - P1: ' + p1 + ' - P2: ' + p2
print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2)
if not args.TP and not args.lat and not args.ports:
xmlStr1 = ET.tostring(mNode1, encoding='utf-8').strip()
xmlStr2 = ET.tostring(mNode2, encoding='utf-8').strip()
xmlStr1 = ET.tostring(mNode1, encoding='utf-8').decode().strip()
xmlStr2 = ET.tostring(mNode2, encoding='utf-8').decode().strip()
if xmlStr1 != xmlStr2:
print '-------------------------------'
print instrStr
print xmlStr1
print xmlStr2
print '-------------------------------'
print('-------------------------------')
print(instrStr)
print(xmlStr1)
print(xmlStr2)
print('-------------------------------')
if args.TP:
print 'TPDiff: ' + str(tpDiff)
print('TPDiff: ' + str(tpDiff))
if args.lat:
print 'LatDiff: ' + str(latDiff)
print('LatDiff: ' + str(latDiff))
if args.ports:
print 'portsDiff: ' + str(portsDiff)
print('portsDiff: ' + str(portsDiff))
if __name__ == "__main__":
main()

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
from xml.dom import minidom
@@ -79,7 +80,7 @@ def getIndexReg(instrNode, opNode):
# registers that are not used as implicit registers should come first; RAX (and parts of it) should come last, as some instructions have special encodings for that
# prefer low registers to high registers
def sortRegs(regsList):
    """Return the register names in ``regsList`` sorted by preference.

    Names are ordered by the following key, compared element by element:
      1. names containing a digit come before names without one,
      2. names without 'P' before names with 'P',
      3. names without 'I' before names with 'I',
      4. names without 'H' before names with 'H',
      5. names without 'A' before names with 'A',
      6. the integers embedded in the name, compared numerically
         (so e.g. 'R9' sorts before 'R10'),
      7. the name itself as a final tie-breaker.

    regsList: an iterable of register-name strings.
    Returns a new list; the input is not modified.
    """
    def sortKey(reg):
        return (
            not any(ch.isdigit() for ch in reg),
            'P' in reg,
            'I' in reg,
            'H' in reg,
            'A' in reg,
            # Must be a list: Python 3 map objects cannot be ordered, which
            # would raise TypeError as soon as two keys tie up to this point.
            [int(num) for num in re.findall(r'\d+', reg)],
            reg,
        )

    return sorted(regsList, key=sortKey)
# Initialize registers and memory
@@ -115,7 +116,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
init += ['MOV {}, 0'.format(reg)]
elif 'MM' in regPrefix and xtype.startswith('f'):
init += ['MOV RAX, 0x4000000040000000']
for i in range(0, getRegSize(reg)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
for i in range(0, getRegSize(reg)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
if isAVXInstr(instrNode):
init += ['VMOVUPD ' + reg + ', [R14]']
@@ -128,7 +129,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
elif opNode.attrib['type'] == 'mem':
if xtype.startswith('f'):
init += ['MOV RAX, 0x4000000040000000']
for i in range(0, int(opNode.attrib['width'])/8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX']
for i in range(0, int(opNode.attrib['width'])//8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX']
for opNode in instrNode.findall('./operand[@type="mem"]'):
if opNode.attrib.get('suppressed', '0') == '1': continue
@@ -179,7 +180,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
initObjFile = None
lateInitObjFile=None
if initCode:
if debugOutput: print 'init: ' + initCode
if debugOutput: print('init: ' + initCode)
objFile = '/tmp/ramdisk/init.o'
if useLateInit:
lateInitObjFile = objFile
@@ -191,7 +192,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
localHtmlReports.append('<li>Init: <pre>' + re.sub(';[ \t]*(.)', r';\n\1', initCode) + '</pre></li>\n')
localHtmlReports.append('<li><a href="javascript:;" onclick="this.outerHTML = \'<pre>' + nanoBenchCmd + '</pre>\'">Show nanoBench command</a></li>\n')
if debugOutput: print nanoBenchCmd
if debugOutput: print(nanoBenchCmd)
setNanoBenchParameters(unrollCount=unrollCount, loopCount=loopCount, warmUpCount=warmUpCount, basicMode=basicMode)
@@ -223,19 +224,19 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
if maxRepeat>0:
if any(v<-0.05 for v in ret.values()):
print 'Repeating experiment because there was a value < 0'
print('Repeating experiment because there was a value < 0')
return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1)
#sumPortUops = sum(v for e,v in ret.items() if 'PORT' in e and not '4' in e)
#if (sumPortUops % 1) > .2 and (sumPortUops % 1) < .8:
# print 'Repeating experiment because the sum of the port usages is not an integer'
# print ret
# print('Repeating experiment because the sum of the port usages is not an integer')
# print(ret)
# return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=basicMode, htmlReports=htmlReports, maxRepeat=maxRepeat-1)
if any('PORT' in e for e in ret):
maxPortUops = max(v/(len(e)-9) for e,v in ret.items() if 'PORT' in e)
if maxPortUops * .98 > ret['Core cycles']:
print 'Repeating experiment because there were more uops on a port than core cycles'
print('Repeating experiment because there were more uops on a port than core cycles')
return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1)
if htmlReports is not None:
@@ -250,10 +251,10 @@ def writeFile(fileName, content):
def getMachineCode(objFile):
    """Disassemble objFile with objdump and return the listing of its code.

    Runs `objdump -M intel -d objFile` and returns everything that follows
    the '<.text>:' header line of the output (an empty string if the header
    is not present).

    Returns None, after printing an error message, if objdump exits with a
    non-zero status.
    """
    try:
        # check_output returns bytes on Python 3; decode so that the
        # str-based partition below works
        machineCode = subprocess.check_output(['objdump', '-M', 'intel', '-d', objFile]).decode()
    except subprocess.CalledProcessError as e:
        print('Error (getMachineCode): ' + str(e))
        return None
    return machineCode.partition('<.text>:\n')[2]
def getCodeLength(asmCode):
@@ -420,7 +421,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
ignoreRegs |= set(doNotWriteRegs)|globalDoNotWriteRegs|set(opRegDict.values())
if operandNode.attrib.get('r', '0') == '1':
ignoreRegs |= set(doNotReadRegs)|writtenRegs|readRegs|set(opRegDict.values())
regsList = filter(lambda x: not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs), regsList)
regsList = [x for x in regsList if not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs)]
if not regsList:
return None;
reg = sortRegs(regsList)[0]
@@ -507,7 +508,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
def createIacaAsmFile(fileName, prefixInstr, prefixRep, instr):
asm = '.intel_syntax noprefix\n .byte 0x0F, 0x0B; mov ebx, 111; .byte 0x64, 0x67, 0x90\n'
if prefixInstr:
for i in xrange(prefixRep):
for i in range(prefixRep):
asm += prefixInstr + "\n"
asm += instr + "\n"
asm += "1:\n"
@@ -521,9 +522,9 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
readRegs = instrInstance.readRegs
writtenRegs = instrInstance.writtenRegs
if debugOutput: print ' instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs)
if debugOutput: print(' instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs))
blockInstrsList = getIndependentInstructions(blockInstrNode, True, False, writtenRegs|readRegs, writtenRegs|readRegs, 64)
if debugOutput: print ' bIL: ' + str(blockInstrsList)
if debugOutput: print(' bIL: ' + str(blockInstrsList))
htmlReports.append('<hr><h3>With blocking instructions for port' +
('s {' if len(blockedPorts)>1 else ' ') +
@@ -537,11 +538,11 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
print "Error: " + e.output
print('Error: ' + e.output)
return None
if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
print "IACA error"
print('IACA error')
return None
allPortsLine = re.search('\| Cycles \|.*', iacaOut).group(0)
@@ -584,7 +585,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
blockInstrAsm = ';'.join(islice(cycle(x.asm for x in blockInstrsList), blockInstrRep))
unrollCount = 1000/blockInstrRep # make sure that instrs. fit into icache
unrollCount = 1000//blockInstrRep # make sure that instrs. fit into icache
if isAMDCPU(): unrollCount = max(unrollCount, 100) # ZEN+ sometimes undercounts FP usage if code is short
@@ -596,7 +597,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
if float(measurementResult['Core cycles']) < -10:
#something went wrong; this happens for example on HSW with long sequences of JMP instructions
if debugOutput: print "Core cycles < -10 in getUopsOnBlockedPorts"
if debugOutput: print('Core cycles < -10 in getUopsOnBlockedPorts')
if sum(u for p, u in measurementResult.items() if ('UOPS_PORT' in p or 'FpuPipeAssignment.Total' in p)) < blockInstrRep-.5:
# something went wrong; fewer uops on ports than blockInstrRep
@@ -643,7 +644,7 @@ def getIndependentInstructions(instrNode, useDistinctRegs, useIndexedAddr, doNot
maxMemWidth = 0
for memNode in instrNode.findall('./operand[@type="mem"][@w="1"]'):
maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0'))/8)
maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0')) // 8)
offset += maxMemWidth
independentInstructions.append(instrI)
@@ -694,17 +695,17 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports):
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
print "Error: " + e.output
print('Error: ' + e.output)
return None
if debugOutput:
print instrNode.attrib['iform'] + ' - NoInteriteration'
print iaca_tp
print(instrNode.attrib['iform'] + ' - NoInteriteration')
print(iaca_tp)
htmlReports.append('<pre>' + iaca_tp + '</pre>\n')
if not iaca_tp or ' !' in iaca_tp or ' X' in iaca_tp or ' 0X' in iaca_tp or not 'Total Num Of Uops' in iaca_tp:
print "IACA error"
print('IACA error')
return None
cycles = float(iaca_tp.split('\n')[3].split()[2])
@@ -958,7 +959,7 @@ def getTPConfigsForDiv(instrNode):
if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM'
config.init = ['MOV RAX, ' + arg]
for i in range(0, getRegSize(regType)/8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX']
for i in range(0, getRegSize(regType)//8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX']
targetRegIdx = min(int(opNode.attrib['idx']) for opNode in instrNode.findall('./operand') if opNode.text and regType in opNode.text)
if memDivisor:
@@ -997,11 +998,11 @@ TPResult = namedtuple('TPResult', ['TP', 'TP_loop', 'TP_noLoop', 'TP_noDepBreaki
def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports):
configs = getTPConfigs(instrNode, useDistinctRegs, useIndexedAddr)
minTP = sys.maxint
minTP_loop = sys.maxint
minTP_noLoop = sys.maxint
minTP_noDepBreaking_noLoop = sys.maxint
minTP_single = sys.maxint
minTP = sys.maxsize
minTP_loop = sys.maxsize
minTP_noLoop = sys.maxsize
minTP_noDepBreaking_noLoop = sys.maxsize
minTP_single = sys.maxsize
if useIACA:
config = configs[0] # consider only first config as IACA does not seem to consider different values in registers
@@ -1024,17 +1025,17 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
logging.warn('Error: ' + e.output)
if minTP != sys.maxint:
if minTP != sys.maxsize:
htmlReports.append('<pre>' + e.output + '</pre>\n')
continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
else:
return None
if not iaca_out or ' ! ' in iaca_out or ' X ' in iaca_out or ' 0X ' in iaca_out or not 'Total Num Of Uops' in iaca_out:
print "IACA error"
print('IACA error')
return None
print instrNode.attrib['iform'] + ' - throughput'
print(instrNode.attrib['iform'] + ' - throughput')
htmlReports.append('<pre>' + iaca_out + '</pre>\n')
@@ -1087,7 +1088,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
instrIList = config.independentInstrs
instrLen = getCodeLength(instrIList[0].asm)
for ic in sorted(set([1, min(4, len(instrIList)), min(8, len(instrIList)), len(instrIList)])):
if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break
if len(instrIList) > 1: htmlReports.append('<h3 style="margin-left: 25px">With ' + str(ic) + ' independent instruction' + ('s' if ic>1 else '') + '</h3>\n')
htmlReports.append('<div style="margin-left: 50px">\n')
@@ -1095,7 +1096,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
init = list(chain.from_iterable(i.regMemInit for i in instrIList[0:ic])) + config.init
for useDepBreakingInstrs in ([False, True] if config.depBreakingInstrs else [False]):
if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break
depBreakingInstrs = ''
if useDepBreakingInstrs:
@@ -1103,7 +1104,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
htmlReports.append('<h4>With additional dependency-breaking instructions</h4>\n')
for repType in ['unrollOnly', 'loopSmall', 'loopBig']:
if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break
paddingTypes = ['']
if ((repType != 'unrollOnly') and (uopsMITE is not None) and (not uopsMS) and (math.ceil(32.0/instrLen) * uopsMITE > 18)
@@ -1138,7 +1139,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
else:
loopCount = 100
unrollCount *= 10
if minTP < sys.maxint and minTP > 100:
if minTP < sys.maxsize and minTP > 100:
unrollCount = 1
loopCount = 10
@@ -1162,7 +1163,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
#if any('PORT' in e for e in result):
# maxPortUops = max(v/(len(e)-9) for e,v in result.items() if e.startswith('UOPS_PORT') and not '4' in e)
# if maxPortUops * .98 > result['Core cycles']:
# print 'More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles'])
# print('More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles']))
# #invalid = True
#if not invalid:
@@ -1174,7 +1175,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
else:
minTP_loop = min(minTP_loop, cycles)
if ic == 1 and (minTP == sys.maxint or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
if ic == 1 and (minTP == sys.maxsize or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
minConfig = config
minTP_single = min(minTP_single, cycles)
@@ -1217,7 +1218,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
htmlReports.append('</div>')
if minTP < sys.maxint:
if minTP < sys.maxsize:
return TPResult(minTP, minTP_loop, minTP_noLoop, minTP_noDepBreaking_noLoop, minTP_single, uops, uopsFused, uopsMITE, uopsMS, divCycles, ILD_stalls,
complexDec, nAvailableSimpleDecoders, minConfig, ports_dict)
@@ -1246,7 +1247,7 @@ def getBasicLatencies(instrNodeList):
movsxResult = runExperiment(instrNodeDict['MOVSXD (R64, R32)'], 'MOVSX RAX, EAX')
movsxCycles = int(round(movsxResult['Core cycles']))
if movsxCycles != 1:
print 'Latency of MOVSX must be 1'
print('Latency of MOVSX must be 1')
sys.exit()
basicLatency['MOVSX'] = movsxCycles
@@ -1275,7 +1276,7 @@ def getBasicLatencies(instrNodeList):
testSetResult = runExperiment(None, 'TEST AL, AL; SET' + flag[0] + ' AL')
testSetCycles = int(round(testSetResult['Core cycles']))
if not testSetCycles == 2:
print 'Latencies of TEST and SET' + flag[0] + ' must be 1'
print('Latencies of TEST and SET' + flag[0] + ' must be 1')
sys.exit()
basicLatency['SET' + flag[0]] = 1
basicLatency['TEST'] = 1
@@ -1297,7 +1298,7 @@ def getBasicLatencies(instrNodeList):
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
basicLatency[instr] = int(round(result['Core cycles']))
if filter(lambda x: x.findall('[@iclass="VANDPS"]'), instrNodeList):
if any(x for x in instrNodeList if x.findall('[@iclass="VANDPS"]')):
for instr in ['VANDPS', 'VANDPD', 'VORPS', 'VORPD', 'VPAND', 'VPOR']:
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, XMM)'], instr + ' XMM1, XMM1, XMM1')
basicLatency[instr] = int(round(result['Core cycles']))
@@ -1310,7 +1311,7 @@ def getBasicLatencies(instrNodeList):
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
basicLatency[instr] = int(round(result['Core cycles']))
if filter(lambda x: x.findall('[@extension="AVX512EVEX"]'), instrNodeList):
if any(x for x in instrNodeList if x.findall('[@extension="AVX512EVEX"]')):
kmovq_result = runExperiment(instrNodeDict['KMOVQ (K, K)'], 'KMOVQ K1, K1')
basicLatency['KMOVQ'] = int(round(kmovq_result['Core cycles']))
@@ -1321,7 +1322,7 @@ def getBasicLatencies(instrNodeList):
basicLatency['VMOVUPS_' + regType + '_' + 'K'] = vmovups_cycles
if not vmovups_uops == 1:
print 'VMOVUPS must have exactly 1 uop'
print('VMOVUPS must have exactly 1 uop')
sys.exit()
vpmovq2m_result = runExperiment(instrNodeDict['VPMOVQ2M (K, ' + regType + ')'],
@@ -1337,7 +1338,7 @@ def getBasicLatencies(instrNodeList):
mov_10movsx_mov_result = runExperiment(None, 'mov ' + reg + ', [r14];' + ';'.join(10*['MOVSX R12, R12w']) + '; mov [r14], ' + reg , unrollCount=100)
basicLatency['MOV_10MOVSX_MOV_'+str(memWidth)] = int(round(mov_10movsx_mov_result['Core cycles']))
print 'Basic Latencies: ' + str(basicLatency)
print('Basic Latencies: ' + str(basicLatency))
# Returns a dict {opNode: instr}, s.t. opNode is both read and written, and instr breaks the dependency
# Returns a list of dependency breaking instructions for operands that are both read and written (with the exception of ignoreOperand, if specified).
@@ -1541,8 +1542,8 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep):
init = ['MOV RAX, ' + dividend]
init += ['MOV RBX, ' + divisor]
for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RBX']
for i in range(64, 64+getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RBX']
for i in range(64, 64+getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
if instrNode.attrib['iclass'] in ['DIVSS', 'DIVPS', 'DIVSD', 'DIVPD']:
init += ['MOVUP' + dataType + ' XMM1, [R14+64]']
@@ -1671,7 +1672,7 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep):
if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM'
init = ['MOV RAX, ' + arg]
for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
targetReg = regType + '0'
sourceBaseReg = regType + '1'
@@ -1778,7 +1779,7 @@ def getLatConfigsFromMemToReg(instrNode, instrI, memOpNode, targetReg, addrReg,
if memOpNode.attrib['width'] != chainOpNode1.attrib['width']: continue
if memOpNode.attrib.get('VSIB', '') != chainOpNode1.attrib.get('VSIB', ''): continue
for chainOpNode2 in filter(lambda x: targetReg in x.text.split(','), chainInstrNode.findall('./operand[@type="reg"][@w="1"]')):
for chainOpNode2 in [x for x in chainInstrNode.findall('./operand[@type="reg"][@w="1"]') if targetReg in x.text.split(',')]:
if chainOpNode2.attrib.get('optional', '') == '1': continue
chainsInstr = getInstrInstanceFromNode(chainInstrNode, [targetReg], [targetReg], True, {int(chainOpNode2.attrib['idx']):targetReg}).asm
result.append(LatConfig(instrI, chainInstrs=chainsInstr, chainLatency=1))
@@ -1971,7 +1972,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
else:
if len(regs2) == 1:
reg2 = sortRegs(regs2)[0]
otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg2), regs1)
otherRegs = [x for x in regs1 if getCanonicalReg(x) != getCanonicalReg(reg2)]
if otherRegs:
reg1 = sortRegs(otherRegs)[0]
else:
@@ -1988,7 +1989,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
reg2 = r
break
else:
otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg1), regs2)
otherRegs = [x for x in regs2 if getCanonicalReg(x) != getCanonicalReg(reg1)]
if otherRegs:
reg2 = sortRegs(otherRegs)[0]
@@ -2053,7 +2054,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
chainInstrInt, chainLatencyInt = getChainInstrForVectorRegs(instrNode, reg2, reg1, cRep, 'Int')
configList.append(LatConfig(instrI, chainInstrs=chainInstrInt, chainLatency=chainLatencyInt))
else:
print 'invalid reg prefix: ' + reg1Prefix
print('invalid reg prefix: ' + reg1Prefix)
return None
else:
configList.isUpperBound = True
@@ -2143,7 +2144,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
configList.extend(getLatConfigsFromMemToReg(instrNode, instrI, targetNode, reg, addrReg, cRep))
else:
# ToDo
print 'unsupported reg to mem'
print('unsupported reg to mem')
return None
elif startNode.attrib['type'] == 'flags':
#################
@@ -2225,7 +2226,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
if suppressedStart:
if not regs.issubset(GPRegs):
print 'read from suppressed mem to non-GPR reg not yet supported'
print('read from suppressed mem to non-GPR reg not yet supported')
return None
instrI = getInstrInstanceFromNode(instrNode, [addrReg, indexReg, 'R12'], [addrReg, indexReg, 'R12'], useDistinctRegs, {targetNodeIdx:reg},
@@ -2358,11 +2359,11 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
print "Error: " + e.output
print('Error: ' + e.output)
return None
if '!' in iaca_lat or not 'Latency' in iaca_lat:
print "IACA error"
print('IACA error')
return None
latency = iaca_lat.split('\n')[3].split()[1]
@@ -2444,7 +2445,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
latConfigLists = getLatConfigLists(instrNode, opNode1, opNode2, useDistinctRegs, addrMem, tpDict)
if latConfigLists is None: continue
minLat = sys.maxint
minLat = sys.maxsize
maxLat = 0
minLatIsUpperBound = False
@@ -2453,7 +2454,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
configHtmlReports = []
for latConfigList in latConfigLists:
minLatForCurList = sys.maxint
minLatForCurList = sys.maxsize
if not any((latConfig.init or latConfig.instrI.regMemInit) for latConfig in latConfigList.latConfigs):
# Test different register values for read-only registers
@@ -2463,7 +2464,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
readOnlyRegOpNodeIdx = int(readOnlyRegOpNode.attrib['idx'])
for latConfig in list(latConfigList.latConfigs):
if not readOnlyRegOpNodeIdx in latConfig.instrI.opRegDict:
print 'readOnlyRegOpNodeIdx not found in opRegDict'
print('readOnlyRegOpNodeIdx not found in opRegDict')
continue
reg = latConfig.instrI.opRegDict[readOnlyRegOpNodeIdx]
if (not reg in GPRegs) or (reg in High8Regs) or (reg in globalDoNotWriteRegs) or (reg in specialRegs): continue
@@ -2551,8 +2552,8 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
else:
latConfig.chainInstrs += 'VPCMPD {0}, {1}, {1}, 7;'.format(maskReg, 'XMM15')
mlDP = sys.maxint
mlnoDP = sys.maxint
mlDP = sys.maxsize
mlnoDP = sys.maxsize
for latConfig in latConfigList.latConfigs:
configI += 1
@@ -2576,7 +2577,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
configHtmlReports.append('</ul>\n')
if not measurementResult:
print 'no result found'
print('no result found')
continue
cycles = measurementResult['Core cycles']
@@ -2869,7 +2870,7 @@ def main():
else:
cpu = cpuid.CPUID()
arch = cpuid.micro_arch(cpu)
print cpuid.get_basic_info(cpu)
print(cpuid.get_basic_info(cpu))
if arch == 'unknown':
exit(1)
@@ -2906,7 +2907,7 @@ def main():
try:
subprocess.check_output('mkdir -p /tmp/ramdisk; sudo mount -t tmpfs -o size=100M none /tmp/ramdisk/', shell=True)
except subprocess.CalledProcessError as e:
print "Could not create ramdisk " + e.output
print('Could not create ramdisk ' + e.output)
exit(1)
XMLRoot = ET.parse(args.input).getroot()
@@ -2957,7 +2958,7 @@ def main():
else:
for i, instrNode in enumerate(instrNodeList):
#if not 'RCR (R64, 1)' in instrNode.attrib['string']: continue
print 'Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
print('Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')
htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Throughput and Uops' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
@@ -2968,7 +2969,7 @@ def main():
if hasExplMemOp: htmlReports.append('<h2 id="nonIndexedAddr">With a non-indexed addressing mode</h2>\n')
tpResult = getThroughputAndUops(instrNode, True, False, htmlReports)
print instrNode.attrib['string'] + " - tp: " + str(tpResult)
print(instrNode.attrib['string'] + " - tp: " + str(tpResult))
if tpResult:
tpDict[instrNode] = tpResult
@@ -3005,7 +3006,7 @@ def main():
with open('tp_' + arch + '.pickle', 'wb') as f:
pickle.dump((tpDict, tpDictSameReg, tpDictIndexedAddr, tpDictNoInteriteration), f)
num_ports = len(tpDict.values()[0].unblocked_ports)
num_ports = len(list(tpDict.values())[0].unblocked_ports)
########################
# Latency
@@ -3023,13 +3024,13 @@ def main():
elif not useIACA or iacaVersion == '2.1':
for i, instrNode in enumerate(instrNodeList):
#if not 'DIV' in instrNode.attrib['string']: continue
print 'Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
print('Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')
htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Latency' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
lat = getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports)
if lat is not None:
if debugOutput: print instrNode.attrib['iform'] + ': ' + str(lat)
if debugOutput: print(instrNode.attrib['iform'] + ': ' + str(lat))
latencyDict[instrNode] = lat
writeHtmlFile('html-lat/'+arch, instrNode, instrNode.attrib['string'], ''.join(htmlReports))
with open('lat_' + arch + '.pickle', 'wb') as f:
@@ -3080,21 +3081,21 @@ def main():
# their throughput is limited to 1 per cycle; thus, they are disallowed by the TP_noDepBreaking_noLoop check above
disallowedBlockingInstrs.remove(instrNodeDict['MOVD (R32, XMM)'])
print 'disallowedBlockingInstrs'
print('disallowedBlockingInstrs')
for instrNode in disallowedBlockingInstrs:
print ' ' + str(instrNode.attrib['string'])
print(' ' + str(instrNode.attrib['string']))
print 'tpDict'
print('tpDict')
for instr, tpResult in tpDict.items():
print ' ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports)
print(' ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports))
# we cannot start higher than .79 as IACA has .2 uops on each port for a port usage of, e.g., 1*p1256
# using uops_dict instead can be problematic because in IACA the uops on the individual ports do not always add up to this value
oneUopInstrs = [instr for instr, tpResult in tpDict.items() if instr not in disallowedBlockingInstrs and .79 < sum([v for v in tpResult.unblocked_ports.values() if v>.1]) < 1.11]
print 'oneUopInstrs'
print('oneUopInstrs')
for instrNode in oneUopInstrs:
print ' ' + str(instrNode.attrib['string'])
print(' ' + str(instrNode.attrib['string']))
# dicts from port combination to a set of instructions (either not containing AVX or SSE instructions bec. of transition penalty) that always uses these ports
blockingInstructionsDictNonAVX_set = {}
blockingInstructionsDictNonSSE_set = {}
@@ -3102,7 +3103,7 @@ def main():
for instrNode in oneUopInstrs:
usedPorts = frozenset({p for p, x in tpDict[instrNode].unblocked_ports.items() if x>0.1})
if usedPorts:
print instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]')))
print(instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]'))))
if not isSSEInstr(instrNode):
if not usedPorts in blockingInstructionsDictNonSSE_set: blockingInstructionsDictNonSSE_set[usedPorts] = set()
@@ -3118,10 +3119,10 @@ def main():
blockingInstructionsDictNonSSE = {comb: next(iter(sorted(instr_set, key=sort_key))) for comb, instr_set in blockingInstructionsDictNonSSE_set.items()}
#for comb, instr_set in blockingInstructionsDictNonAVX_set.items():
# print comb
# print [x.attrib['string'] for x in sorted(instr_set, key=sort_key)]
# print(comb)
# print([x.attrib['string'] for x in sorted(instr_set, key=sort_key)])
#print str(blockingInstructionsDictNonAVX.items())
#print(str(blockingInstructionsDictNonAVX.items()))
if isIntelCPU():
# mov to mem has always two uops: store address and store data; there is no instruction that uses just one of them
@@ -3138,26 +3139,26 @@ def main():
if storeAddressPorts not in blockingInstructionsDictNonAVX: blockingInstructionsDictNonAVX[storeAddressPorts] = movMemInstrNode
if storeAddressPorts not in blockingInstructionsDictNonSSE: blockingInstructionsDictNonSSE[storeAddressPorts] = movMemInstrNode
print 'Non-AVX:'
print('Non-AVX:')
for k,v in blockingInstructionsDictNonAVX.items():
print str(k) + ': ' + v.attrib['iform']
print 'Non-SSE:'
print(str(k) + ': ' + v.attrib['iform'])
print('Non-SSE:')
for k,v in blockingInstructionsDictNonSSE.items():
print str(k) + ': ' + v.attrib['iform']
print(str(k) + ': ' + v.attrib['iform'])
sortedPortCombinationsNonAVX = sorted(blockingInstructionsDictNonAVX.keys(), key=lambda x:(len(x), sorted(x)))
sortedPortCombinationsNonSSE = sorted(blockingInstructionsDictNonSSE.keys(), key=lambda x:(len(x), sorted(x)))
print 'sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX)
print('sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX))
for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (tpDict[x].config.preInstrNodes, x.attrib['string']))):
for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (len(tpDict[x].config.preInstrNodes), x.attrib['string']))):
#if not 'CVTPD2PI' in instrNode.attrib['string']: continue
print 'Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
print('Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')
htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Port Usage' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>']
for useDistinctRegs in ([True, False] if instrNode in tpDictSameReg else [True]):
for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]):
for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]):
tpResult = None
if not useDistinctRegs:
@@ -3176,7 +3177,7 @@ def main():
# use abs because on, e.g., IVB port usages might be smaller in the second half of the experiments if replays happen
used_ports = {p for p, x in tpResult.unblocked_ports.items() if abs(x)>0.05}
if debugOutput: print instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports)
if debugOutput: print(instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports))
if not isAVXInstr(instrNode):
blockingInstrs = blockingInstructionsDictNonAVX
@@ -3218,13 +3219,13 @@ def main():
blockInstrRep = min(blockInstrRep, 100)
uopsOnBlockedPorts = getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockingInstrs[combination], blockInstrRep, combination, tpResult.config, htmlReports)
if uopsOnBlockedPorts is None:
print 'no uops on blocked ports: ' + str(combination)
print('no uops on blocked ports: ' + str(combination))
continue
uopsOnBlockedPorts -= prevUopsOnCombination
if rem_uops < uopsOnBlockedPorts:
print 'More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts)
print('More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts))
if uopsOnBlockedPorts <= 0: continue
@@ -3338,8 +3339,8 @@ def main():
try:
resultNode.attrib['TP_ports'+suffix] = "%.2f" % getTP_LP(portUsageWithDivList)
except ValueError as err:
print 'Could not solve LP for ' + instrNode.attrib['string'] + ':'
print err
print('Could not solve LP for ' + instrNode.attrib['string'] + ':')
print(err)
with open(args.output, "w") as f:
reparsed = XMLRoot
@@ -3358,7 +3359,7 @@ def main():
except subprocess.CalledProcessError:
exit(1)
print 'Total number of microbenchmarks: ' + str(nExperiments)
print('Total number of microbenchmarks: ' + str(nExperiments))
if __name__ == "__main__":

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
from xml.dom import minidom
import argparse
@@ -20,7 +21,7 @@ def main():
for instrNode1 in root1.iter('instruction'):
if instrNode1.attrib['string'] not in instrNode2Dict:
print 'no matching entry found for ' + instrNode1.attrib['string']
print('no matching entry found for ' + instrNode1.attrib['string'])
continue
for instrNode2 in instrNode2Dict[instrNode1.attrib['string']]:
for archNode2 in instrNode2.iter('architecture'):

View File

@@ -137,7 +137,7 @@ def getLatencyTableEntry(measurementNode):
if measurementNode is None or measurementNode.find('./latency') is None:
return None
minLat = sys.maxint
minLat = sys.maxsize
maxLat = 0
minLatUB = False
maxLatUB = False