wbinvd in access sequences

This commit is contained in:
Andreas Abel
2019-12-02 17:42:37 +01:00
parent fa98c9ef71
commit e3629ead9b
4 changed files with 26 additions and 13 deletions

View File

@@ -19,7 +19,7 @@ The tool will make memory accesses to four different blocks in all of the specif
Between every two accesses to the same set in a lower-level cache, the tool automatically adds enough accesses to the higher-level caches (that map to different sets and/or slices in the lower-level cache) to make sure that the corresponding lines are evicted from the higher-level cache and the access actually reaches the lower-level cache. These additional accesses are excluded from the performance counter measurements.
By default, the `WBINVD` instruction is call before executing the access sequence. This can be disabled with the `-noWbinvd` option.
By default, the `wbinvd` instruction is called before executing the access sequence. This can be disabled with the `-noWbinvd` option. It is also possible to execute the `wbinvd` instruction between specific elements of an access sequence by inserting `<wbinvd>` at the corresponding location in the access sequence.
The tool has the following command-line options:

View File

@@ -53,6 +53,7 @@ def main():
blocksStr = args.blocks
else:
blocksStr = args.seq
blocksStr = blocksStr.replace('<wbinvd>', '')
blocks = list(OrderedDict.fromkeys(re.sub('[?!,;]', ' ', blocksStr).split()))
html = ['<html>', '<head>', '<script src="https://cdn.plot.ly/plotly-latest.min.js">', '</script>', '</head>', '<body>']

View File

@@ -174,7 +174,7 @@ def getCBoxOfAddress(address):
setNanoBenchParameters(config='', msrConfig=getDefaultCacheMSRConfig(), nMeasurements=10, unrollCount=1, loopCount=10, aggregateFunction='min',
basicMode=True, noMem=True)
ec = getCodeForAddressLists([AddressList([address],False,True)])
ec = getCodeForAddressLists([AddressList([address], False, True, False)])
nb = runNanoBench(code=ec.code, oneTimeInit=ec.oneTimeInit)
nCacheLookups = [nb['CACHE_LOOKUP_CBO_'+str(cBox)] for cBox in range(0, getNCBoxUnits())]
@@ -258,6 +258,14 @@ def getCodeForAddressLists(codeAddressLists, initAddressLists=[], wbinvd=False,
pfcEnabled = True
for addressList in addressLists:
if addressList.wbinvd:
if addressList.exclude and pfcEnabled:
codeList.append(PFC_STOP_ASM + '; ')
codeList.append('wbinvd; ')
if addressList.exclude and pfcEnabled:
codeList.append(PFC_START_ASM + '; ')
continue
addresses = addressList.addresses
if len(addresses) < 1: continue
@@ -399,7 +407,7 @@ def parseCacheSetsStr(level, clearHL, cacheSetsStr):
return cacheSetList
AddressList = namedtuple('AddressList', 'addresses exclude flush')
AddressList = namedtuple('AddressList', 'addresses exclude flush wbinvd')
# cacheSets=None means do access in all sets
# in this case, the first nL1Sets many sets of L2 will be reserved for clearing L1
@@ -411,7 +419,7 @@ def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, clearHL=T
clearHLAddrList = None
if (clearHL and level > 1):
clearHLAddrList = AddressList(getClearHLAddresses(level, cacheSetList, cBox), True, False)
clearHLAddrList = AddressList(getClearHLAddresses(level, cacheSetList, cBox), True, False, False)
initAddressLists = []
seqAddressLists = []
@@ -420,6 +428,10 @@ def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, clearHL=T
for seqString, addrLists in [(initSeq, initAddressLists), (seq, seqAddressLists)]:
for seqEl in seqString.split():
name = getBlockName(seqEl)
if name == '<wbinvd>':
addrLists.append(AddressList([], True, False, True))
continue
wayID = nameToID.setdefault(name, len(nameToID))
exclude = not '?' in seqEl
flush = '!' in seqEl
@@ -428,7 +440,7 @@ def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, clearHL=T
if clearHLAddrList is not None and not flush:
addrLists.append(clearHLAddrList)
addrLists.append(AddressList(addresses, exclude, flush))
addrLists.append(AddressList(addresses, exclude, flush, False))
ec = getCodeForAddressLists(seqAddressLists, initAddressLists, wbinvd)
@@ -462,7 +474,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox):
if cacheSet in evSetForCacheSet:
return evSetForCacheSet[cacheSet]
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False)
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False, False)
addresses = []
curAddress = cacheSet*getCacheInfo(3).lineSize
@@ -476,7 +488,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox):
if not getCBoxOfAddress(curAddress) == cBox: continue
addresses += [curAddress]
ec = getCodeForAddressLists([AddressList(addresses,False,False), clearHLAddrList])
ec = getCodeForAddressLists([AddressList(addresses, False, False, False), clearHLAddrList])
setNanoBenchParameters(config=getDefaultCacheConfig(), msrConfig='', nMeasurements=10, unrollCount=1, loopCount=100,
aggregateFunction='med', basicMode=True, noMem=True)
@@ -488,7 +500,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox):
for i in reversed(range(0, len(addresses))):
tmpAddresses = addresses[:i] + addresses[(i+1):]
ec = getCodeForAddressLists([AddressList(tmpAddresses,False,False), clearHLAddrList])
ec = getCodeForAddressLists([AddressList(tmpAddresses, False, False, False), clearHLAddrList])
nb = runNanoBench(code=ec.code, oneTimeInit=ec.oneTimeInit)
if nb['L3_HIT'] < len(tmpAddresses) - 0.9:
@@ -499,7 +511,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox):
def findCongruentL3Addresses(n, cacheSet, cBox, L3EvictionSet):
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False)
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False, False)
congrAddresses = []
L3WaySize = getCacheInfo(3).waySize
@@ -508,7 +520,7 @@ def findCongruentL3Addresses(n, cacheSet, cBox, L3EvictionSet):
if not getCBoxOfAddress(newAddr) == cBox: continue
tmpAddresses = L3EvictionSet[:getCacheInfo(3).assoc] + [newAddr]
ec = getCodeForAddressLists([AddressList(tmpAddresses,False,False), clearHLAddrList])
ec = getCodeForAddressLists([AddressList(tmpAddresses, False, False, False), clearHLAddrList])
setNanoBenchParameters(config=getEventConfig('L3_HIT'), msrConfig=None, nMeasurements=10, unrollCount=1, loopCount=100,
aggregateFunction='med', basicMode=True, noMem=True, verbose=None)
@@ -531,7 +543,7 @@ def findMaximalNonEvictingL3SetInCBox(start, stride, L3Assoc, cBox):
if getCBoxOfAddress(curAddress) != cBox:
clearHLAddresses.append(curAddress)
curAddress += stride
clearHLAddrList = AddressList(clearHLAddresses, True, False)
clearHLAddrList = AddressList(clearHLAddresses, True, False, False)
curAddress = start
while len(addresses) < L3Assoc:
@@ -547,7 +559,7 @@ def findMaximalNonEvictingL3SetInCBox(start, stride, L3Assoc, cBox):
continue
newAddresses = addresses + [curAddress]
ec = getCodeForAddressLists([AddressList(newAddresses,False,False), clearHLAddrList])
ec = getCodeForAddressLists([AddressList(newAddresses, False, False, False), clearHLAddrList])
setNanoBenchParameters(config=getEventConfig('L3_HIT'), msrConfig='', nMeasurements=10, unrollCount=1, loopCount=10,
aggregateFunction='med', basicMode=True, noMem=True)

View File

@@ -32,7 +32,7 @@ def main():
xValues.append(str(x))
addresses = range(0, x, args.stride)
nAddresses.append(len(addresses))
ec = getCodeForAddressLists([AddressList(addresses,False,False)], wbinvd=True)
ec = getCodeForAddressLists([AddressList(addresses, False, False, False)], wbinvd=True)
nbDicts.append(runNanoBench(code=ec.code, init=ec.init, oneTimeInit=ec.oneTimeInit))
pt *= 2