option for not using other slices for clearing HL caches

This commit is contained in:
Andreas Abel
2020-01-09 01:30:09 +01:00
parent 4e7954ff5d
commit 7655c3989f
3 changed files with 65 additions and 52 deletions

View File

@@ -303,12 +303,12 @@ def getCodeForAddressLists(codeAddressLists, initAddressLists=[], wbinvd=False,
return ExperimentCode(''.join(code), ''.join(init), ''.join(oneTimeInit))
def getClearHLAddresses(level, cacheSetList, cBox=1):
def getClearHLAddresses(level, cacheSetList, cBox, doNotUseOtherCBoxes):
lineSize = getCacheInfo(1).lineSize
if level == 1:
return []
elif (level == 2) or (level == 3 and getCacheInfo(3).nSlices is None):
elif (level == 2) or (level == 3 and (getCacheInfo(3).nSlices is None or doNotUseOtherCBoxes)):
nSets = getCacheInfo(level).nSets
if not all(nSets > getCacheInfo(lLevel).nSets for lLevel in range(1, level)):
raise ValueError('L' + str(level) + ' way size must be greater than lower level way sizes')
@@ -397,7 +397,7 @@ def getBlockSet(blockStr):
return int(re.match('\d+', blockStr.split('_')[-1]).group())
def parseCacheSetsStr(level, clearHL, cacheSetsStr):
def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False):
cacheSetList = []
if cacheSetsStr is not None:
for s in cacheSetsStr.split(','):
@@ -408,7 +408,7 @@ def parseCacheSetsStr(level, clearHL, cacheSetsStr):
cacheSetList.append(int(s))
else:
nSets = getCacheInfo(level).nSets
if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None):
if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes):
nHLSets = getCacheInfo(level-1).nSets
cacheSetList = range(nHLSets, nSets)
else:
@@ -434,16 +434,18 @@ def findCacheSetForCode(cacheSetList, level):
def getAllUsedCacheSets(cacheSetList, seq, initSeq=''):
    """Return the sorted, de-duplicated union of cacheSetList and the sets named by blocks.

    Every whitespace-separated block name in initSeq and seq is passed to
    getBlockSet; results that are None are ignored, the remaining values are
    treated as explicitly overridden cache sets.

    Raises ValueError if an overridden set also occurs in cacheSetList.
    """
    blockNames = initSeq.split() + seq.split()
    overrideSets = [cs for cs in set(getBlockSet(name) for name in blockNames) if cs is not None]

    # An explicitly overridden set must not be listed in cacheSetList as well.
    for cs in overrideSets:
        if cs in cacheSetList:
            raise ValueError('overridden cache sets must not also be in cacheSetList')

    combined = set(cacheSetList + overrideSets)
    return sorted(combined)
AddressList = namedtuple('AddressList', 'addresses exclude flush wbinvd')
def getCodeForCacheExperiment(level, seq, initSeq, cacheSetList, cBox, cSlice, clearHL, wbinvd):
def getCodeForCacheExperiment(level, seq, initSeq, cacheSetList, cBox, cSlice, clearHL, doNotUseOtherCBoxes, wbinvd):
allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq)
clearHLAddrList = None
if (clearHL and level > 1):
clearHLAddrList = AddressList(getClearHLAddresses(level, allUsedSets, cBox), True, False, False)
clearHLAddrList = AddressList(getClearHLAddresses(level, allUsedSets, cBox, doNotUseOtherCBoxes), True, False, False)
initAddressLists = []
seqAddressLists = []
@@ -485,11 +487,13 @@ def runCacheExperimentCode(code, initCode, oneTimeInitCode, loop, warmUpCount, c
# cacheSets=None means do access in all sets
# in this case, the first nL1Sets many sets of L2 will be reserved for clearing L1
# cSlice refers to the nth slice within a given cBox; the assignment of numbers to slices is arbitrary
# doNotUseOtherCBoxes determines whether accesses to clear higher levels will go to other CBoxes
# if wbinvd is set, wbinvd will be called before initSeq
def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, clearHL=True, loop=1, wbinvd=False, nMeasurements=10, warmUpCount=1,
codeSet=None, agg='avg'):
cacheSetList = parseCacheSetsStr(level, clearHL, cacheSets)
ec = getCodeForCacheExperiment(level, seq, initSeq=initSeq, cacheSetList=cacheSetList, cBox=cBox, cSlice=cSlice, clearHL=clearHL, wbinvd=wbinvd)
def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, clearHL=True, doNotUseOtherCBoxes=False, loop=1, wbinvd=False,
nMeasurements=10, warmUpCount=1, codeSet=None, agg='avg'):
cacheSetList = parseCacheSetsStr(level, clearHL, cacheSets, doNotUseOtherCBoxes)
ec = getCodeForCacheExperiment(level, seq, initSeq=initSeq, cacheSetList=cacheSetList, cBox=cBox, cSlice=cSlice, clearHL=clearHL,
doNotUseOtherCBoxes=doNotUseOtherCBoxes, wbinvd=wbinvd)
log.debug('\nOneTimeInit: ' + ec.oneTimeInit)
log.debug('\nInit: ' + ec.init)
@@ -534,7 +538,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox, cSlice):
L3Assoc = getCacheInfo(3).assoc
L3WaySize = getCacheInfo(3).waySize
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False, False)
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
codeOffset = lineSize * (cacheSet+10)
addresses = []
@@ -559,7 +563,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox, cSlice):
def findCongruentL3Addresses(n, cacheSet, cBox, L3EvictionSet):
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False, False)
clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
codeOffset = getCacheInfo(1).lineSize * (cacheSet+10)
L3WaySize = getCacheInfo(3).waySize

View File

@@ -21,6 +21,7 @@ def main():
parser.add_argument("-cBox", help="cBox (default: 1)", type=int, default=1) # use 1 as default, as, e.g., on SNB, box 0 only has 15 ways instead of 16
parser.add_argument("-slice", help="Slice (within the cBox) (default: 0)", type=int, default=0)
parser.add_argument("-noClearHL", help="Do not clear higher levels", action='store_true')
parser.add_argument("-noUseOtherCBoxes", help="Do not use other CBoxes for clearing higher levels", action='store_true')
parser.add_argument("-nMeasurements", help="Number of measurements", type=int, default=10)
parser.add_argument("-agg", help="Aggregate function", default='med')
parser.add_argument("-loop", help="Loop count (Default: 1)", type=int, default=1)
@@ -39,7 +40,7 @@ def main():
print 'Hits: ' + str(hits)
else:
nb = runCacheExperiment(args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL),
loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg)
doNotUseOtherCBoxes=args.noUseOtherCBoxes, loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg)
printNB(nb)

View File

@@ -13,7 +13,6 @@ log = logging.getLogger(__name__)
def main():
parser = argparse.ArgumentParser(description='Tests if the L3 cache uses set dueling')
parser.add_argument("-level", help="Cache level (Default: 3)", type=int, default=3)
parser.add_argument("-nRuns", help="Maximum number of runs", type=int, default=25)
parser.add_argument("-loop", help="Loop count", type=int, default=25)
parser.add_argument("-length", help="Length of the acc. seq. (Default: associativity*4/3)", type=int)
@@ -25,8 +24,9 @@ def main():
logging.basicConfig(stream=sys.stdout, format='%(message)s', level=logging.getLevelName(args.logLevel))
assoc = getCacheInfo(args.level).assoc
nSets = getCacheInfo(args.level).nSets
assoc = getCacheInfo(3).assoc
nL3Sets = getCacheInfo(3).nSets
nL2Sets = getCacheInfo(2).nSets
lineSize = getCacheInfo(1).lineSize
nCBoxes = max(1, getNCBoxUnits())
nSlicesPerCBox = 1
@@ -38,52 +38,60 @@ def main():
hitSeq = ' '.join('B' + str(i) + '?' for i in range(0, assoc))
missSeq = ' '.join('B' + str(i) + '?' for i in range(0, 3*assoc))
title = cpuid.cpu_name(cpuid.CPUID()) + ', L' + str(args.level) + ' Hits'
title = cpuid.cpu_name(cpuid.CPUID()) + ', L3 Hits'
html = ['<html>', '<head>', '<title>' + title + '</title>', '<script src="https://cdn.plot.ly/plotly-latest.min.js">', '</script>', '</head>', '<body>']
html += ['<h3>' + title + '</h3>']
setsForSlice = {cBox: {cSlice: range(0,nSets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
yValuesForSlice = {cBox: {cSlice: [[] for s in range(0, nSets)] for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
setsForSlice = {cBox: {cSlice: range(0,nL3Sets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
L3HitsDict = {cBox: {cSlice: [[] for s in range(0, nL3Sets)] for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
prevOti = ''
i = -1
notChanged = -1
while notChanged < 10:
for doNotUseOtherCBoxes in ([False, True] if (not args.noClearHL and nL2Sets < nL3Sets) else [False]):
for useHitSeq in [False, True]:
i += 1
notChanged += 1
for cBox in range(0, nCBoxes):
for cSlice in range(0, nSlicesPerCBox):
yValuesList = yValuesForSlice[cBox][cSlice]
curSets = setsForSlice[cBox][cSlice]
random.shuffle(curSets)
prevSets = curSets[:]
for si, s in enumerate(prevSets):
codeSet = (s + random.randint(1, nSets - 100)) % nSets
codeSet = (s + random.randint(1, nL3Sets - 100)) % nL3Sets
codeOffset = lineSize * codeSet
yv = yValuesList[s]
L3Hits = L3HitsDict[cBox][cSlice][s]
ec = getCodeForCacheExperiment(args.level, seq, '', [s], cBox, cSlice, (not args.noClearHL), True)
ec = getCodeForCacheExperiment(3, seq, '', [s], cBox, cSlice, (not args.noClearHL), doNotUseOtherCBoxes, True)
nb = runCacheExperimentCode(ec.code, ec.init, prevOti + ec.oneTimeInit, loop=args.loop, warmUpCount=0, codeOffset=codeOffset,
nMeasurements=args.nMeasurements, agg='med')
yv.append(nb['L' + str(args.level) + '_HIT'])
yv.sort()
yvStr = str(yv) if len(yv) <= 5 else '[%s, %s, ..., %s, %s]' % (yv[0], yv[1], yv[-2], yv[-1])
if nb['L1_MISS'] < seqLength - .2:
print 'Hit in L1'
continue
if nb['L2_MISS'] < seqLength - .2:
print 'Hit in L2'
continue
L3Hits.append(nb['L3_HIT'])
L3Hits.sort()
L3HitsStr = str(L3Hits) if len(L3Hits) <= 5 else '[%s, %s, ..., %s, %s]' % (L3Hits[0], L3Hits[1], L3Hits[-2], L3Hits[-1])
log.info('CBox ' + str(cBox) + ', slice: ' + str(cSlice) + ', run ' + str(i) + ', set: ' + str(si+1) + '/' + str(len(prevSets)) +
' (' + str(s) + '), ' + yvStr)
' (' + str(s) + '), ' + L3HitsStr)
if len(yv) > 1:
if yv[-1]-yv[0] > 1:
if len(L3Hits) > 1:
if L3Hits[-1]-L3Hits[0] > 1:
curSets.remove(s)
notChanged = 0
else:
if useHitSeq:
ec = getCodeForCacheExperiment(args.level, hitSeq, '', [s], cBox, cSlice, (not args.noClearHL), True)
ec = getCodeForCacheExperiment(3, hitSeq, '', [s], cBox, cSlice, (not args.noClearHL), doNotUseOtherCBoxes, True)
else:
ec = getCodeForCacheExperiment(args.level, missSeq, '', [s], cBox, cSlice, (not args.noClearHL), True)
ec = getCodeForCacheExperiment(3, missSeq, '', [s], cBox, cSlice, (not args.noClearHL), doNotUseOtherCBoxes, True)
prevOti = ec.oneTimeInit + 'mov R15, 100; pLoop:' + ec.code + '; dec R15; jnz pLoop; '
for cBox in range(0, nCBoxes):
@@ -98,13 +106,13 @@ def main():
fig.update_layout(showlegend=True)
fig.update_xaxes(title_text='Set')
yValuesMinMax = [min(x) + (max(x)-min(x))/2 for x in yValuesForSlice[cBox][cSlice] if x]
yValuesMinMax = [min(s) + (max(s)-min(s))/2 for s in L3HitsDict[cBox][cSlice] if s]
fig.add_trace(go.Scatter(y=yValuesMinMax, mode='lines+markers', name='Min+(Max-Min)/2'))
yValuesMin = [min(x) for x in yValuesForSlice[cBox][cSlice] if x]
yValuesMin = [min(s) for s in L3HitsDict[cBox][cSlice] if s]
fig.add_trace(go.Scatter(y=yValuesMin, mode='lines+markers', visible = 'legendonly', name='Min'))
yValuesMax = [max(x) for x in yValuesForSlice[cBox][cSlice] if x]
yValuesMax = [max(s) for s in L3HitsDict[cBox][cSlice] if s]
fig.add_trace(go.Scatter(y=yValuesMax, mode='lines+markers', visible = 'legendonly', name='Max'))
html.append(plot(fig, include_plotlyjs=False, output_type='div'))