From 7655c3989fef90e91da070f56c5559b71a7098dd Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Thu, 9 Jan 2020 01:30:09 +0100 Subject: [PATCH] option for not using other slices for clearing HL caches --- tools/CacheAnalyzer/cacheLib.py | 28 +++++----- tools/CacheAnalyzer/cacheSeq.py | 3 +- tools/CacheAnalyzer/setDueling.py | 86 +++++++++++++++++-------------- 3 files changed, 65 insertions(+), 52 deletions(-) diff --git a/tools/CacheAnalyzer/cacheLib.py b/tools/CacheAnalyzer/cacheLib.py index 254ec43..7917881 100755 --- a/tools/CacheAnalyzer/cacheLib.py +++ b/tools/CacheAnalyzer/cacheLib.py @@ -303,12 +303,12 @@ def getCodeForAddressLists(codeAddressLists, initAddressLists=[], wbinvd=False, return ExperimentCode(''.join(code), ''.join(init), ''.join(oneTimeInit)) -def getClearHLAddresses(level, cacheSetList, cBox=1): +def getClearHLAddresses(level, cacheSetList, cBox, doNotUseOtherCBoxes): lineSize = getCacheInfo(1).lineSize if level == 1: return [] - elif (level == 2) or (level == 3 and getCacheInfo(3).nSlices is None): + elif (level == 2) or (level == 3 and (getCacheInfo(3).nSlices is None or doNotUseOtherCBoxes)): nSets = getCacheInfo(level).nSets if not all(nSets > getCacheInfo(lLevel).nSets for lLevel in range(1, level)): raise ValueError('L' + str(level) + ' way size must be greater than lower level way sizes') @@ -397,7 +397,7 @@ def getBlockSet(blockStr): return int(re.match('\d+', blockStr.split('_')[-1]).group()) -def parseCacheSetsStr(level, clearHL, cacheSetsStr): +def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False): cacheSetList = [] if cacheSetsStr is not None: for s in cacheSetsStr.split(','): @@ -408,7 +408,7 @@ def parseCacheSetsStr(level, clearHL, cacheSetsStr): cacheSetList.append(int(s)) else: nSets = getCacheInfo(level).nSets - if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None): + if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes): nHLSets = getCacheInfo(level-1).nSets cacheSetList = range(nHLSets, nSets) else: @@ -434,16 +434,18 @@ def findCacheSetForCode(cacheSetList, level): def getAllUsedCacheSets(cacheSetList, seq, initSeq=''): cacheSetOverrideList = [s for s in set(map(getBlockSet, initSeq.split()+seq.split())) if s is not None] + if any(s in cacheSetList for s in cacheSetOverrideList): + raise ValueError('overridden cache sets must not also be in cacheSetList') return sorted(set(cacheSetList + cacheSetOverrideList)) AddressList = namedtuple('AddressList', 'addresses exclude flush wbinvd') -def getCodeForCacheExperiment(level, seq, initSeq, cacheSetList, cBox, cSlice, clearHL, wbinvd): +def getCodeForCacheExperiment(level, seq, initSeq, cacheSetList, cBox, cSlice, clearHL, doNotUseOtherCBoxes, wbinvd): allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq) clearHLAddrList = None if (clearHL and level > 1): - clearHLAddrList = AddressList(getClearHLAddresses(level, allUsedSets, cBox), True, False, False) + clearHLAddrList = AddressList(getClearHLAddresses(level, allUsedSets, cBox, doNotUseOtherCBoxes), True, False, False) initAddressLists = [] seqAddressLists = [] @@ -485,11 +487,13 @@ def runCacheExperimentCode(code, initCode, oneTimeInitCode, loop, warmUpCount, c # cacheSets=None means do access in all sets # in this case, the first nL1Sets many sets of L2 will be reserved for clearing L1 # cSlice refers to the nth slice within a given cBox; the assigment of numbers to slices is arbitrary +# doNotUseOtherCBoxes determines whether accesses to clear higher levels will go to other CBoxes # if wbinvd is set, wbinvd will be called before initSeq -def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, clearHL=True, loop=1, wbinvd=False, nMeasurements=10, warmUpCount=1, - codeSet=None, agg='avg'): - cacheSetList = parseCacheSetsStr(level, clearHL, cacheSets) - ec = getCodeForCacheExperiment(level, seq, initSeq=initSeq, cacheSetList=cacheSetList, cBox=cBox, cSlice=cSlice, clearHL=clearHL, wbinvd=wbinvd) +def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, clearHL=True, doNotUseOtherCBoxes=False, loop=1, wbinvd=False, + nMeasurements=10, warmUpCount=1, codeSet=None, agg='avg'): + cacheSetList = parseCacheSetsStr(level, clearHL, cacheSets, doNotUseOtherCBoxes) + ec = getCodeForCacheExperiment(level, seq, initSeq=initSeq, cacheSetList=cacheSetList, cBox=cBox, cSlice=cSlice, clearHL=clearHL, + doNotUseOtherCBoxes=doNotUseOtherCBoxes, wbinvd=wbinvd) log.debug('\nOneTimeInit: ' + ec.oneTimeInit) log.debug('\nInit: ' + ec.init) @@ -534,7 +538,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox, cSlice): L3Assoc = getCacheInfo(3).assoc L3WaySize = getCacheInfo(3).waySize - clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False, False) + clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False) codeOffset = lineSize * (cacheSet+10) addresses = [] @@ -559,7 +563,7 @@ def findMinimalL3EvictionSet(cacheSet, cBox, cSlice): def findCongruentL3Addresses(n, cacheSet, cBox, L3EvictionSet): - clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox), True, False, False) + clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False) codeOffset = getCacheInfo(1).lineSize * (cacheSet+10) L3WaySize = getCacheInfo(3).waySize diff --git a/tools/CacheAnalyzer/cacheSeq.py b/tools/CacheAnalyzer/cacheSeq.py index 8e86974..aa1deda 100755 --- a/tools/CacheAnalyzer/cacheSeq.py +++ b/tools/CacheAnalyzer/cacheSeq.py @@ -21,6 +21,7 @@ def main(): parser.add_argument("-cBox", help="cBox (default: 1)", type=int, default=1) # use 1 as default, as, e.g., on SNB, box 0 only has 15 ways instead of 16 parser.add_argument("-slice", help="Slice (within the cBox) (default: 0)", type=int, default=0) parser.add_argument("-noClearHL", help="Do not clear higher levels", action='store_true') + parser.add_argument("-noUseOtherCBoxes", help="Do not use other CBoxes for clearing higher levels", action='store_true') parser.add_argument("-nMeasurements", help="Number of measurements", type=int, default=10) parser.add_argument("-agg", help="Aggregate function", default='med') parser.add_argument("-loop", help="Loop count (Default: 1)", type=int, default=1) @@ -39,7 +40,7 @@ def main(): print 'Hits: ' + str(hits) else: nb = runCacheExperiment(args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL), - loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg) + doNotUseOtherCBoxes=args.noUseOtherCBoxes, loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg) printNB(nb) diff --git a/tools/CacheAnalyzer/setDueling.py b/tools/CacheAnalyzer/setDueling.py index ad20263..2a8dd54 100755 --- a/tools/CacheAnalyzer/setDueling.py +++ b/tools/CacheAnalyzer/setDueling.py @@ -13,7 +13,6 @@ log = logging.getLogger(__name__) def main(): parser = argparse.ArgumentParser(description='Tests if the L3 cache uses set dueling') - parser.add_argument("-level", help="Cache level (Default: 3)", type=int, default=3) parser.add_argument("-nRuns", help="Maximum number of runs", type=int, default=25) parser.add_argument("-loop", help="Loop count", type=int, default=25) parser.add_argument("-length", help="Length of the acc. seq. (Default: associativity*4/3)", type=int) @@ -25,8 +24,9 @@ def main(): logging.basicConfig(stream=sys.stdout, format='%(message)s', level=logging.getLevelName(args.logLevel)) - assoc = getCacheInfo(args.level).assoc - nSets = getCacheInfo(args.level).nSets + assoc = getCacheInfo(3).assoc + nL3Sets = getCacheInfo(3).nSets + nL2Sets = getCacheInfo(2).nSets lineSize = getCacheInfo(1).lineSize nCBoxes = max(1, getNCBoxUnits()) nSlicesPerCBox = 1 @@ -38,53 +38,61 @@ def main(): hitSeq = ' '.join('B' + str(i) + '?' for i in range(0, assoc)) missSeq = ' '.join('B' + str(i) + '?' for i in range(0, 3*assoc)) - title = cpuid.cpu_name(cpuid.CPUID()) + ', L' + str(args.level) + ' Hits' + title = cpuid.cpu_name(cpuid.CPUID()) + ', L3 Hits' html = ['', '', '' + title + '', '', '', ''] html += ['

' + title + '

'] - setsForSlice = {cBox: {cSlice: range(0,nSets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)} - yValuesForSlice = {cBox: {cSlice: [[] for s in range(0, nSets)] for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)} + setsForSlice = {cBox: {cSlice: range(0,nL3Sets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)} + L3HitsDict = {cBox: {cSlice: [[] for s in range(0, nL3Sets)] for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)} prevOti = '' i = -1 notChanged = -1 while notChanged < 10: - for useHitSeq in [False, True]: - i += 1 - notChanged += 1 - for cBox in range(0, nCBoxes): - for cSlice in range(0, nSlicesPerCBox): - yValuesList = yValuesForSlice[cBox][cSlice] + for doNotUseOtherCBoxes in ([False, True] if (not args.noClearHL and nL2Sets < nL3Sets) else [False]): + for useHitSeq in [False, True]: + i += 1 + notChanged += 1 + for cBox in range(0, nCBoxes): + for cSlice in range(0, nSlicesPerCBox): + curSets = setsForSlice[cBox][cSlice] + random.shuffle(curSets) + prevSets = curSets[:] - curSets = setsForSlice[cBox][cSlice] - random.shuffle(curSets) - prevSets = curSets[:] + for si, s in enumerate(prevSets): + codeSet = (s + random.randint(1, nL3Sets - 100)) % nL3Sets + codeOffset = lineSize * codeSet + L3Hits = L3HitsDict[cBox][cSlice][s] - for si, s in enumerate(prevSets): - codeSet = (s + random.randint(1, nSets - 100)) % nSets - codeOffset = lineSize * codeSet - yv = yValuesList[s] + ec = getCodeForCacheExperiment(3, seq, '', [s], cBox, cSlice, (not args.noClearHL), doNotUseOtherCBoxes, True) + nb = runCacheExperimentCode(ec.code, ec.init, prevOti + ec.oneTimeInit, loop=args.loop, warmUpCount=0, codeOffset=codeOffset, + nMeasurements=args.nMeasurements, agg='med') - ec = getCodeForCacheExperiment(args.level, seq, '', [s], cBox, cSlice, (not args.noClearHL), True) - nb = runCacheExperimentCode(ec.code, ec.init, prevOti + ec.oneTimeInit, loop=args.loop, warmUpCount=0, codeOffset=codeOffset, - nMeasurements=args.nMeasurements, agg='med') - yv.append(nb['L' + str(args.level) + '_HIT']) - yv.sort() + if nb['L1_MISS'] < seqLength - .2: + print 'Hit in L1' + continue - yvStr = str(yv) if len(yv) <= 5 else '[%s, %s, ..., %s, %s]' % (yv[0], yv[1], yv[-2], yv[-1]) - log.info('CBox ' + str(cBox) + ', slice: ' + str(cSlice) + ', run ' + str(i) + ', set: ' + str(si+1) + '/' + str(len(prevSets)) + - ' (' + str(s) + '), ' + yvStr) + if nb['L2_MISS'] < seqLength - .2: + print 'Hit in L2' + continue - if len(yv) > 1: - if yv[-1]-yv[0] > 1: - curSets.remove(s) - notChanged = 0 - else: - if useHitSeq: - ec = getCodeForCacheExperiment(args.level, hitSeq, '', [s], cBox, cSlice, (not args.noClearHL), True) + L3Hits.append(nb['L3_HIT']) + L3Hits.sort() + + L3HitsStr = str(L3Hits) if len(L3Hits) <= 5 else '[%s, %s, ..., %s, %s]' % (L3Hits[0], L3Hits[1], L3Hits[-2], L3Hits[-1]) + log.info('CBox ' + str(cBox) + ', slice: ' + str(cSlice) + ', run ' + str(i) + ', set: ' + str(si+1) + '/' + str(len(prevSets)) + + ' (' + str(s) + '), ' + L3HitsStr) + + if len(L3Hits) > 1: + if L3Hits[-1]-L3Hits[0] > 1: + curSets.remove(s) + notChanged = 0 else: - ec = getCodeForCacheExperiment(args.level, missSeq, '', [s], cBox, cSlice, (not args.noClearHL), True) - prevOti = ec.oneTimeInit + 'mov R15, 100; pLoop:' + ec.code + '; dec R15; jnz pLoop; ' + if useHitSeq: + ec = getCodeForCacheExperiment(3, hitSeq, '', [s], cBox, cSlice, (not args.noClearHL), doNotUseOtherCBoxes, True) + else: + ec = getCodeForCacheExperiment(3, missSeq, '', [s], cBox, cSlice, (not args.noClearHL), doNotUseOtherCBoxes, True) + prevOti = ec.oneTimeInit + 'mov R15, 100; pLoop:' + ec.code + '; dec R15; jnz pLoop; ' for cBox in range(0, nCBoxes): for cSlice in range(0, nSlicesPerCBox): @@ -98,13 +106,13 @@ def main(): fig.update_layout(showlegend=True) fig.update_xaxes(title_text='Set') - yValuesMinMax = [min(x) + (max(x)-min(x))/2 for x in yValuesForSlice[cBox][cSlice] if x] + yValuesMinMax = [min(s) + (max(s)-min(s))/2 for s in L3HitsDict[cBox][cSlice] if s] fig.add_trace(go.Scatter(y=yValuesMinMax, mode='lines+markers', name='Min+(Max-Min)/2')) - yValuesMin = [min(x) for x in yValuesForSlice[cBox][cSlice] if x] + yValuesMin = [min(s) for s in L3HitsDict[cBox][cSlice] if s] fig.add_trace(go.Scatter(y=yValuesMin, mode='lines+markers', visible = 'legendonly', name='Min')) - yValuesMax = [max(x) for x in yValuesForSlice[cBox][cSlice] if x] + yValuesMax = [max(s) for s in L3HitsDict[cBox][cSlice] if s] fig.add_trace(go.Scatter(y=yValuesMax, mode='lines+markers', visible = 'legendonly', name='Max')) html.append(plot(fig, include_plotlyjs=False, output_type='div'))