From 313aa5ee3013b0334d7b24e2fd49f4a14a415add Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Sat, 13 Mar 2021 21:04:52 +0100 Subject: [PATCH] python 3 --- tools/CPUID/cpuid.py | 28 +-- tools/CacheAnalyzer/cacheGraph.py | 7 +- tools/CacheAnalyzer/cacheInfo.py | 12 +- tools/CacheAnalyzer/cacheLib.py | 21 +- tools/CacheAnalyzer/cacheSeq.py | 10 +- tools/CacheAnalyzer/cacheSim.py | 24 ++- tools/CacheAnalyzer/hitMiss.py | 11 +- tools/CacheAnalyzer/permPolicy.py | 20 +- tools/CacheAnalyzer/replPolicy.py | 21 +- tools/CacheAnalyzer/setDueling.py | 13 +- tools/CacheAnalyzer/strideGraph.py | 12 +- tools/cpuBench/addAMDDocToXML.py | 13 +- tools/cpuBench/addDocToXML.py | 7 +- tools/cpuBench/addURLsToXML.py | 9 +- tools/cpuBench/compareMeasurementsToOther.py | 99 +++++----- tools/cpuBench/compareXML.py | 38 ++-- tools/cpuBench/cpuBench.py | 191 ++++++++++--------- tools/cpuBench/mergeXML.py | 5 +- tools/cpuBench/utils.py | 2 +- 19 files changed, 275 insertions(+), 268 deletions(-) diff --git a/tools/CPUID/cpuid.py b/tools/CPUID/cpuid.py index d7e2cd0..7a15f8d 100755 --- a/tools/CPUID/cpuid.py +++ b/tools/CPUID/cpuid.py @@ -1,7 +1,7 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright (C) 2019 Andreas Abel +# Copyright (C) 2021 Andreas Abel # # This file was modified from https://github.com/flababah/cpuid.py # @@ -408,7 +408,7 @@ def get_cache_info(cpu): parameters.append('Physical Line partitions (P): ' + str(P)) parameters.append('Ways of associativity (W): ' + str(W)) parameters.append('Number of Sets (S): ' + str(S)) - parameters.append('Cache Size: ' + str(W*P*L*S/1024) + ' kB') + parameters.append('Cache Size: ' + str(W*P*L*S//1024) + ' kB') if get_bit(d, 0): parameters.append('WBINVD/INVD is not guaranteed to act upon lower level caches of non-originating threads sharing this cache') else: parameters.append('WBINVD/INVD from threads sharing this cache acts upon lower level caches for threads sharing this cache') @@ -447,7 +447,7 @@ def get_cache_info(cpu): cacheInfo['L1D'] = { 'lineSize': L1DcLineSize, - 'nSets': L1DcSize*1024/L1DcAssoc/L1DcLineSize, + 'nSets': L1DcSize*1024//L1DcAssoc//L1DcLineSize, 'assoc': L1DcAssoc } @@ -463,7 +463,7 @@ def get_cache_info(cpu): cacheInfo['L1I'] = { 'lineSize': L1IcLineSize, - 'nSets': L1IcSize*1024/L1IcAssoc/L1IcLineSize, + 'nSets': L1IcSize*1024//L1IcAssoc//L1IcLineSize, 'assoc': L1IcAssoc } @@ -484,7 +484,7 @@ def get_cache_info(cpu): elif c_15_12 == 0xC: L2Assoc = 64 elif c_15_12 == 0xD: L2Assoc = 96 elif c_15_12 == 0xE: L2Assoc = 128 - elif c_15_12 == 0x2: L2Assoc = L2Size*1024/L2LineSize + elif c_15_12 == 0x2: L2Assoc = L2Size*1024//L2LineSize log.info(' L2LineSize: ' + str(L2LineSize) + ' B') log.info(' L2LinesPerTag: ' + str(L2LinesPerTag)) @@ -493,7 +493,7 @@ def get_cache_info(cpu): cacheInfo['L2'] = { 'lineSize': L2LineSize, - 'nSets': L2Size*1024/L2Assoc/L2LineSize, + 'nSets': L2Size*1024//L2Assoc//L2LineSize, 'assoc': L2Assoc } @@ -519,11 +519,11 @@ def get_cache_info(cpu): log.info(' L3LineSize: ' + str(L3LineSize) + ' B') log.info(' L3LinesPerTag: ' + str(L3LinesPerTag)) log.info(' L3Assoc: ' + str(L3Assoc)) - log.info(' L3Size: ' + str(L3Size/1024) + ' MB') + log.info(' L3Size: ' + str(L3Size//1024) + ' MB') cacheInfo['L3'] = { 'lineSize': L3LineSize, - 'nSets': L3Size*1024/L3Assoc/L3LineSize, + 'nSets': L3Size*1024//L3Assoc//L3LineSize, 'assoc': L3Assoc } @@ -551,13 +551,13 @@ if __name__ == "__main__": yield (eax, regs) eax += 1 - print " ".join(x.ljust(8) for x in ("CPUID", "A", "B", "C", "D")).strip() + print(' '.join(x.ljust(8) for x in ('CPUID', 'A', 'B', 'C', 'D')).strip()) for eax, regs in valid_inputs(): - print "%08x" % eax, " ".join("%08x" % reg for reg in regs) + print('%08x' % eax, ' '.join('%08x' % reg for reg in regs)) - print '' - print get_basic_info(cpuid) + print('') + print(get_basic_info(cpuid)) - print '\nCache information:' + print('\nCache information:') get_cache_info(cpuid) diff --git a/tools/CacheAnalyzer/cacheGraph.py b/tools/CacheAnalyzer/cacheGraph.py index c75f673..c931bbe 100755 --- a/tools/CacheAnalyzer/cacheGraph.py +++ b/tools/CacheAnalyzer/cacheGraph.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + from itertools import count from collections import namedtuple, OrderedDict @@ -73,7 +74,7 @@ def main(): _, nbDict = getAgesOfBlocks(blocks, args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL), wbinvd=(not args.noWbinvd), returnNbResults=True, maxAge=args.maxAge, nMeasurements=args.nMeasurements, agg=args.agg) - for event in sorted(e for e in nbDict.values()[0][0].keys() if 'HIT' in e or 'MISS' in e): + for event in sorted(e for e in list(nbDict.values())[0][0].keys() if 'HIT' in e or 'MISS' in e): traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks] title = 'Access Sequence: ' + (args.seq_init + ' ' + args.seq).replace('?','').strip() + ' ?' html.append(getPlotlyGraphDiv(title, '# of fresh blocks', event, traces)) @@ -82,7 +83,7 @@ def main(): with open(args.output ,'w') as f: f.write('\n'.join(html)) - print 'Graph written to ' + args.output + print('Graph written to ' + args.output) if __name__ == "__main__": diff --git a/tools/CacheAnalyzer/cacheInfo.py b/tools/CacheAnalyzer/cacheInfo.py index 920e9f5..6bf568a 100755 --- a/tools/CacheAnalyzer/cacheInfo.py +++ b/tools/CacheAnalyzer/cacheInfo.py @@ -1,6 +1,6 @@ -#!/usr/bin/python -import argparse +#!/usr/bin/env python3 +import argparse from cacheLib import * import logging @@ -16,11 +16,11 @@ def main(): cpuidInfo = getCpuidCacheInfo() - print '' - print getCacheInfo(1) - print getCacheInfo(2) + print('') + print(getCacheInfo(1)) + print(getCacheInfo(2)) if 'L3' in cpuidInfo: - print getCacheInfo(3) + print(getCacheInfo(3)) if __name__ == "__main__": diff --git a/tools/CacheAnalyzer/cacheLib.py b/tools/CacheAnalyzer/cacheLib.py index da368c7..9ac573d 100755 --- a/tools/CacheAnalyzer/cacheLib.py +++ b/tools/CacheAnalyzer/cacheLib.py @@ -1,4 +1,3 @@ -#!/usr/bin/python from itertools import count from collections import namedtuple @@ -79,11 +78,11 @@ class CacheInfo: def __str__(self): return '\n'.join(['L' + str(self.level) + ':', - ' Size: ' + str(self.size/1024) + ' kB', + ' Size: ' + str(self.size//1024) + ' kB', ' Associativity: ' + str(self.assoc), ' Line Size: ' + str(self.lineSize) + ' B', ' Number of sets' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.nSets), - ' Way size' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.waySize/1024) + ' kB', + ' Way size' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.waySize//1024) + ' kB', (' Number of CBoxes: ' + str(self.nCboxes) if self.nCboxes is not None else ''), (' Number of slices: ' + str(self.nSlices) if self.nSlices is not None else '')]) @@ -134,13 +133,13 @@ def getCacheInfo(level): assoc = cpuidInfo['assoc'] nSets = cpuidInfo['nSets'] - stride = 2**((lineSize*nSets/getNCBoxUnits())-1).bit_length() # smallest power of two larger than lineSize*nSets/nCBoxUnits + stride = 2**((lineSize*nSets//getNCBoxUnits())-1).bit_length() # smallest power of two larger than lineSize*nSets/nCBoxUnits ms = findMaximalNonEvictingL3SetInCBox(0, stride, assoc, 0) log.debug('Maximal non-evicting L3 set: ' + str(len(ms)) + ' ' + str(ms)) nCboxes = getNCBoxUnits() nSlices = nCboxes * int(math.ceil(float(len(ms))/assoc)) - getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets/nSlices, nSlices, nCboxes) + getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets//nSlices, nSlices, nCboxes) return getCacheInfo.L3CacheInfo else: raise ValueError('invalid level') @@ -376,9 +375,9 @@ def getAddresses(level, wayID, cacheSetList, cBox=1, cSlice=0): L3SetToWayIDMap[cBox][cSlice][L3Set][i] = addr if not wayID in L3SetToWayIDMap[cBox][cSlice][L3Set]: if getCacheInfo(3).nSlices == getNCBoxUnits(): - L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, L3SetToWayIDMap[cBox][cSlice][L3Set].values()))) + L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values())))) else: - L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, L3SetToWayIDMap[cBox][cSlice][L3Set].values()))) + L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values())))) addresses.append(L3SetToWayIDMap[cBox][cSlice][L3Set][wayID]) return addresses @@ -404,16 +403,16 @@ def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False): for s in cacheSetsStr.split(','): if '-' in s: first, last = s.split('-')[:2] - cacheSetList += range(int(first), int(last)+1) + cacheSetList += list(range(int(first), int(last)+1)) else: cacheSetList.append(int(s)) else: nSets = getCacheInfo(level).nSets if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes): nHLSets = getCacheInfo(level-1).nSets - cacheSetList = range(nHLSets, nSets) + cacheSetList = list(range(nHLSets, nSets)) else: - cacheSetList = range(0, nSets) + cacheSetList = list(range(0, nSets)) return cacheSetList @@ -509,7 +508,7 @@ def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, def printNB(nb_result): for r in nb_result.items(): - print r[0] + ': ' + str(r[1]) + print(r[0] + ': ' + str(r[1])) def hasL3Conflicts(addresses, clearHLAddrList, codeOffset): diff --git a/tools/CacheAnalyzer/cacheSeq.py b/tools/CacheAnalyzer/cacheSeq.py index aa1deda..448093f 100755 --- a/tools/CacheAnalyzer/cacheSeq.py +++ b/tools/CacheAnalyzer/cacheSeq.py @@ -1,12 +1,12 @@ -#!/usr/bin/python -from itertools import count, cycle, islice -from collections import namedtuple, OrderedDict +#!/usr/bin/env python3 import argparse import sys +from itertools import count, cycle, islice +from collections import namedtuple, OrderedDict -from cacheLib import * import cacheSim +from cacheLib import * import logging log = logging.getLogger(__name__) @@ -37,7 +37,7 @@ def main(): policyClass = cacheSim.AllPolicies[args.sim] seq = args.seq_init + (' ' + args.seq) * args.loop hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets) / args.loop - print 'Hits: ' + str(hits) + print('Hits: ' + str(hits)) else: nb = runCacheExperiment(args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL), doNotUseOtherCBoxes=args.noUseOtherCBoxes, loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg) diff --git a/tools/CacheAnalyzer/cacheSim.py b/tools/CacheAnalyzer/cacheSim.py index 74414ac..511b69c 100755 --- a/tools/CacheAnalyzer/cacheSim.py +++ b/tools/CacheAnalyzer/cacheSim.py @@ -1,9 +1,7 @@ -#!/usr/bin/python import random from itertools import count from numpy import median - from cacheLib import * import logging @@ -85,7 +83,7 @@ class PLRUSim(ReplPolicySim): def updateIndexBits(self, accIndex): lastIdx = accIndex for level in reversed(range(0, len(self.bits))): - curIdx = lastIdx/2 + curIdx = lastIdx//2 self.bits[level][curIdx] = 1 - (lastIdx % 2) lastIdx = curIdx @@ -111,7 +109,7 @@ AllRandPLRUVariants = { class LRU_PLRU4Sim(ReplPolicySim): def __init__(self, assoc): - self.PLRUs = [PLRUSim(4, linearInit=True) for _ in range(0, assoc/4)] + self.PLRUs = [PLRUSim(4, linearInit=True) for _ in range(0, assoc//4)] self.PLRUOrdered = list(self.PLRUs) # from MRU to LRU def acc(self, block): @@ -299,9 +297,9 @@ CommonPolicies = { 'SRRIP': AllDetQLRUVariants['QLRU_H00_M2_R0_U0_UMO'], } -AllDetPolicies = dict(CommonPolicies.items() + AllDetQLRUVariants.items()) -AllRandPolicies = dict(AllRandQLRUVariants.items() + AllRandPLRUVariants.items()) -AllPolicies = dict(AllDetPolicies.items() + AllRandPolicies.items()) +AllDetPolicies = dict(list(CommonPolicies.items()) + list(AllDetQLRUVariants.items())) +AllRandPolicies = dict(list(AllRandQLRUVariants.items()) + list(AllRandPLRUVariants.items())) +AllPolicies = dict(list(AllDetPolicies.items()) + list(AllRandPolicies.items())) def parseCacheSetsStrSim(cacheSetsStr): @@ -312,7 +310,7 @@ def parseCacheSetsStrSim(cacheSetsStr): for s in cacheSetsStr.split(','): if '-' in s: first, last = s.split('-')[:2] - cacheSetList += range(int(first), int(last)+1) + cacheSetList += list(range(int(first), int(last)+1)) else: cacheSetList.append(int(s)) @@ -381,8 +379,8 @@ def getPermutations(policySimClass, assoc): initAges = getAges(initBlocks, seq, policySimClass, assoc) accSeqStr = 'Access sequence: ' + seq - print accSeqStr - print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}' + print(accSeqStr) + print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}') blocks = ['B' + str(i) for i in range(0, assoc)] baseSeq = ' '.join(initBlocks + blocks) @@ -390,8 +388,8 @@ def getPermutations(policySimClass, assoc): ages = getAges(blocks, baseSeq, policySimClass, assoc) accSeqStr = 'Access sequence: ' + baseSeq - print accSeqStr - print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}' + print(accSeqStr) + print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}') blocksSortedByAge = [a[0] for a in sorted(ages.items(), key=lambda x: -x[1])] # most recent block first @@ -408,5 +406,5 @@ def getPermutations(policySimClass, assoc): break perm[assoc-permAge] = bi - print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)) + print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))) diff --git a/tools/CacheAnalyzer/hitMiss.py b/tools/CacheAnalyzer/hitMiss.py index 0bd5666..117f0aa 100755 --- a/tools/CacheAnalyzer/hitMiss.py +++ b/tools/CacheAnalyzer/hitMiss.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import argparse import sys @@ -32,10 +33,10 @@ def main(): seq = re.sub('[?!]', '', ' '.join([args.seq_init, args.seq])).strip() + '?' hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets) if hits > 0: - print 'HIT' + print('HIT') exit(1) else: - print 'MISS' + print('MISS') exit(0) else: setCount = len(parseCacheSetsStr(args.level, True, args.sets)) @@ -43,10 +44,10 @@ def main(): nb = runCacheExperiment(args.level, seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL), loop=args.loop, wbinvd=(not args.noWbinvd)) if nb['L' + str(args.level) + '_HIT']/setCount > .5: - print 'HIT' + print('HIT') exit(1) else: - print 'MISS' + print('MISS') exit(0) diff --git a/tools/CacheAnalyzer/permPolicy.py b/tools/CacheAnalyzer/permPolicy.py index 951ecb1..a495773 100755 --- a/tools/CacheAnalyzer/permPolicy.py +++ b/tools/CacheAnalyzer/permPolicy.py @@ -1,20 +1,20 @@ -#!/usr/bin/python -from itertools import count -from collections import namedtuple, OrderedDict +#!/usr/bin/env python3 import argparse import math import os +import plotly.graph_objects as go import re import subprocess import sys +from itertools import count +from collections import namedtuple, OrderedDict from plotly.offline import plot -import plotly.graph_objects as go +import cacheSim from cacheLib import * from cacheGraph import * -import cacheSim import logging log = logging.getLogger(__name__) @@ -36,8 +36,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non cBox=cBox, cSlice=cSlice) accSeqStr = 'Access sequence: ' + seq - print accSeqStr - print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}' + print(accSeqStr) + print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}') event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent) traces = [(b, [nb[event] for nb in nbDict[b]]) for b in initBlocks] @@ -52,8 +52,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non cBox=cBox, cSlice=cSlice) accSeqStr = 'Access sequence: ' + baseSeq - print accSeqStr - print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}' + print(accSeqStr) + print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}') event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent) traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks] @@ -77,7 +77,7 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non break perm[assoc-permAge] = bi - print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)) + print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))) def main(): diff --git a/tools/CacheAnalyzer/replPolicy.py b/tools/CacheAnalyzer/replPolicy.py index d3b5873..5e69e99 100755 --- a/tools/CacheAnalyzer/replPolicy.py +++ b/tools/CacheAnalyzer/replPolicy.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import argparse import random import sys @@ -23,7 +24,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s seq = initSeq + ' '.join(seqPrefix) actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements) sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets) - print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim) + print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)) if sim != actual: break @@ -32,7 +33,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s seq = initSeq + ' '.join(tmpPrefix) actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements) sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets) - print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim) + print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)) if sim != actual: seqPrefix = tmpPrefix @@ -115,7 +116,7 @@ def main(): for seq in seqList: fullSeq = ((args.initSeq + ' ') if args.initSeq else '') + seq - print fullSeq + print(fullSeq) html += ['' + fullSeq + ''] actualHits = set([getActualHits(fullSeq, args.level, args.sets, cBox, args.slice, args.nMeasurements) for _ in range(0, args.rep)]) @@ -151,14 +152,14 @@ def main(): html += [''] if not args.randPolicies and not args.best: - print 'Possible policies: ' + ', '.join(possiblePolicies) + print('Possible policies: ' + ', '.join(possiblePolicies)) if not possiblePolicies: break if not args.randPolicies and args.findCtrEx: - print '' - print 'Counter example(s): ' + print('') + print('Counter example(s):') for p, ctrEx in counterExamples.items(): - print ' ' + p + ': ' + ctrEx + print(' ' + p + ': ' + ctrEx) html += ['', '', ''] @@ -166,10 +167,10 @@ def main(): f.write('\n'.join(html)) if not args.randPolicies and not args.best: - print 'Possible policies: ' + ', '.join(possiblePolicies) + print('Possible policies: ' + ', '.join(possiblePolicies)) else: for p, d in reversed(sorted(dists.items(), key=lambda d: d[1])): - print p + ': ' + str(d) + print(p + ': ' + str(d)) if __name__ == "__main__": diff --git a/tools/CacheAnalyzer/setDueling.py b/tools/CacheAnalyzer/setDueling.py index 230b5eb..8f73478 100755 --- a/tools/CacheAnalyzer/setDueling.py +++ b/tools/CacheAnalyzer/setDueling.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import argparse import random @@ -31,7 +32,7 @@ def main(): nCBoxes = max(1, getNCBoxUnits()) nSlicesPerCBox = 1 if getCacheInfo(3).nSlices: - nSlicesPerCBox = getCacheInfo(3).nSlices / getCacheInfo(3).nCboxes + nSlicesPerCBox = getCacheInfo(3).nSlices // getCacheInfo(3).nCboxes seqLength = (args.length if args.length is not None else assoc+1) seq = ' '.join('B' + str(i) + '?' for i in range(0, seqLength)) @@ -42,7 +43,7 @@ def main(): html = ['', '', '' + title + '', '', '', ''] html += ['

' + title + '

'] - setsForSlice = {cBox: {cSlice: range(0,nL3Sets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)} + setsForSlice = {cBox: {cSlice: list(range(0,nL3Sets)) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)} L3HitsDict = {cBox: {cSlice: [[] for s in range(0, nL3Sets)] for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)} prevOti = '' @@ -69,11 +70,11 @@ def main(): nMeasurements=args.nMeasurements, agg='med') if nb['L1_MISS'] < seqLength - .2: - print 'Hit in L1' + print('Hit in L1') continue if nb['L2_MISS'] < seqLength - .2: - print 'Hit in L2' + print('Hit in L2') continue L3Hits.append(nb['L3_HIT']) @@ -121,7 +122,7 @@ def main(): with open(args.output ,'w') as f: f.write('\n'.join(html)) - print 'Output written to ' + args.output + print('Output written to ' + args.output) if __name__ == "__main__": diff --git a/tools/CacheAnalyzer/strideGraph.py b/tools/CacheAnalyzer/strideGraph.py index 4ef5109..d6cc569 100755 --- a/tools/CacheAnalyzer/strideGraph.py +++ b/tools/CacheAnalyzer/strideGraph.py @@ -1,9 +1,9 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import argparse import math - -from plotly.offline import plot import plotly.graph_objects as go +from plotly.offline import plot from cacheLib import * @@ -28,9 +28,9 @@ def main(): while pt <= args.endSize*1024: tickvals.append(pt) for x in ([int(math.pow(2, math.log(pt, 2) + i/16.0)) for i in range(0,16)] if pt < args.endSize*1024 else [pt]): - print x/1024 + print(x//1024) xValues.append(str(x)) - addresses = range(0, x, args.stride) + addresses = list(range(0, x, args.stride)) nAddresses.append(len(addresses)) ec = getCodeForAddressLists([AddressList(addresses, False, False, False)], wbinvd=True) nbDicts.append(runNanoBench(code=ec.code, init=ec.init, oneTimeInit=ec.oneTimeInit)) @@ -57,7 +57,7 @@ def main(): with open(args.output ,'w') as f: f.write('\n'.join(html)) - print 'Graph written to ' + args.output + print('Graph written to ' + args.output) if __name__ == "__main__": main() diff --git a/tools/cpuBench/addAMDDocToXML.py b/tools/cpuBench/addAMDDocToXML.py index a624aa1..212c39d 100755 --- a/tools/cpuBench/addAMDDocToXML.py +++ b/tools/cpuBench/addAMDDocToXML.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + from collections import namedtuple import xml.etree.ElementTree as ET from xml.dom import minidom @@ -69,13 +70,13 @@ def main(): iclassAsmDict.setdefault(re.sub('{.*} ', '', asm), set()).add(instrNode) #for x in set(op for de in docList for op in de.operands): - # print x + # print(x) xmlToDocDict = dict() for de in sorted(docEntrySet): if de.mnemonic not in iclassAsmDict: - print 'no XML entry found for ' + str(de) + print('no XML entry found for ' + str(de)) xmlFound = False for instrNode in iclassAsmDict[de.mnemonic]: @@ -135,15 +136,15 @@ def main(): elif (set(de.operands) == {None}) and (set(xmlToDocDict[instrNode].operands) != {None}): pass else: - print 'duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de)) + print('duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de))) else: xmlFound = True xmlToDocDict[instrNode] = de if not xmlFound: - print 'no matching XML entry found for ' + str(de) + print('no matching XML entry found for ' + str(de)) - print 'Found data for ' + str(len(xmlToDocDict)) + ' instruction variants' + print('Found data for ' + str(len(xmlToDocDict)) + ' instruction variants') for instrNode, de in xmlToDocDict.items(): archNode = instrNode.find('./architecture[@name="{}"]'.format(args.arch)) diff --git a/tools/cpuBench/addDocToXML.py b/tools/cpuBench/addDocToXML.py index bb78144..093b405 100755 --- a/tools/cpuBench/addDocToXML.py +++ b/tools/cpuBench/addDocToXML.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + from collections import namedtuple import xml.etree.ElementTree as ET from xml.dom import minidom @@ -43,9 +44,9 @@ def main(): matchingDEs.remove(de) if len(matchingDEs) == 0: - print 'No matching iform: ' + iform + print('No matching iform: ' + iform) elif len(matchingDEs) > 1: - print 'Multiple matching iforms: ' + iform + print('Multiple matching iforms: ' + iform) else: de = next(iter(matchingDEs)) diff --git a/tools/cpuBench/addURLsToXML.py b/tools/cpuBench/addURLsToXML.py index ade794b..bb4711a 100755 --- a/tools/cpuBench/addURLsToXML.py +++ b/tools/cpuBench/addURLsToXML.py @@ -1,8 +1,9 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import xml.etree.ElementTree as ET import argparse import re -import urllib +import urllib.request from xml.dom import minidom from utils import * @@ -12,7 +13,7 @@ def main(): parser.add_argument("output", help="Output XML file") args = parser.parse_args() - html = urllib.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*') + html = urllib.request.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*') lines = re.findall('href="\./(.*?)">(.*?).*?(.*?)', html) # Example: ('ADC.html', 'ADC', 'Add with Carry'), lineDict = {(line[0],line[1]):line for line in lines} @@ -128,7 +129,7 @@ def main(): matchingLines.append(line) if len(matchingLines) > 1: - print 'Duplicate link found for ' + iclass + print('Duplicate link found for ' + iclass) exit(1) instrNode.attrib['url'] = 'uops.info/html-instr/' + canonicalizeInstrString(instrNode.attrib['string']) + '.html' diff --git a/tools/cpuBench/compareMeasurementsToOther.py b/tools/cpuBench/compareMeasurementsToOther.py index 9175013..d22ec85 100755 --- a/tools/cpuBench/compareMeasurementsToOther.py +++ b/tools/cpuBench/compareMeasurementsToOther.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import xml.etree.ElementTree as ET import argparse import sys @@ -66,13 +67,13 @@ def main(): else: portsDiff = True nPortsDiff += 1 - if args.verbose: print 'PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts) + if args.verbose: print('PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts)) else: nPortsMeasurementOnly += 1 else: if otherPorts: nPortsOtherOnly += 1 - if args.verbose: print 'PortsOtherOnly: ' + instrNode.attrib['string'] + if args.verbose: print('PortsOtherOnly: ' + instrNode.attrib['string']) otherUops = [v for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('uops') and v.replace('.','',1).isdigit()] mUops = ([v for a,v in measurementNode.attrib.items() if a.startswith('uops') and not 'retire_slots' in a] if measurementNode is not None else []) @@ -86,13 +87,13 @@ def main(): nUopsEqPortsDiff += int(portsDiff) else: nUopsDiff += 1 - if args.verbose: print 'UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops) + if args.verbose: print('UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops)) else: nUopsMeasurementOnly += 1 else: if otherUops: nUopsOtherOnly += 1 - if args.verbose: print 'UopsOtherOnly: ' + instrNode.attrib['string'] + if args.verbose: print('UopsOtherOnly: ' + instrNode.attrib['string']) otherLatencies = [float(v) for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('latency') and v.replace('.','',1).isdigit()] @@ -113,54 +114,54 @@ def main(): nLatUBClose += 1 else: nLatUBIncorrect += 1 - if args.verbose: print 'LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies) + if args.verbose: print('LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)) else: nLatNoUB += 1 if maxLat in otherLatencies: nLatNoUBMaxEq += 1 else: nLatNoUBMaxDiff += 1 - if args.verbose: print 'LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies) + if args.verbose: print('LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)) else: nLatMeasurementOnly += 1 else: if otherLatencies: nLatOtherOnly += 1 - if args.verbose: print 'LatOtherOnly: ' + instrNode.attrib['string'] + if args.verbose: print('LatOtherOnly: ' + instrNode.attrib['string']) - print 'Ports:' - print ' Measurement data only: ' + str(nPortsMeasurementOnly) - print ' Other data only: ' + str(nPortsOtherOnly) - print ' Both: ' + str(nPortsBoth) - print ' Eq: ' + str(nPortsEq) - print ' Diff: ' + str(nPortsDiff) - print '' + print('Ports:') + print(' Measurement data only: ' + str(nPortsMeasurementOnly)) + print(' Other data only: ' + str(nPortsOtherOnly)) + print(' Both: ' + str(nPortsBoth)) + print(' Eq: ' + str(nPortsEq)) + print(' Diff: ' + str(nPortsDiff)) + print('') - print 'Uops:' - print ' Measurement data only: ' + str(nUopsMeasurementOnly) - print ' Other data only: ' + str(nUopsOtherOnly) - print ' Both: ' + str(nUopsBoth) - print ' Eq: ' + str(nUopsEq) - print ' PortsEq: ' + str(nUopsEqPortsEq) - print ' PortsDiff: ' + str(nUopsEqPortsDiff) - print ' Diff: ' + str(nUopsDiff) - print '' + print('Uops:') + print(' Measurement data only: ' + str(nUopsMeasurementOnly)) + print(' Other data only: ' + str(nUopsOtherOnly)) + print(' Both: ' + str(nUopsBoth)) + print(' Eq: ' + str(nUopsEq)) + print(' PortsEq: ' + str(nUopsEqPortsEq)) + print(' PortsDiff: ' + str(nUopsEqPortsDiff)) + print(' Diff: ' + str(nUopsDiff)) + print('') - print 'Latency:' - print ' Measurement data only: ' + str(nLatMeasurementOnly) - print ' Other data only: ' + str(nLatOtherOnly) - print ' Both: ' + str(nLatBoth) - print ' Exact: ' + str(nLatNoUB) - print ' Eq (Max): ' + str(nLatNoUBMaxEq) - print ' Diff (Max): ' + str(nLatNoUBMaxDiff) - print ' Upper Bound: ' + str(nLatUB) - print ' Correct: ' + str(nLatUBCorrect) - print ' Exact: ' + str(nLatUBExact) - print ' Close: ' + str(nLatUBClose) - print ' Incorrect: ' + str(nLatUBIncorrect) - print '' + print('Latency:') + print(' Measurement data only: ' + str(nLatMeasurementOnly)) + print(' Other data only: ' + str(nLatOtherOnly)) + print(' Both: ' + str(nLatBoth)) + print(' Exact: ' + str(nLatNoUB)) + print(' Eq (Max): ' + str(nLatNoUBMaxEq)) + print(' Diff (Max): ' + str(nLatNoUBMaxDiff)) + print(' Upper Bound: ' + str(nLatUB)) + print(' Correct: ' + str(nLatUBCorrect)) + print(' Exact: ' + str(nLatUBExact)) + print(' Close: ' + str(nLatUBClose)) + print(' Incorrect: ' + str(nLatUBIncorrect)) + print('') - print 'Throughput:' + print('Throughput:') for TP_m, TP_o in [('TP', 'TP'), ('TP_ports', 'TP'), ('TP', 'TP_ports'), ('TP_ports', 'TP_ports')]: nTPMeasurementOnly = 0 nTPOtherOnly = 0 @@ -184,28 +185,28 @@ def main(): nTPEq += 1 else: nTPDiff += 1 - if args.verbose: print 'TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs) + if args.verbose: print('TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)) diff = min(abs(float(m)-float(o)) for o in otherTPs for m in mTPs) if diff <= .1: nTPClose += 1 else: nTPNotClose += 1 - if args.verbose: print 'TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs) + if args.verbose: print('TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)) else: nTPMeasurementOnly += 1 else: if otherTPs: nTPOtherOnly += 1 - if args.verbose: print 'TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string']) + if args.verbose: print('TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string'])) - print ' {} (measurements) - {} (other):'.format(TP_m, TP_o) - print ' Measurement data only: ' + str(nTPMeasurementOnly) - print ' Other data only: ' + str(nTPOtherOnly) - print ' Both: ' + str(nTPBoth) - print ' Eq: ' + str(nTPEq) - print ' Diff: ' + str(nTPDiff) - print ' Close: ' + str(nTPClose) - print ' NotClose: ' + str(nTPNotClose) + print(' {} (measurements) - {} (other):'.format(TP_m, TP_o)) + print(' Measurement data only: ' + str(nTPMeasurementOnly)) + print(' Other data only: ' + str(nTPOtherOnly)) + print(' Both: ' + str(nTPBoth)) + print(' Eq: ' + str(nTPEq)) + print(' Diff: ' + str(nTPDiff)) + print(' Close: ' + str(nTPClose)) + print(' NotClose: ' + str(nTPNotClose)) if __name__ == "__main__": main() diff --git a/tools/cpuBench/compareXML.py b/tools/cpuBench/compareXML.py index 2766e13..0781a32 100755 --- a/tools/cpuBench/compareXML.py +++ b/tools/cpuBench/compareXML.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import xml.etree.ElementTree as ET from xml.dom import minidom import argparse @@ -29,7 +30,7 @@ def main(): for instrStr in sorted(instrNodeDict1): instrNode1 = instrNodeDict1[instrStr] if not instrStr in instrNodeDict2: - print 'No matching entry found for ' + instrStr + print('No matching entry found for ' + instrStr) continue instrNode2 = instrNodeDict2[instrStr] for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'): @@ -40,44 +41,43 @@ def main(): if tp1 != tp2: tpDiff += 1 - print instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2) + print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2)) if args.lat: for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')): - latStr1 = ET.tostring(latNode1, encoding='utf-8').strip() - latStr2 = ET.tostring(latNode2, encoding='utf-8').strip() + latStr1 = ET.tostring(latNode1, encoding='utf-8').decode().strip() + latStr2 = ET.tostring(latNode2, encoding='utf-8').decode().strip() if latStr1 != latStr2: latDiff += 1 - print instrStr - print ' ' + latStr1 - print ' ' + latStr2 + print(' ' + latStr1) + print(' ' + latStr2) if args.ports: p1 = mNode1.attrib.get('ports', '') p2 = mNode2.attrib.get('ports', '') if p1 != p2: portsDiff += 1 - print instrStr + ' - P1: ' + p1 + ' - P2: ' + p2 + print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2) if not args.TP and not args.lat and not args.ports: - xmlStr1 = ET.tostring(mNode1, encoding='utf-8').strip() - xmlStr2 = ET.tostring(mNode2, encoding='utf-8').strip() + xmlStr1 = ET.tostring(mNode1, encoding='utf-8').decode().strip() + xmlStr2 = ET.tostring(mNode2, encoding='utf-8').decode().strip() if xmlStr1 != xmlStr2: - print '-------------------------------' - print instrStr - print xmlStr1 - print xmlStr2 - print '-------------------------------' + print('-------------------------------') + print(instrStr) + print(xmlStr1) + print(xmlStr2) + print('-------------------------------') if args.TP: - print 'TPDiff: ' + str(tpDiff) + print('TPDiff: ' + str(tpDiff)) if args.lat: - print 'LatDiff: ' + str(latDiff) + print('LatDiff: ' + str(latDiff)) if args.ports: - print 'portsDiff: ' + str(portsDiff) + print('portsDiff: ' + str(portsDiff)) if __name__ == "__main__": main() diff --git a/tools/cpuBench/cpuBench.py b/tools/cpuBench/cpuBench.py index 466d6b3..93254c1 100755 --- a/tools/cpuBench/cpuBench.py +++ b/tools/cpuBench/cpuBench.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import xml.etree.ElementTree as ET from xml.etree.ElementTree import Element, SubElement, Comment, tostring from xml.dom import minidom @@ -79,7 +80,7 @@ def getIndexReg(instrNode, opNode): # registers that are not used as implicit registers should come first; RAX (and parts of it) should come last, as some instructions have special encodings for that # prefer low registers to high registers def sortRegs(regsList): - return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, map(int, re.findall('\d+',r)), r)) + return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, list(map(int, re.findall('\d+',r))), r)) # Initialize registers and memory @@ -115,7 +116,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr): init += ['MOV {}, 0'.format(reg)] elif 'MM' in regPrefix and xtype.startswith('f'): init += ['MOV RAX, 0x4000000040000000'] - for i in range(0, getRegSize(reg)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX'] + for i in range(0, getRegSize(reg)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX'] if isAVXInstr(instrNode): init += ['VMOVUPD ' + reg + ', [R14]'] @@ -128,7 +129,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr): elif opNode.attrib['type'] == 'mem': if xtype.startswith('f'): init += ['MOV RAX, 0x4000000040000000'] - for i in range(0, int(opNode.attrib['width'])/8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX'] + for i in range(0, int(opNode.attrib['width'])//8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX'] for opNode in instrNode.findall('./operand[@type="mem"]'): if opNode.attrib.get('suppressed', '0') == '1': continue @@ -179,7 +180,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0, initObjFile = None lateInitObjFile=None if initCode: - if debugOutput: print 'init: ' + initCode + if debugOutput: print('init: ' + initCode) objFile = '/tmp/ramdisk/init.o' if useLateInit: lateInitObjFile = objFile @@ -191,7 +192,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0, localHtmlReports.append('
  • Init:
    ' + re.sub(';[ \t]*(.)', r';\n\1', initCode) + '
  • \n') localHtmlReports.append('
  • Show nanoBench command
  • \n') - if debugOutput: print nanoBenchCmd + if debugOutput: print(nanoBenchCmd) setNanoBenchParameters(unrollCount=unrollCount, loopCount=loopCount, warmUpCount=warmUpCount, basicMode=basicMode) @@ -223,19 +224,19 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0, if maxRepeat>0: if any(v<-0.05 for v in ret.values()): - print 'Repeating experiment because there was a value < 0' + print('Repeating experiment because there was a value < 0') return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1) #sumPortUops = sum(v for e,v in ret.items() if 'PORT' in e and not '4' in e) #if (sumPortUops % 1) > .2 and (sumPortUops % 1) < .8: - # print 'Repeating experiment because the sum of the port usages is not an integer' - # print ret + # print('Repeating experiment because the sum of the port usages is not an integer') + # print(ret) # return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=basicMode, htmlReports=htmlReports, maxRepeat=maxRepeat-1) if any('PORT' in e for e in ret): maxPortUops = max(v/(len(e)-9) for e,v in ret.items() if 'PORT' in e) if maxPortUops * .98 > ret['Core cycles']: - print 'Repeating experiment because there were more uops on a port than core cycles' + print('Repeating experiment because there were more uops on a port than core cycles') return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1) if htmlReports is not None: @@ -250,10 +251,10 @@ def writeFile(fileName, content): def getMachineCode(objFile): try: - machineCode = subprocess.check_output(['objdump', '-M', 'intel', '-d', objFile]) + machineCode = subprocess.check_output(['objdump', '-M', 'intel', '-d', objFile]).decode() return machineCode.partition('<.text>:\n')[2] except subprocess.CalledProcessError as e: - print "Error (getMachineCode): " + str(e) + print('Error (getMachineCode): ' + str(e)) def getCodeLength(asmCode): @@ -420,7 +421,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None, ignoreRegs |= set(doNotWriteRegs)|globalDoNotWriteRegs|set(opRegDict.values()) if operandNode.attrib.get('r', '0') == '1': ignoreRegs |= set(doNotReadRegs)|writtenRegs|readRegs|set(opRegDict.values()) - regsList = filter(lambda x: not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs), regsList) + regsList = [x for x in regsList if not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs)] if not regsList: return None; reg = sortRegs(regsList)[0] @@ -507,7 +508,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None, def createIacaAsmFile(fileName, prefixInstr, prefixRep, instr): asm = '.intel_syntax noprefix\n .byte 0x0F, 0x0B; mov ebx, 111; .byte 0x64, 0x67, 0x90\n' if prefixInstr: - for i in xrange(prefixRep): + for i in range(prefixRep): asm += prefixInstr + "\n" asm += instr + "\n" asm += "1:\n" @@ -521,9 +522,9 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr readRegs = instrInstance.readRegs writtenRegs = instrInstance.writtenRegs - if debugOutput: print ' instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs) + if debugOutput: print(' instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs)) blockInstrsList = getIndependentInstructions(blockInstrNode, True, False, writtenRegs|readRegs, writtenRegs|readRegs, 64) - if debugOutput: print ' bIL: ' + str(blockInstrsList) + if debugOutput: print(' bIL: ' + str(blockInstrsList)) htmlReports.append('

    With blocking instructions for port' + ('s {' if len(blockedPorts)>1 else ' ') + @@ -537,11 +538,11 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: - print "Error: " + e.output + print('Error: ' + e.output) return None if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut: - print "IACA error" + print('IACA error') return None allPortsLine = re.search('\| Cycles \|.*', iacaOut).group(0) @@ -584,7 +585,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr blockInstrAsm = ';'.join(islice(cycle(x.asm for x in blockInstrsList), blockInstrRep)) - unrollCount = 1000/blockInstrRep # make sure that instrs. fit into icache + unrollCount = 1000//blockInstrRep # make sure that instrs. fit into icache if isAMDCPU(): unrollCount = max(unrollCount, 100) # ZEN+ sometimes undercounts FP usage if code is short @@ -596,7 +597,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr if float(measurementResult['Core cycles']) < -10: #something went wrong; this happens for example on HSW with long sequences of JMP instructions - if debugOutput: print "Core cycles < -10 in getUopsOnBlockedPorts" + if debugOutput: print('Core cycles < -10 in getUopsOnBlockedPorts') if sum(u for p, u in measurementResult.items() if ('UOPS_PORT' in p or 'FpuPipeAssignment.Total' in p)) < blockInstrRep-.5: # something went wrong; fewer uops on ports than blockInstrRep @@ -643,7 +644,7 @@ def getIndependentInstructions(instrNode, useDistinctRegs, useIndexedAddr, doNot maxMemWidth = 0 for memNode in instrNode.findall('./operand[@type="mem"][@w="1"]'): - maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0'))/8) + maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0')) // 8) offset += maxMemWidth independentInstructions.append(instrI) @@ -694,17 +695,17 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports): subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: - print "Error: " + e.output + print('Error: ' + e.output) return None if debugOutput: - print instrNode.attrib['iform'] + ' - NoInteriteration' - print iaca_tp + print(instrNode.attrib['iform'] + ' - NoInteriteration') + print(iaca_tp) htmlReports.append('
    ' + iaca_tp + '
    \n') if not iaca_tp or ' !' in iaca_tp or ' X' in iaca_tp or ' 0X' in iaca_tp or not 'Total Num Of Uops' in iaca_tp: - print "IACA error" + print('IACA error') return None cycles = float(iaca_tp.split('\n')[3].split()[2]) @@ -958,7 +959,7 @@ def getTPConfigsForDiv(instrNode): if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM' config.init = ['MOV RAX, ' + arg] - for i in range(0, getRegSize(regType)/8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX'] + for i in range(0, getRegSize(regType)//8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX'] targetRegIdx = min(int(opNode.attrib['idx']) for opNode in instrNode.findall('./operand') if opNode.text and regType in opNode.text) if memDivisor: @@ -997,11 +998,11 @@ TPResult = namedtuple('TPResult', ['TP', 'TP_loop', 'TP_noLoop', 'TP_noDepBreaki def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports): configs = getTPConfigs(instrNode, useDistinctRegs, useIndexedAddr) - minTP = sys.maxint - minTP_loop = sys.maxint - minTP_noLoop = sys.maxint - minTP_noDepBreaking_noLoop = sys.maxint - minTP_single = sys.maxint + minTP = sys.maxsize + minTP_loop = sys.maxsize + minTP_noLoop = sys.maxsize + minTP_noDepBreaking_noLoop = sys.maxsize + minTP_single = sys.maxsize if useIACA: config = configs[0] # consider only first config as IACA does not seem to consider different values in registers @@ -1024,17 +1025,17 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: logging.warn('Error: ' + e.output) - if minTP != sys.maxint: + if minTP != sys.maxsize: htmlReports.append('
    ' + e.output + '
    \n') continue # on SNB, IACA 2.2 crashes on only some (larger) inputs else: return None if not iaca_out or ' ! ' in iaca_out or ' X ' in iaca_out or ' 0X ' in iaca_out or not 'Total Num Of Uops' in iaca_out: - print "IACA error" + print('IACA error') return None - print instrNode.attrib['iform'] + ' - throughput' + print(instrNode.attrib['iform'] + ' - throughput') htmlReports.append('
    ' + iaca_out + '
    \n') @@ -1087,7 +1088,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports instrIList = config.independentInstrs instrLen = getCodeLength(instrIList[0].asm) for ic in sorted(set([1, min(4, len(instrIList)), min(8, len(instrIList)), len(instrIList)])): - if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break + if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break if len(instrIList) > 1: htmlReports.append('

    With ' + str(ic) + ' independent instruction' + ('s' if ic>1 else '') + '

    \n') htmlReports.append('
    \n') @@ -1095,7 +1096,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports init = list(chain.from_iterable(i.regMemInit for i in instrIList[0:ic])) + config.init for useDepBreakingInstrs in ([False, True] if config.depBreakingInstrs else [False]): - if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break + if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break depBreakingInstrs = '' if useDepBreakingInstrs: @@ -1103,7 +1104,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports htmlReports.append('

    With additional dependency-breaking instructions

    \n') for repType in ['unrollOnly', 'loopSmall', 'loopBig']: - if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break + if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break paddingTypes = [''] if ((repType != 'unrollOnly') and (uopsMITE is not None) and (not uopsMS) and (math.ceil(32.0/instrLen) * uopsMITE > 18) @@ -1138,7 +1139,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports else: loopCount = 100 unrollCount *= 10 - if minTP < sys.maxint and minTP > 100: + if minTP < sys.maxsize and minTP > 100: unrollCount = 1 loopCount = 10 @@ -1162,7 +1163,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports #if any('PORT' in e for e in result): # maxPortUops = max(v/(len(e)-9) for e,v in result.items() if e.startswith('UOPS_PORT') and not '4' in e) # if maxPortUops * .98 > result['Core cycles']: - # print 'More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles']) + # print('More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles'])) # #invalid = True #if not invalid: @@ -1174,7 +1175,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports else: minTP_loop = min(minTP_loop, cycles) - if ic == 1 and (minTP == sys.maxint or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly': + if ic == 1 and (minTP == sys.maxsize or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly': minConfig = config minTP_single = min(minTP_single, cycles) @@ -1217,7 +1218,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports htmlReports.append('
    ') - if minTP < sys.maxint: + if minTP < sys.maxsize: return TPResult(minTP, minTP_loop, minTP_noLoop, minTP_noDepBreaking_noLoop, minTP_single, uops, uopsFused, uopsMITE, uopsMS, divCycles, ILD_stalls, complexDec, nAvailableSimpleDecoders, minConfig, ports_dict) @@ -1246,7 +1247,7 @@ def getBasicLatencies(instrNodeList): movsxResult = runExperiment(instrNodeDict['MOVSXD (R64, R32)'], 'MOVSX RAX, EAX') movsxCycles = int(round(movsxResult['Core cycles'])) if movsxCycles != 1: - print 'Latency of MOVSX must be 1' + print('Latency of MOVSX must be 1') sys.exit() basicLatency['MOVSX'] = movsxCycles @@ -1275,7 +1276,7 @@ def getBasicLatencies(instrNodeList): testSetResult = runExperiment(None, 'TEST AL, AL; SET' + flag[0] + ' AL') testSetCycles = int(round(testSetResult['Core cycles'])) if not testSetCycles == 2: - print 'Latencies of TEST and SET' + flag[0] + ' must be 1' + print('Latencies of TEST and SET' + flag[0] + ' must be 1') sys.exit() basicLatency['SET' + flag[0]] = 1 basicLatency['TEST'] = 1 @@ -1297,7 +1298,7 @@ def getBasicLatencies(instrNodeList): result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0') basicLatency[instr] = int(round(result['Core cycles'])) - if filter(lambda x: x.findall('[@iclass="VANDPS"]'), instrNodeList): + if any(x for x in instrNodeList if x.findall('[@iclass="VANDPS"]')): for instr in ['VANDPS', 'VANDPD', 'VORPS', 'VORPD', 'VPAND', 'VPOR']: result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, XMM)'], instr + ' XMM1, XMM1, XMM1') basicLatency[instr] = int(round(result['Core cycles'])) @@ -1310,7 +1311,7 @@ def getBasicLatencies(instrNodeList): result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0') basicLatency[instr] = int(round(result['Core cycles'])) - if filter(lambda x: x.findall('[@extension="AVX512EVEX"]'), instrNodeList): + if any(x for x in instrNodeList if x.findall('[@extension="AVX512EVEX"]')): kmovq_result = runExperiment(instrNodeDict['KMOVQ (K, K)'], 'KMOVQ K1, K1') basicLatency['KMOVQ'] = int(round(kmovq_result['Core cycles'])) @@ -1321,7 +1322,7 @@ def getBasicLatencies(instrNodeList): basicLatency['VMOVUPS_' + regType + '_' + 'K'] = vmovups_cycles if not vmovups_uops == 1: - print 'VMOVUPS must have exactly 1 uop' + print('VMOVUPS must have exactly 1 uop') sys.exit() vpmovq2m_result = runExperiment(instrNodeDict['VPMOVQ2M (K, ' + regType + ')'], @@ -1337,7 +1338,7 @@ def getBasicLatencies(instrNodeList): mov_10movsx_mov_result = runExperiment(None, 'mov ' + reg + ', [r14];' + ';'.join(10*['MOVSX R12, R12w']) + '; mov [r14], ' + reg , unrollCount=100) basicLatency['MOV_10MOVSX_MOV_'+str(memWidth)] = int(round(mov_10movsx_mov_result['Core cycles'])) - print 'Basic Latencies: ' + str(basicLatency) + print('Basic Latencies: ' + str(basicLatency)) # Returns a dict {opNode: instr}, s.t. opNode is both read and written, and instr breaks the dependency # Returns a list of dependency breaking instructions for operands that are both read and written (with the exception of ignoreOperand, if specified). @@ -1541,8 +1542,8 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep): init = ['MOV RAX, ' + dividend] init += ['MOV RBX, ' + divisor] - for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RBX'] - for i in range(64, 64+getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX'] + for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RBX'] + for i in range(64, 64+getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX'] if instrNode.attrib['iclass'] in ['DIVSS', 'DIVPS', 'DIVSD', 'DIVPD']: init += ['MOVUP' + dataType + ' XMM1, [R14+64]'] @@ -1671,7 +1672,7 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep): if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM' init = ['MOV RAX, ' + arg] - for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX'] + for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX'] targetReg = regType + '0' sourceBaseReg = regType + '1' @@ -1778,7 +1779,7 @@ def getLatConfigsFromMemToReg(instrNode, instrI, memOpNode, targetReg, addrReg, if memOpNode.attrib['width'] != chainOpNode1.attrib['width']: continue if memOpNode.attrib.get('VSIB', '') != chainOpNode1.attrib.get('VSIB', ''): continue - for chainOpNode2 in filter(lambda x: targetReg in x.text.split(','), chainInstrNode.findall('./operand[@type="reg"][@w="1"]')): + for chainOpNode2 in [x for x in chainInstrNode.findall('./operand[@type="reg"][@w="1"]') if targetReg in x.text.split(',')]: if chainOpNode2.attrib.get('optional', '') == '1': continue chainsInstr = getInstrInstanceFromNode(chainInstrNode, [targetReg], [targetReg], True, {int(chainOpNode2.attrib['idx']):targetReg}).asm result.append(LatConfig(instrI, chainInstrs=chainsInstr, chainLatency=1)) @@ -1971,7 +1972,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem else: if len(regs2) == 1: reg2 = sortRegs(regs2)[0] - otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg2), regs1) + otherRegs = [x for x in regs1 if getCanonicalReg(x) != getCanonicalReg(reg2)] if otherRegs: reg1 = sortRegs(otherRegs)[0] else: @@ -1988,7 +1989,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem reg2 = r break else: - otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg1), regs2) + otherRegs = [x for x in regs2 if getCanonicalReg(x) != getCanonicalReg(reg1)] if otherRegs: reg2 = sortRegs(otherRegs)[0] @@ -2053,7 +2054,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem chainInstrInt, chainLatencyInt = getChainInstrForVectorRegs(instrNode, reg2, reg1, cRep, 'Int') configList.append(LatConfig(instrI, chainInstrs=chainInstrInt, chainLatency=chainLatencyInt)) else: - print 'invalid reg prefix: ' + reg1Prefix + print('invalid reg prefix: ' + reg1Prefix) return None else: configList.isUpperBound = True @@ -2143,7 +2144,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem configList.extend(getLatConfigsFromMemToReg(instrNode, instrI, targetNode, reg, addrReg, cRep)) else: # ToDo - print 'unsupported reg to mem' + print('unsupported reg to mem') return None elif startNode.attrib['type'] == 'flags': ################# @@ -2225,7 +2226,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem if suppressedStart: if not regs.issubset(GPRegs): - print 'read from suppressed mem to non-GPR reg not yet supported' + print('read from suppressed mem to non-GPR reg not yet supported') return None instrI = getInstrInstanceFromNode(instrNode, [addrReg, indexReg, 'R12'], [addrReg, indexReg, 'R12'], useDistinctRegs, {targetNodeIdx:reg}, @@ -2358,11 +2359,11 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports): subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o']) iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: - print "Error: " + e.output + print('Error: ' + e.output) return None if '!' in iaca_lat or not 'Latency' in iaca_lat: - print "IACA error" + print('IACA error') return None latency = iaca_lat.split('\n')[3].split()[1] @@ -2444,7 +2445,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports): latConfigLists = getLatConfigLists(instrNode, opNode1, opNode2, useDistinctRegs, addrMem, tpDict) if latConfigLists is None: continue - minLat = sys.maxint + minLat = sys.maxsize maxLat = 0 minLatIsUpperBound = False @@ -2453,7 +2454,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports): configHtmlReports = [] for latConfigList in latConfigLists: - minLatForCurList = sys.maxint + minLatForCurList = sys.maxsize if not any((latConfig.init or latConfig.instrI.regMemInit) for latConfig in latConfigList.latConfigs): # Test different register values for read-only registers @@ -2463,7 +2464,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports): readOnlyRegOpNodeIdx = int(readOnlyRegOpNode.attrib['idx']) for latConfig in list(latConfigList.latConfigs): if not readOnlyRegOpNodeIdx in latConfig.instrI.opRegDict: - print 'readOnlyRegOpNodeIdx not found in opRegDict' + print('readOnlyRegOpNodeIdx not found in opRegDict') continue reg = latConfig.instrI.opRegDict[readOnlyRegOpNodeIdx] if (not reg in GPRegs) or (reg in High8Regs) or (reg in globalDoNotWriteRegs) or (reg in specialRegs): continue @@ -2551,8 +2552,8 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports): else: latConfig.chainInstrs += 'VPCMPD {0}, {1}, {1}, 7;'.format(maskReg, 'XMM15') - mlDP = sys.maxint - mlnoDP = sys.maxint + mlDP = sys.maxsize + mlnoDP = sys.maxsize for latConfig in latConfigList.latConfigs: configI += 1 @@ -2576,7 +2577,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports): configHtmlReports.append('\n') if not measurementResult: - print 'no result found' + print('no result found') continue cycles = measurementResult['Core cycles'] @@ -2869,7 +2870,7 @@ def main(): else: cpu = cpuid.CPUID() arch = cpuid.micro_arch(cpu) - print cpuid.get_basic_info(cpu) + print(cpuid.get_basic_info(cpu)) if arch == 'unknown': exit(1) @@ -2906,7 +2907,7 @@ def main(): try: subprocess.check_output('mkdir -p /tmp/ramdisk; sudo mount -t tmpfs -o size=100M none /tmp/ramdisk/', shell=True) except subprocess.CalledProcessError as e: - print "Could not create ramdisk " + e.output + print('Could not create ramdisk ' + e.output) exit(1) XMLRoot = ET.parse(args.input).getroot() @@ -2957,7 +2958,7 @@ def main(): else: for i, instrNode in enumerate(instrNodeList): #if not 'RCR (R64, 1)' in instrNode.attrib['string']: continue - print 'Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')' + print('Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')') htmlReports = ['

    ' + instrNode.attrib['string'] + ' - Throughput and Uops' + (' (IACA '+iacaVersion+')' if useIACA else '') + '

    \n
    \n'] @@ -2968,7 +2969,7 @@ def main(): if hasExplMemOp: htmlReports.append('

    With a non-indexed addressing mode

    \n') tpResult = getThroughputAndUops(instrNode, True, False, htmlReports) - print instrNode.attrib['string'] + " - tp: " + str(tpResult) + print(instrNode.attrib['string'] + " - tp: " + str(tpResult)) if tpResult: tpDict[instrNode] = tpResult @@ -3005,7 +3006,7 @@ def main(): with open('tp_' + arch + '.pickle', 'wb') as f: pickle.dump((tpDict, tpDictSameReg, tpDictIndexedAddr, tpDictNoInteriteration), f) - num_ports = len(tpDict.values()[0].unblocked_ports) + num_ports = len(list(tpDict.values())[0].unblocked_ports) ######################## # Latency @@ -3023,13 +3024,13 @@ def main(): elif not useIACA or iacaVersion == '2.1': for i, instrNode in enumerate(instrNodeList): #if not 'DIV' in instrNode.attrib['string']: continue - print 'Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')' + print('Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')') htmlReports = ['

    ' + instrNode.attrib['string'] + ' - Latency' + (' (IACA '+iacaVersion+')' if useIACA else '') + '

    \n
    \n'] lat = getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports) if lat is not None: - if debugOutput: print instrNode.attrib['iform'] + ': ' + str(lat) + if debugOutput: print(instrNode.attrib['iform'] + ': ' + str(lat)) latencyDict[instrNode] = lat writeHtmlFile('html-lat/'+arch, instrNode, instrNode.attrib['string'], ''.join(htmlReports)) with open('lat_' + arch + '.pickle', 'wb') as f: @@ -3080,21 +3081,21 @@ def main(): # their throughput is limited to 1 per cycle; thus, they are disallowed by the TP_noDepBreaking_noLoop check above disallowedBlockingInstrs.remove(instrNodeDict['MOVD (R32, XMM)']) - print 'disallowedBlockingInstrs' + print('disallowedBlockingInstrs') for instrNode in disallowedBlockingInstrs: - print ' ' + str(instrNode.attrib['string']) + print(' ' + str(instrNode.attrib['string'])) - print 'tpDict' + print('tpDict') for instr, tpResult in tpDict.items(): - print ' ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports) + print(' ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports)) # we cannot start higher than .79 as IACA has .2 uops on each port for a port usage of, e.g., 1*p1256 # using uops_dict instead can be problematic because in IACA the uops on the individual ports do not always add up to this value oneUopInstrs = [instr for instr, tpResult in tpDict.items() if instr not in disallowedBlockingInstrs and .79 < sum([v for v in tpResult.unblocked_ports.values() if v>.1]) < 1.11] - print 'oneUopInstrs' + print('oneUopInstrs') for instrNode in oneUopInstrs: - print ' ' + str(instrNode.attrib['string']) + print(' ' + str(instrNode.attrib['string'])) # dicts from port combination to a set of instructions (either not containing AVX or SSE instructions bec. of transition penalty) that always uses these ports blockingInstructionsDictNonAVX_set = {} blockingInstructionsDictNonSSE_set = {} @@ -3102,7 +3103,7 @@ def main(): for instrNode in oneUopInstrs: usedPorts = frozenset({p for p, x in tpDict[instrNode].unblocked_ports.items() if x>0.1}) if usedPorts: - print instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]'))) + print(instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]')))) if not isSSEInstr(instrNode): if not usedPorts in blockingInstructionsDictNonSSE_set: blockingInstructionsDictNonSSE_set[usedPorts] = set() @@ -3118,10 +3119,10 @@ def main(): blockingInstructionsDictNonSSE = {comb: next(iter(sorted(instr_set, key=sort_key))) for comb, instr_set in blockingInstructionsDictNonSSE_set.items()} #for comb, instr_set in blockingInstructionsDictNonAVX_set.items(): - # print comb - # print [x.attrib['string'] for x in sorted(instr_set, key=sort_key)] + # print(comb) + # print([x.attrib['string'] for x in sorted(instr_set, key=sort_key)]) - #print str(blockingInstructionsDictNonAVX.items()) + #print(str(blockingInstructionsDictNonAVX.items())) if isIntelCPU(): # mov to mem has always two uops: store address and store data; there is no instruction that uses just one of them @@ -3138,26 +3139,26 @@ def main(): if storeAddressPorts not in blockingInstructionsDictNonAVX: blockingInstructionsDictNonAVX[storeAddressPorts] = movMemInstrNode if storeAddressPorts not in blockingInstructionsDictNonSSE: blockingInstructionsDictNonSSE[storeAddressPorts] = movMemInstrNode - print 'Non-AVX:' + print('Non-AVX:') for k,v in blockingInstructionsDictNonAVX.items(): - print str(k) + ': ' + v.attrib['iform'] - print 'Non-SSE:' + print(str(k) + ': ' + v.attrib['iform']) + print('Non-SSE:') for k,v in blockingInstructionsDictNonSSE.items(): - print str(k) + ': ' + v.attrib['iform'] + print(str(k) + ': ' + v.attrib['iform']) sortedPortCombinationsNonAVX = sorted(blockingInstructionsDictNonAVX.keys(), key=lambda x:(len(x), sorted(x))) sortedPortCombinationsNonSSE = sorted(blockingInstructionsDictNonSSE.keys(), key=lambda x:(len(x), sorted(x))) - print 'sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX) + print('sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX)) - for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (tpDict[x].config.preInstrNodes, x.attrib['string']))): + for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (len(tpDict[x].config.preInstrNodes), x.attrib['string']))): #if not 'CVTPD2PI' in instrNode.attrib['string']: continue - print 'Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')' + print('Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')') htmlReports = ['

    ' + instrNode.attrib['string'] + ' - Port Usage' + (' (IACA '+iacaVersion+')' if useIACA else '') + '

    '] for useDistinctRegs in ([True, False] if instrNode in tpDictSameReg else [True]): - for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]): + for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]): tpResult = None if not useDistinctRegs: @@ -3176,7 +3177,7 @@ def main(): # use abs because on, e.g., IVB port usages might be smaller in the second half of the experiments if replays happen used_ports = {p for p, x in tpResult.unblocked_ports.items() if abs(x)>0.05} - if debugOutput: print instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports) + if debugOutput: print(instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports)) if not isAVXInstr(instrNode): blockingInstrs = blockingInstructionsDictNonAVX @@ -3218,13 +3219,13 @@ def main(): blockInstrRep = min(blockInstrRep, 100) uopsOnBlockedPorts = getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockingInstrs[combination], blockInstrRep, combination, tpResult.config, htmlReports) if uopsOnBlockedPorts is None: - print 'no uops on blocked ports: ' + str(combination) + print('no uops on blocked ports: ' + str(combination)) continue uopsOnBlockedPorts -= prevUopsOnCombination if rem_uops < uopsOnBlockedPorts: - print 'More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts) + print('More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts)) if uopsOnBlockedPorts <= 0: continue @@ -3338,8 +3339,8 @@ def main(): try: resultNode.attrib['TP_ports'+suffix] = "%.2f" % getTP_LP(portUsageWithDivList) except ValueError as err: - print 'Could not solve LP for ' + instrNode.attrib['string'] + ':' - print err + print('Could not solve LP for ' + instrNode.attrib['string'] + ':') + print(err) with open(args.output, "w") as f: reparsed = XMLRoot @@ -3358,7 +3359,7 @@ def main(): except subprocess.CalledProcessError: exit(1) - print 'Total number of microbenchmarks: ' + str(nExperiments) + print('Total number of microbenchmarks: ' + str(nExperiments)) if __name__ == "__main__": diff --git a/tools/cpuBench/mergeXML.py b/tools/cpuBench/mergeXML.py index a46f120..e81b229 100755 --- a/tools/cpuBench/mergeXML.py +++ b/tools/cpuBench/mergeXML.py @@ -1,4 +1,5 @@ -#!/usr/bin/python +#!/usr/bin/env python3 + import xml.etree.ElementTree as ET from xml.dom import minidom import argparse @@ -20,7 +21,7 @@ def main(): for instrNode1 in root1.iter('instruction'): if instrNode1.attrib['string'] not in instrNode2Dict: - print 'no matching entry found for ' + instrNode1.attrib['string'] + print('no matching entry found for ' + instrNode1.attrib['string']) continue for instrNode2 in instrNode2Dict[instrNode1.attrib['string']]: for archNode2 in instrNode2.iter('architecture'): diff --git a/tools/cpuBench/utils.py b/tools/cpuBench/utils.py index 7657d1a..a29862c 100755 --- a/tools/cpuBench/utils.py +++ b/tools/cpuBench/utils.py @@ -137,7 +137,7 @@ def getLatencyTableEntry(measurementNode): if measurementNode is None or measurementNode.find('./latency') is None: return None - minLat = sys.maxint + minLat = sys.maxsize maxLat = 0 minLatUB = False maxLatUB = False