From 313aa5ee3013b0334d7b24e2fd49f4a14a415add Mon Sep 17 00:00:00 2001
From: Andreas Abel <abel@cs.uni-saarland.de>
Date: Sat, 13 Mar 2021 21:04:52 +0100
Subject: [PATCH] Port tools to Python 3

---
 tools/CPUID/cpuid.py                         |  28 +--
 tools/CacheAnalyzer/cacheGraph.py            |   7 +-
 tools/CacheAnalyzer/cacheInfo.py             |  12 +-
 tools/CacheAnalyzer/cacheLib.py              |  21 +-
 tools/CacheAnalyzer/cacheSeq.py              |  10 +-
 tools/CacheAnalyzer/cacheSim.py              |  24 ++-
 tools/CacheAnalyzer/hitMiss.py               |  11 +-
 tools/CacheAnalyzer/permPolicy.py            |  20 +-
 tools/CacheAnalyzer/replPolicy.py            |  21 +-
 tools/CacheAnalyzer/setDueling.py            |  13 +-
 tools/CacheAnalyzer/strideGraph.py           |  12 +-
 tools/cpuBench/addAMDDocToXML.py             |  13 +-
 tools/cpuBench/addDocToXML.py                |   7 +-
 tools/cpuBench/addURLsToXML.py               |   9 +-
 tools/cpuBench/compareMeasurementsToOther.py |  99 +++++-----
 tools/cpuBench/compareXML.py                 |  38 ++--
 tools/cpuBench/cpuBench.py                   | 191 ++++++++++---------
 tools/cpuBench/mergeXML.py                   |   5 +-
 tools/cpuBench/utils.py                      |   2 +-
 19 files changed, 275 insertions(+), 268 deletions(-)

diff --git a/tools/CPUID/cpuid.py b/tools/CPUID/cpuid.py
index d7e2cd0..7a15f8d 100755
--- a/tools/CPUID/cpuid.py
+++ b/tools/CPUID/cpuid.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2019 Andreas Abel
+# Copyright (C) 2021 Andreas Abel
 #
 # This file was modified from https://github.com/flababah/cpuid.py
 #
@@ -408,7 +408,7 @@ def get_cache_info(cpu):
          parameters.append('Physical Line partitions (P): ' + str(P))
          parameters.append('Ways of associativity (W): ' + str(W))
          parameters.append('Number of Sets (S): ' + str(S))
-         parameters.append('Cache Size: ' + str(W*P*L*S/1024) + ' kB')
+         parameters.append('Cache Size: ' + str(W*P*L*S//1024) + ' kB')
 
          if get_bit(d, 0): parameters.append('WBINVD/INVD is not guaranteed to act upon lower level caches of non-originating threads sharing this cache')
          else: parameters.append('WBINVD/INVD from threads sharing this cache acts upon lower level caches for threads sharing this cache')
@@ -447,7 +447,7 @@ def get_cache_info(cpu):
 
       cacheInfo['L1D'] = {
          'lineSize': L1DcLineSize,
-         'nSets': L1DcSize*1024/L1DcAssoc/L1DcLineSize,
+         'nSets': L1DcSize*1024//L1DcAssoc//L1DcLineSize,
          'assoc': L1DcAssoc
       }
 
@@ -463,7 +463,7 @@ def get_cache_info(cpu):
 
       cacheInfo['L1I'] = {
          'lineSize': L1IcLineSize,
-         'nSets': L1IcSize*1024/L1IcAssoc/L1IcLineSize,
+         'nSets': L1IcSize*1024//L1IcAssoc//L1IcLineSize,
          'assoc': L1IcAssoc
       }
 
@@ -484,7 +484,7 @@ def get_cache_info(cpu):
       elif c_15_12 == 0xC: L2Assoc = 64
       elif c_15_12 == 0xD: L2Assoc = 96
       elif c_15_12 == 0xE: L2Assoc = 128
-      elif c_15_12 == 0x2: L2Assoc = L2Size*1024/L2LineSize
+      elif c_15_12 == 0x2: L2Assoc = L2Size*1024//L2LineSize
 
       log.info('  L2LineSize: ' + str(L2LineSize) + ' B')
       log.info('  L2LinesPerTag: ' + str(L2LinesPerTag))
@@ -493,7 +493,7 @@ def get_cache_info(cpu):
 
       cacheInfo['L2'] = {
          'lineSize': L2LineSize,
-         'nSets': L2Size*1024/L2Assoc/L2LineSize,
+         'nSets': L2Size*1024//L2Assoc//L2LineSize,
          'assoc': L2Assoc
       }
 
@@ -519,11 +519,11 @@ def get_cache_info(cpu):
       log.info('  L3LineSize: ' + str(L3LineSize) + ' B')
       log.info('  L3LinesPerTag: ' + str(L3LinesPerTag))
       log.info('  L3Assoc: ' + str(L3Assoc))
-      log.info('  L3Size: ' + str(L3Size/1024) + ' MB')
+      log.info('  L3Size: ' + str(L3Size//1024) + ' MB')
 
       cacheInfo['L3'] = {
          'lineSize': L3LineSize,
-         'nSets': L3Size*1024/L3Assoc/L3LineSize,
+         'nSets': L3Size*1024//L3Assoc//L3LineSize,
          'assoc': L3Assoc
       }
 
@@ -551,13 +551,13 @@ if __name__ == "__main__":
                 yield (eax, regs)
                 eax += 1
 
-    print " ".join(x.ljust(8) for x in ("CPUID", "A", "B", "C", "D")).strip()
+    print(' '.join(x.ljust(8) for x in ('CPUID', 'A', 'B', 'C', 'D')).strip())
     for eax, regs in valid_inputs():
-        print "%08x" % eax, " ".join("%08x" % reg for reg in regs)
+        print('%08x' % eax, ' '.join('%08x' % reg for reg in regs))
 
-    print ''
-    print get_basic_info(cpuid)
+    print('')
+    print(get_basic_info(cpuid))
 
-    print '\nCache information:'
+    print('\nCache information:')
     get_cache_info(cpuid)
 
diff --git a/tools/CacheAnalyzer/cacheGraph.py b/tools/CacheAnalyzer/cacheGraph.py
index c75f673..c931bbe 100755
--- a/tools/CacheAnalyzer/cacheGraph.py
+++ b/tools/CacheAnalyzer/cacheGraph.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 from itertools import count
 from collections import namedtuple, OrderedDict
 
@@ -73,7 +74,7 @@ def main():
       _, nbDict = getAgesOfBlocks(blocks, args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice,
                                   clearHL=(not args.noClearHL), wbinvd=(not args.noWbinvd), returnNbResults=True, maxAge=args.maxAge,
                                   nMeasurements=args.nMeasurements, agg=args.agg)
-      for event in sorted(e for e in nbDict.values()[0][0].keys() if 'HIT' in e or 'MISS' in e):
+      for event in sorted(e for e in list(nbDict.values())[0][0].keys() if 'HIT' in e or 'MISS' in e):
          traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks]
          title = 'Access Sequence: ' + (args.seq_init + ' ' + args.seq).replace('?','').strip() + ' <n fresh blocks> <block>?'
          html.append(getPlotlyGraphDiv(title, '# of fresh blocks', event, traces))
@@ -82,7 +83,7 @@ def main():
 
    with open(args.output ,'w') as f:
       f.write('\n'.join(html))
-      print 'Graph written to ' + args.output
+      print('Graph written to ' + args.output)
 
 
 if __name__ == "__main__":
diff --git a/tools/CacheAnalyzer/cacheInfo.py b/tools/CacheAnalyzer/cacheInfo.py
index 920e9f5..6bf568a 100755
--- a/tools/CacheAnalyzer/cacheInfo.py
+++ b/tools/CacheAnalyzer/cacheInfo.py
@@ -1,6 +1,6 @@
-#!/usr/bin/python
-import argparse
+#!/usr/bin/env python3
 
+import argparse
 from cacheLib import *
 
 import logging
@@ -16,11 +16,11 @@ def main():
 
    cpuidInfo = getCpuidCacheInfo()
 
-   print ''
-   print getCacheInfo(1)
-   print getCacheInfo(2)
+   print('')
+   print(getCacheInfo(1))
+   print(getCacheInfo(2))
    if 'L3' in cpuidInfo:
-      print getCacheInfo(3)
+      print(getCacheInfo(3))
 
 
 if __name__ == "__main__":
diff --git a/tools/CacheAnalyzer/cacheLib.py b/tools/CacheAnalyzer/cacheLib.py
index da368c7..9ac573d 100755
--- a/tools/CacheAnalyzer/cacheLib.py
+++ b/tools/CacheAnalyzer/cacheLib.py
@@ -1,4 +1,3 @@
-#!/usr/bin/python
 from itertools import count
 from collections import namedtuple
 
@@ -79,11 +78,11 @@ class CacheInfo:
 
    def __str__(self):
       return '\n'.join(['L' + str(self.level) + ':',
-                        '  Size: ' + str(self.size/1024) + ' kB',
+                        '  Size: ' + str(self.size//1024) + ' kB',
                         '  Associativity: ' + str(self.assoc),
                         '  Line Size: ' + str(self.lineSize) + ' B',
                         '  Number of sets' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.nSets),
-                        '  Way size' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.waySize/1024) + ' kB',
+                        '  Way size' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.waySize//1024) + ' kB',
                        ('  Number of CBoxes: ' + str(self.nCboxes) if self.nCboxes is not None else ''),
                        ('  Number of slices: ' + str(self.nSlices) if self.nSlices is not None else '')])
 
@@ -134,13 +133,13 @@ def getCacheInfo(level):
             assoc = cpuidInfo['assoc']
             nSets = cpuidInfo['nSets']
 
-            stride = 2**((lineSize*nSets/getNCBoxUnits())-1).bit_length() # smallest power of two larger than lineSize*nSets/nCBoxUnits
+            stride = 2**((lineSize*nSets//getNCBoxUnits())-1).bit_length() # smallest power of two greater than or equal to lineSize*nSets//nCBoxUnits
             ms = findMaximalNonEvictingL3SetInCBox(0, stride, assoc, 0)
             log.debug('Maximal non-evicting L3 set: ' + str(len(ms)) + ' ' + str(ms))
             nCboxes = getNCBoxUnits()
             nSlices = nCboxes * int(math.ceil(float(len(ms))/assoc))
 
-            getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets/nSlices, nSlices, nCboxes)
+            getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets//nSlices, nSlices, nCboxes)
       return getCacheInfo.L3CacheInfo
    else:
       raise ValueError('invalid level')
@@ -376,9 +375,9 @@ def getAddresses(level, wayID, cacheSetList, cBox=1, cSlice=0):
                   L3SetToWayIDMap[cBox][cSlice][L3Set][i] = addr
          if not wayID in L3SetToWayIDMap[cBox][cSlice][L3Set]:
             if getCacheInfo(3).nSlices == getNCBoxUnits():
-               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, L3SetToWayIDMap[cBox][cSlice][L3Set].values())))
+               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
             else:
-               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, L3SetToWayIDMap[cBox][cSlice][L3Set].values())))
+               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
          addresses.append(L3SetToWayIDMap[cBox][cSlice][L3Set][wayID])
 
       return addresses
@@ -404,16 +403,16 @@ def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False):
       for s in cacheSetsStr.split(','):
          if '-' in s:
             first, last = s.split('-')[:2]
-            cacheSetList += range(int(first), int(last)+1)
+            cacheSetList += list(range(int(first), int(last)+1))
          else:
             cacheSetList.append(int(s))
    else:
       nSets = getCacheInfo(level).nSets
       if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes):
          nHLSets = getCacheInfo(level-1).nSets
-         cacheSetList = range(nHLSets, nSets)
+         cacheSetList = list(range(nHLSets, nSets))
       else:
-         cacheSetList = range(0, nSets)
+         cacheSetList = list(range(0, nSets))
    return cacheSetList
 
 
@@ -509,7 +508,7 @@ def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0,
 
 def printNB(nb_result):
    for r in nb_result.items():
-      print r[0] + ': ' + str(r[1])
+      print(r[0] + ': ' + str(r[1]))
 
 
 def hasL3Conflicts(addresses, clearHLAddrList, codeOffset):
diff --git a/tools/CacheAnalyzer/cacheSeq.py b/tools/CacheAnalyzer/cacheSeq.py
index aa1deda..448093f 100755
--- a/tools/CacheAnalyzer/cacheSeq.py
+++ b/tools/CacheAnalyzer/cacheSeq.py
@@ -1,12 +1,12 @@
-#!/usr/bin/python
-from itertools import count, cycle, islice
-from collections import namedtuple, OrderedDict
+#!/usr/bin/env python3
 
 import argparse
 import sys
+from itertools import count, cycle, islice
+from collections import namedtuple, OrderedDict
 
-from cacheLib import *
 import cacheSim
+from cacheLib import *
 
 import logging
 log = logging.getLogger(__name__)
@@ -37,7 +37,7 @@ def main():
       policyClass = cacheSim.AllPolicies[args.sim]
       seq = args.seq_init + (' ' + args.seq) * args.loop
       hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets) / args.loop
-      print 'Hits: ' + str(hits)
+      print('Hits: ' + str(hits))
    else:
       nb = runCacheExperiment(args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL),
                               doNotUseOtherCBoxes=args.noUseOtherCBoxes, loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg)
diff --git a/tools/CacheAnalyzer/cacheSim.py b/tools/CacheAnalyzer/cacheSim.py
index 74414ac..511b69c 100755
--- a/tools/CacheAnalyzer/cacheSim.py
+++ b/tools/CacheAnalyzer/cacheSim.py
@@ -1,9 +1,7 @@
-#!/usr/bin/python
 import random
 
 from itertools import count
 from numpy import median
-
 from cacheLib import *
 
 import logging
@@ -85,7 +83,7 @@ class PLRUSim(ReplPolicySim):
    def updateIndexBits(self, accIndex):
       lastIdx = accIndex
       for level in reversed(range(0, len(self.bits))):
-         curIdx = lastIdx/2
+         curIdx = lastIdx//2
          self.bits[level][curIdx] = 1 - (lastIdx % 2)
          lastIdx = curIdx
 
@@ -111,7 +109,7 @@ AllRandPLRUVariants = {
 
 class LRU_PLRU4Sim(ReplPolicySim):
    def __init__(self, assoc):
-      self.PLRUs = [PLRUSim(4, linearInit=True) for _ in range(0, assoc/4)]
+      self.PLRUs = [PLRUSim(4, linearInit=True) for _ in range(0, assoc//4)]
       self.PLRUOrdered = list(self.PLRUs) # from MRU to LRU
 
    def acc(self, block):
@@ -299,9 +297,9 @@ CommonPolicies = {
    'SRRIP': AllDetQLRUVariants['QLRU_H00_M2_R0_U0_UMO'],
 }
 
-AllDetPolicies = dict(CommonPolicies.items() + AllDetQLRUVariants.items())
-AllRandPolicies = dict(AllRandQLRUVariants.items() + AllRandPLRUVariants.items())
-AllPolicies = dict(AllDetPolicies.items() + AllRandPolicies.items())
+AllDetPolicies = dict(list(CommonPolicies.items()) + list(AllDetQLRUVariants.items()))
+AllRandPolicies = dict(list(AllRandQLRUVariants.items()) + list(AllRandPLRUVariants.items()))
+AllPolicies = dict(list(AllDetPolicies.items()) + list(AllRandPolicies.items()))
 
 
 def parseCacheSetsStrSim(cacheSetsStr):
@@ -312,7 +310,7 @@ def parseCacheSetsStrSim(cacheSetsStr):
    for s in cacheSetsStr.split(','):
       if '-' in s:
          first, last = s.split('-')[:2]
-         cacheSetList += range(int(first), int(last)+1)
+         cacheSetList += list(range(int(first), int(last)+1))
       else:
          cacheSetList.append(int(s))
 
@@ -381,8 +379,8 @@ def getPermutations(policySimClass, assoc):
    initAges = getAges(initBlocks, seq, policySimClass, assoc)
 
    accSeqStr = 'Access sequence: <wbinvd> ' + seq
-   print accSeqStr
-   print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}'
+   print(accSeqStr)
+   print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}')
 
    blocks = ['B' + str(i) for i in range(0, assoc)]
    baseSeq = ' '.join(initBlocks + blocks)
@@ -390,8 +388,8 @@ def getPermutations(policySimClass, assoc):
    ages = getAges(blocks, baseSeq, policySimClass, assoc)
 
    accSeqStr = 'Access sequence: <wbinvd> ' + baseSeq
-   print accSeqStr
-   print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}'
+   print(accSeqStr)
+   print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}')
 
    blocksSortedByAge = [a[0] for a in sorted(ages.items(), key=lambda x: -x[1])] # most recent block first
 
@@ -408,5 +406,5 @@ def getPermutations(policySimClass, assoc):
             break
          perm[assoc-permAge] = bi
 
-      print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))
+      print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)))
 
diff --git a/tools/CacheAnalyzer/hitMiss.py b/tools/CacheAnalyzer/hitMiss.py
index 0bd5666..117f0aa 100755
--- a/tools/CacheAnalyzer/hitMiss.py
+++ b/tools/CacheAnalyzer/hitMiss.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import sys
 
@@ -32,10 +33,10 @@ def main():
       seq = re.sub('[?!]', '', ' '.join([args.seq_init, args.seq])).strip() + '?'
       hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets)
       if hits > 0:
-         print 'HIT'
+         print('HIT')
          exit(1)
       else:
-         print 'MISS'
+         print('MISS')
          exit(0)
    else:
       setCount = len(parseCacheSetsStr(args.level, True, args.sets))
@@ -43,10 +44,10 @@ def main():
       nb = runCacheExperiment(args.level, seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL),
                               loop=args.loop, wbinvd=(not args.noWbinvd))
       if nb['L' + str(args.level) + '_HIT']/setCount > .5:
-         print 'HIT'
+         print('HIT')
          exit(1)
       else:
-         print 'MISS'
+         print('MISS')
          exit(0)
 
 
diff --git a/tools/CacheAnalyzer/permPolicy.py b/tools/CacheAnalyzer/permPolicy.py
index 951ecb1..a495773 100755
--- a/tools/CacheAnalyzer/permPolicy.py
+++ b/tools/CacheAnalyzer/permPolicy.py
@@ -1,20 +1,20 @@
-#!/usr/bin/python
-from itertools import count
-from collections import namedtuple, OrderedDict
+#!/usr/bin/env python3
 
 import argparse
 import math
 import os
+import plotly.graph_objects as go
 import re
 import subprocess
 import sys
 
+from itertools import count
+from collections import namedtuple, OrderedDict
 from plotly.offline import plot
-import plotly.graph_objects as go
 
+import cacheSim
 from cacheLib import *
 from cacheGraph import *
-import cacheSim
 
 import logging
 log = logging.getLogger(__name__)
@@ -36,8 +36,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
                                          cBox=cBox, cSlice=cSlice)
 
       accSeqStr = 'Access sequence: <wbinvd> ' + seq
-      print accSeqStr
-      print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}'
+      print(accSeqStr)
+      print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}')
 
       event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent)
       traces = [(b, [nb[event] for nb in nbDict[b]]) for b in initBlocks]
@@ -52,8 +52,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
                                   cBox=cBox, cSlice=cSlice)
 
    accSeqStr = 'Access sequence: <wbinvd> ' + baseSeq
-   print accSeqStr
-   print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}'
+   print(accSeqStr)
+   print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}')
 
    event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent)
    traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks]
@@ -77,7 +77,7 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
             break
          perm[assoc-permAge] = bi
 
-      print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))
+      print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)))
 
 
 def main():
diff --git a/tools/CacheAnalyzer/replPolicy.py b/tools/CacheAnalyzer/replPolicy.py
index d3b5873..5e69e99 100755
--- a/tools/CacheAnalyzer/replPolicy.py
+++ b/tools/CacheAnalyzer/replPolicy.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import random
 import sys
@@ -23,7 +24,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s
       seq = initSeq + ' '.join(seqPrefix)
       actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements)
       sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets)
-      print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)
+      print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim))
       if sim != actual:
          break
 
@@ -32,7 +33,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s
       seq = initSeq + ' '.join(tmpPrefix)
       actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements)
       sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets)
-      print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)
+      print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim))
       if sim != actual:
          seqPrefix = tmpPrefix
 
@@ -115,7 +116,7 @@ def main():
 
    for seq in seqList:
       fullSeq = ((args.initSeq + ' ') if args.initSeq else '') + seq
-      print fullSeq
+      print(fullSeq)
 
       html += ['<tr><td>' + fullSeq + '</td>']
       actualHits = set([getActualHits(fullSeq, args.level, args.sets, cBox, args.slice, args.nMeasurements) for _ in range(0, args.rep)])
@@ -151,14 +152,14 @@ def main():
       html += ['</tr>']
 
       if not args.randPolicies and not args.best:
-         print 'Possible policies: ' + ', '.join(possiblePolicies)
+         print('Possible policies: ' + ', '.join(possiblePolicies))
          if not possiblePolicies: break
 
    if not args.randPolicies and args.findCtrEx:
-      print ''
-      print 'Counter example(s): '
+      print('')
+      print('Counter example(s):')
       for p, ctrEx in counterExamples.items():
-         print '  ' + p + ': ' + ctrEx
+         print('  ' + p + ': ' + ctrEx)
 
    html += ['</table>', '</body>', '</html>']
 
@@ -166,10 +167,10 @@ def main():
       f.write('\n'.join(html))
 
    if not args.randPolicies and not args.best:
-      print 'Possible policies: ' + ', '.join(possiblePolicies)
+      print('Possible policies: ' + ', '.join(possiblePolicies))
    else:
       for p, d in reversed(sorted(dists.items(), key=lambda d: d[1])):
-         print p + ': ' + str(d)
+         print(p + ': ' + str(d))
 
 
 if __name__ == "__main__":
diff --git a/tools/CacheAnalyzer/setDueling.py b/tools/CacheAnalyzer/setDueling.py
index 230b5eb..8f73478 100755
--- a/tools/CacheAnalyzer/setDueling.py
+++ b/tools/CacheAnalyzer/setDueling.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import random
 
@@ -31,7 +32,7 @@ def main():
    nCBoxes = max(1, getNCBoxUnits())
    nSlicesPerCBox = 1
    if getCacheInfo(3).nSlices:
-      nSlicesPerCBox = getCacheInfo(3).nSlices / getCacheInfo(3).nCboxes
+      nSlicesPerCBox = getCacheInfo(3).nSlices // getCacheInfo(3).nCboxes
 
    seqLength = (args.length if args.length is not None else assoc+1)
    seq = ' '.join('B' + str(i) + '?' for i in range(0, seqLength))
@@ -42,7 +43,7 @@ def main():
    html = ['<html>', '<head>', '<title>' + title + '</title>', '<script src="https://cdn.plot.ly/plotly-latest.min.js">', '</script>', '</head>', '<body>']
    html += ['<h3>' + title + '</h3>']
 
-   setsForSlice = {cBox: {cSlice: range(0,nL3Sets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
+   setsForSlice = {cBox: {cSlice: list(range(0,nL3Sets)) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
    L3HitsDict = {cBox: {cSlice: [[] for s in range(0, nL3Sets)]  for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
 
    prevOti = ''
@@ -69,11 +70,11 @@ def main():
                                                  nMeasurements=args.nMeasurements, agg='med')
 
                      if nb['L1_MISS'] < seqLength - .2:
-                        print 'Hit in L1'
+                        print('Hit in L1')
                         continue
 
                      if nb['L2_MISS'] < seqLength - .2:
-                        print 'Hit in L2'
+                        print('Hit in L2')
                         continue
 
                      L3Hits.append(nb['L3_HIT'])
@@ -121,7 +122,7 @@ def main():
 
    with open(args.output ,'w') as f:
       f.write('\n'.join(html))
-      print 'Output written to ' + args.output
+      print('Output written to ' + args.output)
 
 
 if __name__ == "__main__":
diff --git a/tools/CacheAnalyzer/strideGraph.py b/tools/CacheAnalyzer/strideGraph.py
index 4ef5109..d6cc569 100755
--- a/tools/CacheAnalyzer/strideGraph.py
+++ b/tools/CacheAnalyzer/strideGraph.py
@@ -1,9 +1,9 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import math
-
-from plotly.offline import plot
 import plotly.graph_objects as go
+from plotly.offline import plot
 
 from cacheLib import *
 
@@ -28,9 +28,9 @@ def main():
    while pt <= args.endSize*1024:
       tickvals.append(pt)
       for x in ([int(math.pow(2, math.log(pt, 2) + i/16.0)) for i in range(0,16)] if pt < args.endSize*1024 else [pt]):
-         print x/1024
+         print(x//1024)
          xValues.append(str(x))
-         addresses = range(0, x, args.stride)
+         addresses = list(range(0, x, args.stride))
          nAddresses.append(len(addresses))
          ec = getCodeForAddressLists([AddressList(addresses, False, False, False)], wbinvd=True)
          nbDicts.append(runNanoBench(code=ec.code, init=ec.init, oneTimeInit=ec.oneTimeInit))
@@ -57,7 +57,7 @@ def main():
 
    with open(args.output ,'w') as f:
       f.write('\n'.join(html))
-      print 'Graph written to ' + args.output
+      print('Graph written to ' + args.output)
 
 if __name__ == "__main__":
     main()
diff --git a/tools/cpuBench/addAMDDocToXML.py b/tools/cpuBench/addAMDDocToXML.py
index a624aa1..212c39d 100755
--- a/tools/cpuBench/addAMDDocToXML.py
+++ b/tools/cpuBench/addAMDDocToXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 from collections import namedtuple
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
@@ -69,13 +70,13 @@ def main():
       iclassAsmDict.setdefault(re.sub('{.*} ', '', asm), set()).add(instrNode)
 
    #for x in set(op for de in docList for op in de.operands):
-   #   print x
+   #   print(x)
 
    xmlToDocDict = dict()
 
    for de in sorted(docEntrySet):
       if de.mnemonic not in iclassAsmDict:
-         print 'no XML entry found for ' + str(de)
+         print('no XML entry found for ' + str(de))
 
       xmlFound = False
       for instrNode in iclassAsmDict[de.mnemonic]:
@@ -135,15 +136,15 @@ def main():
             elif (set(de.operands) == {None}) and (set(xmlToDocDict[instrNode].operands) != {None}):
                pass
             else:
-               print 'duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de))
+               print('duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de)))
          else:
             xmlFound = True
             xmlToDocDict[instrNode] = de
 
       if not xmlFound:
-         print 'no matching XML entry found for ' + str(de)
+         print('no matching XML entry found for ' + str(de))
 
-   print 'Found data for ' + str(len(xmlToDocDict)) + ' instruction variants'
+   print('Found data for ' + str(len(xmlToDocDict)) + ' instruction variants')
 
    for instrNode, de in xmlToDocDict.items():
       archNode = instrNode.find('./architecture[@name="{}"]'.format(args.arch))
diff --git a/tools/cpuBench/addDocToXML.py b/tools/cpuBench/addDocToXML.py
index bb78144..093b405 100755
--- a/tools/cpuBench/addDocToXML.py
+++ b/tools/cpuBench/addDocToXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 from collections import namedtuple
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
@@ -43,9 +44,9 @@ def main():
                   matchingDEs.remove(de)
 
          if len(matchingDEs) == 0:
-            print 'No matching iform: ' + iform
+            print('No matching iform: ' + iform)
          elif len(matchingDEs) > 1:
-            print 'Multiple matching iforms: ' + iform
+            print('Multiple matching iforms: ' + iform)
          else:
             de = next(iter(matchingDEs))
 
diff --git a/tools/cpuBench/addURLsToXML.py b/tools/cpuBench/addURLsToXML.py
index ade794b..bb4711a 100755
--- a/tools/cpuBench/addURLsToXML.py
+++ b/tools/cpuBench/addURLsToXML.py
@@ -1,8 +1,9 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 import argparse
 import re
-import urllib
+import urllib.request
 from xml.dom import minidom
 from utils import *
 
@@ -12,7 +13,7 @@ def main():
    parser.add_argument("output", help="Output XML file")
    args = parser.parse_args()
 
-   html = urllib.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*')
+   html = urllib.request.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*')
    lines = re.findall('href="\./(.*?)">(.*?)</a>.*?</td><td>(.*?)</td>', html) # Example: ('ADC.html', 'ADC', 'Add with Carry'),
    lineDict = {(line[0],line[1]):line for line in lines}
 
@@ -128,7 +129,7 @@ def main():
                matchingLines.append(line)
 
       if len(matchingLines) > 1:
-         print 'Duplicate link found for ' + iclass
+         print('Duplicate link found for ' + iclass)
          exit(1)
 
       instrNode.attrib['url'] = 'uops.info/html-instr/' + canonicalizeInstrString(instrNode.attrib['string']) + '.html'
diff --git a/tools/cpuBench/compareMeasurementsToOther.py b/tools/cpuBench/compareMeasurementsToOther.py
index 9175013..d22ec85 100755
--- a/tools/cpuBench/compareMeasurementsToOther.py
+++ b/tools/cpuBench/compareMeasurementsToOther.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 import argparse
 import sys
@@ -66,13 +67,13 @@ def main():
             else:
                portsDiff = True
                nPortsDiff += 1
-               if args.verbose: print 'PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts)
+               if args.verbose: print('PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts))
          else:
             nPortsMeasurementOnly += 1
       else:
          if otherPorts:
             nPortsOtherOnly += 1
-            if args.verbose: print 'PortsOtherOnly: ' + instrNode.attrib['string']
+            if args.verbose: print('PortsOtherOnly: ' + instrNode.attrib['string'])
 
       otherUops = [v for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('uops') and v.replace('.','',1).isdigit()]
       mUops = ([v for a,v in measurementNode.attrib.items() if a.startswith('uops') and not 'retire_slots' in a] if measurementNode is not None else [])
@@ -86,13 +87,13 @@ def main():
                nUopsEqPortsDiff += int(portsDiff)
             else:
                nUopsDiff += 1
-               if args.verbose: print 'UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops)
+               if args.verbose: print('UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops))
          else:
             nUopsMeasurementOnly += 1
       else:
          if otherUops:
             nUopsOtherOnly += 1
-            if args.verbose: print 'UopsOtherOnly: ' + instrNode.attrib['string']
+            if args.verbose: print('UopsOtherOnly: ' + instrNode.attrib['string'])
 
 
       otherLatencies = [float(v) for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('latency') and v.replace('.','',1).isdigit()]
@@ -113,54 +114,54 @@ def main():
                      nLatUBClose += 1
                else:
                   nLatUBIncorrect += 1
-                  if args.verbose: print 'LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)
+                  if args.verbose: print('LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies))
             else:
                nLatNoUB += 1
                if maxLat in otherLatencies:
                   nLatNoUBMaxEq += 1
                else:
                   nLatNoUBMaxDiff += 1
-                  if args.verbose: print 'LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)
+                  if args.verbose: print('LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies))
          else:
             nLatMeasurementOnly += 1
       else:
          if otherLatencies:
             nLatOtherOnly += 1
-            if args.verbose: print 'LatOtherOnly: ' + instrNode.attrib['string']
+            if args.verbose: print('LatOtherOnly: ' + instrNode.attrib['string'])
 
-   print 'Ports:'
-   print '  Measurement data only: ' + str(nPortsMeasurementOnly)
-   print '  Other data only: ' + str(nPortsOtherOnly)
-   print '  Both: ' + str(nPortsBoth)
-   print '    Eq: ' + str(nPortsEq)
-   print '    Diff: ' + str(nPortsDiff)
-   print ''
+   print('Ports:')
+   print('  Measurement data only: ' + str(nPortsMeasurementOnly))
+   print('  Other data only: ' + str(nPortsOtherOnly))
+   print('  Both: ' + str(nPortsBoth))
+   print('    Eq: ' + str(nPortsEq))
+   print('    Diff: ' + str(nPortsDiff))
+   print('')
 
-   print 'Uops:'
-   print '  Measurement data only: ' + str(nUopsMeasurementOnly)
-   print '  Other data only: ' + str(nUopsOtherOnly)
-   print '  Both: ' + str(nUopsBoth)
-   print '    Eq: ' + str(nUopsEq)
-   print '      PortsEq: ' + str(nUopsEqPortsEq)
-   print '      PortsDiff: ' + str(nUopsEqPortsDiff)
-   print '    Diff: ' + str(nUopsDiff)
-   print ''
+   print('Uops:')
+   print('  Measurement data only: ' + str(nUopsMeasurementOnly))
+   print('  Other data only: ' + str(nUopsOtherOnly))
+   print('  Both: ' + str(nUopsBoth))
+   print('    Eq: ' + str(nUopsEq))
+   print('      PortsEq: ' + str(nUopsEqPortsEq))
+   print('      PortsDiff: ' + str(nUopsEqPortsDiff))
+   print('    Diff: ' + str(nUopsDiff))
+   print('')
 
-   print 'Latency:'
-   print '  Measurement data only: ' + str(nLatMeasurementOnly)
-   print '  Other data only: ' + str(nLatOtherOnly)
-   print '  Both: ' + str(nLatBoth)
-   print '    Exact: ' + str(nLatNoUB)
-   print '      Eq (Max): ' + str(nLatNoUBMaxEq)
-   print '      Diff (Max): ' + str(nLatNoUBMaxDiff)
-   print '    Upper Bound: ' + str(nLatUB)
-   print '      Correct: ' + str(nLatUBCorrect)
-   print '        Exact: ' + str(nLatUBExact)
-   print '        Close: ' + str(nLatUBClose)
-   print '      Incorrect: ' + str(nLatUBIncorrect)
-   print ''
+   print('Latency:')
+   print('  Measurement data only: ' + str(nLatMeasurementOnly))
+   print('  Other data only: ' + str(nLatOtherOnly))
+   print('  Both: ' + str(nLatBoth))
+   print('    Exact: ' + str(nLatNoUB))
+   print('      Eq (Max): ' + str(nLatNoUBMaxEq))
+   print('      Diff (Max): ' + str(nLatNoUBMaxDiff))
+   print('    Upper Bound: ' + str(nLatUB))
+   print('      Correct: ' + str(nLatUBCorrect))
+   print('        Exact: ' + str(nLatUBExact))
+   print('        Close: ' + str(nLatUBClose))
+   print('      Incorrect: ' + str(nLatUBIncorrect))
+   print('')
 
-   print 'Throughput:'
+   print('Throughput:')
    for TP_m, TP_o in [('TP', 'TP'), ('TP_ports', 'TP'), ('TP', 'TP_ports'), ('TP_ports', 'TP_ports')]:
       nTPMeasurementOnly = 0
       nTPOtherOnly = 0
@@ -184,28 +185,28 @@ def main():
                   nTPEq += 1
                else:
                   nTPDiff += 1
-                  if args.verbose: print 'TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)
+                  if args.verbose: print('TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs))
                diff = min(abs(float(m)-float(o)) for o in otherTPs for m in mTPs)
                if diff <= .1:
                   nTPClose += 1
                else:
                   nTPNotClose += 1
-                  if args.verbose: print 'TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)
+                  if args.verbose: print('TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs))
             else:
                nTPMeasurementOnly += 1
          else:
             if otherTPs:
                nTPOtherOnly += 1
-               if args.verbose: print 'TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string'])
+               if args.verbose: print('TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string']))
 
-      print '  {} (measurements) - {} (other):'.format(TP_m, TP_o)
-      print '    Measurement data only: ' + str(nTPMeasurementOnly)
-      print '    Other data only: ' + str(nTPOtherOnly)
-      print '    Both: ' + str(nTPBoth)
-      print '      Eq: ' + str(nTPEq)
-      print '      Diff: ' + str(nTPDiff)
-      print '      Close: ' + str(nTPClose)
-      print '      NotClose: ' + str(nTPNotClose)
+      print('  {} (measurements) - {} (other):'.format(TP_m, TP_o))
+      print('    Measurement data only: ' + str(nTPMeasurementOnly))
+      print('    Other data only: ' + str(nTPOtherOnly))
+      print('    Both: ' + str(nTPBoth))
+      print('      Eq: ' + str(nTPEq))
+      print('      Diff: ' + str(nTPDiff))
+      print('      Close: ' + str(nTPClose))
+      print('      NotClose: ' + str(nTPNotClose))
 
 if __name__ == "__main__":
     main()
diff --git a/tools/cpuBench/compareXML.py b/tools/cpuBench/compareXML.py
index 2766e13..0781a32 100755
--- a/tools/cpuBench/compareXML.py
+++ b/tools/cpuBench/compareXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
 import argparse
@@ -29,7 +30,7 @@ def main():
    for instrStr in sorted(instrNodeDict1):
       instrNode1 = instrNodeDict1[instrStr]
       if not instrStr in instrNodeDict2:
-         print 'No matching entry found for ' + instrStr
+         print('No matching entry found for ' + instrStr)
          continue
       instrNode2 = instrNodeDict2[instrStr]
       for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'):
@@ -40,44 +41,44 @@ def main():
 
                if tp1 != tp2:
                   tpDiff += 1
-                  print instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2)
+                  print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2))
 
             if args.lat:
                for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')):
-                  latStr1 = ET.tostring(latNode1, encoding='utf-8').strip()
-                  latStr2 = ET.tostring(latNode2, encoding='utf-8').strip()
+                  latStr1 = ET.tostring(latNode1, encoding='utf-8').decode().strip()
+                  latStr2 = ET.tostring(latNode2, encoding='utf-8').decode().strip()
                   if latStr1 != latStr2:
                      latDiff += 1
-                     print instrStr
-                     print '  ' + latStr1
-                     print '  ' + latStr2
+                     print(instrStr)
+                     print('  ' + latStr1)
+                     print('  ' + latStr2)
 
             if args.ports:
                p1 = mNode1.attrib.get('ports', '')
                p2 = mNode2.attrib.get('ports', '')
                if p1 != p2:
                   portsDiff += 1
-                  print instrStr + ' - P1: ' + p1 + ' - P2: ' + p2
+                  print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2)
 
             if not args.TP and not args.lat and not args.ports:
-               xmlStr1 = ET.tostring(mNode1, encoding='utf-8').strip()
-               xmlStr2 = ET.tostring(mNode2, encoding='utf-8').strip()
+               xmlStr1 = ET.tostring(mNode1, encoding='utf-8').decode().strip()
+               xmlStr2 = ET.tostring(mNode2, encoding='utf-8').decode().strip()
 
                if xmlStr1 != xmlStr2:
-                  print '-------------------------------'
-                  print instrStr
-                  print xmlStr1
-                  print xmlStr2
-                  print '-------------------------------'
+                  print('-------------------------------')
+                  print(instrStr)
+                  print(xmlStr1)
+                  print(xmlStr2)
+                  print('-------------------------------')
 
    if args.TP:
-      print 'TPDiff: ' + str(tpDiff)
+      print('TPDiff: ' + str(tpDiff))
 
    if args.lat:
-      print 'LatDiff: ' + str(latDiff)
+      print('LatDiff: ' + str(latDiff))
 
    if args.ports:
-      print 'portsDiff: ' + str(portsDiff)
+      print('portsDiff: ' + str(portsDiff))
 
 if __name__ == "__main__":
     main()
diff --git a/tools/cpuBench/cpuBench.py b/tools/cpuBench/cpuBench.py
index 466d6b3..93254c1 100755
--- a/tools/cpuBench/cpuBench.py
+++ b/tools/cpuBench/cpuBench.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 from xml.etree.ElementTree import Element, SubElement, Comment, tostring
 from xml.dom import minidom
@@ -79,7 +80,7 @@ def getIndexReg(instrNode, opNode):
 # registers that are not used as implicit registers should come first; RAX (and parts of it) should come last, as some instructions have special encodings for that
 # prefer low registers to high registers
 def sortRegs(regsList):
-   return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, map(int, re.findall('\d+',r)), r))
+   return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, list(map(int, re.findall(r'\d+',r))), r))
 
 
 # Initialize registers and memory
@@ -115,7 +116,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
                init += ['MOV {}, 0'.format(reg)]
             elif 'MM' in regPrefix and xtype.startswith('f'):
                init += ['MOV RAX, 0x4000000040000000']
-               for i in range(0, getRegSize(reg)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
+               for i in range(0, getRegSize(reg)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
 
                if isAVXInstr(instrNode):
                   init += ['VMOVUPD ' + reg + ', [R14]']
@@ -128,7 +129,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
          elif opNode.attrib['type'] == 'mem':
             if xtype.startswith('f'):
                init += ['MOV RAX, 0x4000000040000000']
-               for i in range(0, int(opNode.attrib['width'])/8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX']
+               for i in range(0, int(opNode.attrib['width'])//8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX']
 
       for opNode in instrNode.findall('./operand[@type="mem"]'):
          if opNode.attrib.get('suppressed', '0') == '1': continue
@@ -179,7 +180,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
    initObjFile = None
    lateInitObjFile=None
    if initCode:
-      if debugOutput: print 'init: ' + initCode
+      if debugOutput: print('init: ' + initCode)
       objFile = '/tmp/ramdisk/init.o'
       if useLateInit:
          lateInitObjFile = objFile
@@ -191,7 +192,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
       localHtmlReports.append('<li>Init: <pre>' + re.sub(';[ \t]*(.)', r';\n\1', initCode) + '</pre></li>\n')
 
    localHtmlReports.append('<li><a href="javascript:;" onclick="this.outerHTML = \'<pre>' + nanoBenchCmd + '</pre>\'">Show nanoBench command</a></li>\n')
-   if debugOutput: print nanoBenchCmd
+   if debugOutput: print(nanoBenchCmd)
 
    setNanoBenchParameters(unrollCount=unrollCount, loopCount=loopCount, warmUpCount=warmUpCount, basicMode=basicMode)
 
@@ -223,19 +224,19 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
 
    if maxRepeat>0:
       if any(v<-0.05 for v in ret.values()):
-         print 'Repeating experiment because there was a value < 0'
+         print('Repeating experiment because there was a value < 0')
          return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1)
 
       #sumPortUops = sum(v for e,v in ret.items() if 'PORT' in e and not '4' in e)
       #if (sumPortUops % 1) > .2 and (sumPortUops % 1) < .8:
-      #   print 'Repeating experiment because the sum of the port usages is not an integer'
-      #   print ret
+      #   print('Repeating experiment because the sum of the port usages is not an integer')
+      #   print(ret)
       #   return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=basicMode, htmlReports=htmlReports, maxRepeat=maxRepeat-1)
 
       if any('PORT' in e for e in ret):
          maxPortUops = max(v/(len(e)-9) for e,v in ret.items() if 'PORT' in e)
          if maxPortUops * .98 > ret['Core cycles']:
-            print 'Repeating experiment because there were more uops on a port than core cycles'
+            print('Repeating experiment because there were more uops on a port than core cycles')
             return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1)
 
    if htmlReports is not None:
@@ -250,10 +251,10 @@ def writeFile(fileName, content):
 
 def getMachineCode(objFile):
    try:
-      machineCode = subprocess.check_output(['objdump', '-M', 'intel', '-d', objFile])
+      machineCode = subprocess.check_output(['objdump', '-M', 'intel', '-d', objFile]).decode()
       return machineCode.partition('<.text>:\n')[2]
    except subprocess.CalledProcessError as e:
-      print "Error (getMachineCode): " + str(e)
+      print('Error (getMachineCode): ' + str(e))
 
 
 def getCodeLength(asmCode):
@@ -420,7 +421,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
                      ignoreRegs |= set(doNotWriteRegs)|globalDoNotWriteRegs|set(opRegDict.values())
                   if operandNode.attrib.get('r', '0') == '1':
                      ignoreRegs |= set(doNotReadRegs)|writtenRegs|readRegs|set(opRegDict.values())
-                  regsList = filter(lambda x: not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs), regsList)
+                  regsList = [x for x in regsList if not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs)]
                if not regsList:
                   return None;
                reg = sortRegs(regsList)[0]
@@ -507,7 +508,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
 def createIacaAsmFile(fileName, prefixInstr, prefixRep, instr):
    asm = '.intel_syntax noprefix\n .byte 0x0F, 0x0B; mov ebx, 111; .byte 0x64, 0x67, 0x90\n'
    if prefixInstr:
-      for i in xrange(prefixRep):
+      for i in range(prefixRep):
          asm += prefixInstr + "\n"
    asm += instr + "\n"
    asm += "1:\n"
@@ -521,9 +522,9 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
    readRegs = instrInstance.readRegs
    writtenRegs = instrInstance.writtenRegs
 
-   if debugOutput: print '  instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs)
+   if debugOutput: print('  instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs))
    blockInstrsList = getIndependentInstructions(blockInstrNode, True, False, writtenRegs|readRegs, writtenRegs|readRegs, 64)
-   if debugOutput: print '  bIL: ' + str(blockInstrsList)
+   if debugOutput: print('  bIL: ' + str(blockInstrsList))
 
    htmlReports.append('<hr><h3>With blocking instructions for port' +
                      ('s {' if len(blockedPorts)>1 else ' ') +
@@ -537,11 +538,11 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
          subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
          iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
       except subprocess.CalledProcessError as e:
-         print "Error: " + e.output
+         print('Error: ' + e.output)
          return None
 
       if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
-         print "IACA error"
+         print('IACA error')
          return None
 
       allPortsLine = re.search('\| Cycles \|.*', iacaOut).group(0)
@@ -584,7 +585,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
 
       blockInstrAsm = ';'.join(islice(cycle(x.asm for x in blockInstrsList), blockInstrRep))
 
-      unrollCount = 1000/blockInstrRep # make sure that instrs. fit into icache
+      unrollCount = 1000//blockInstrRep # make sure that instrs. fit into icache
       if isAMDCPU(): unrollCount = max(unrollCount, 100) # ZEN+ sometimes undercounts FP usage if code is short
 
 
@@ -596,7 +597,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
 
       if float(measurementResult['Core cycles']) < -10:
          #something went wrong; this happens for example on HSW with long sequences of JMP instructions
-         if debugOutput: print "Core cycles < -10 in getUopsOnBlockedPorts"
+         if debugOutput: print('Core cycles < -10 in getUopsOnBlockedPorts')
 
       if sum(u for p, u in measurementResult.items() if ('UOPS_PORT' in p or 'FpuPipeAssignment.Total' in p)) < blockInstrRep-.5:
          # something went wrong; fewer uops on ports than blockInstrRep
@@ -643,7 +644,7 @@ def getIndependentInstructions(instrNode, useDistinctRegs, useIndexedAddr, doNot
 
       maxMemWidth = 0
       for memNode in instrNode.findall('./operand[@type="mem"][@w="1"]'):
-         maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0'))/8)
+         maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0')) // 8)
       offset += maxMemWidth
 
       independentInstructions.append(instrI)
@@ -694,17 +695,17 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports):
       subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
       iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
-      print "Error: " + e.output
+      print('Error: ' + e.output)
       return None
 
    if debugOutput:
-      print instrNode.attrib['iform'] + ' - NoInteriteration'
-      print iaca_tp
+      print(instrNode.attrib['iform'] + ' - NoInteriteration')
+      print(iaca_tp)
 
    htmlReports.append('<pre>' + iaca_tp + '</pre>\n')
 
    if not iaca_tp or ' !' in iaca_tp or ' X' in iaca_tp or ' 0X' in iaca_tp or not 'Total Num Of Uops' in iaca_tp:
-      print "IACA error"
+      print('IACA error')
       return None
 
    cycles = float(iaca_tp.split('\n')[3].split()[2])
@@ -958,7 +959,7 @@ def getTPConfigsForDiv(instrNode):
          if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM'
 
          config.init = ['MOV RAX, ' + arg]
-         for i in range(0, getRegSize(regType)/8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX']
+         for i in range(0, getRegSize(regType)//8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX']
 
          targetRegIdx = min(int(opNode.attrib['idx']) for opNode in instrNode.findall('./operand') if opNode.text and regType in opNode.text)
          if memDivisor:
@@ -997,11 +998,11 @@ TPResult = namedtuple('TPResult', ['TP', 'TP_loop', 'TP_noLoop', 'TP_noDepBreaki
 def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports):
    configs = getTPConfigs(instrNode, useDistinctRegs, useIndexedAddr)
 
-   minTP = sys.maxint
-   minTP_loop = sys.maxint
-   minTP_noLoop = sys.maxint
-   minTP_noDepBreaking_noLoop = sys.maxint
-   minTP_single = sys.maxint
+   minTP = sys.maxsize
+   minTP_loop = sys.maxsize
+   minTP_noLoop = sys.maxsize
+   minTP_noDepBreaking_noLoop = sys.maxsize
+   minTP_single = sys.maxsize
 
    if useIACA:
       config = configs[0] # consider only first config as IACA does not seem to consider different values in registers
@@ -1024,17 +1025,17 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
             except subprocess.CalledProcessError as e:
                logging.warn('Error: ' + e.output)
-               if minTP != sys.maxint:
+               if minTP != sys.maxsize:
                   htmlReports.append('<pre>' + e.output + '</pre>\n')
                   continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
                else:
                   return None
 
             if not iaca_out or ' ! ' in iaca_out or ' X ' in iaca_out or ' 0X ' in iaca_out or not 'Total Num Of Uops' in iaca_out:
-               print "IACA error"
+               print('IACA error')
                return None
 
-            print instrNode.attrib['iform'] + ' - throughput'
+            print(instrNode.attrib['iform'] + ' - throughput')
 
             htmlReports.append('<pre>' + iaca_out + '</pre>\n')
 
@@ -1087,7 +1088,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
          instrIList = config.independentInstrs
          instrLen = getCodeLength(instrIList[0].asm)
          for ic in sorted(set([1, min(4, len(instrIList)), min(8, len(instrIList)), len(instrIList)])):
-            if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
+            if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break
 
             if len(instrIList) > 1: htmlReports.append('<h3 style="margin-left: 25px">With ' + str(ic) + ' independent instruction' + ('s' if ic>1 else '') + '</h3>\n')
             htmlReports.append('<div style="margin-left: 50px">\n')
@@ -1095,7 +1096,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
             init = list(chain.from_iterable(i.regMemInit for i in instrIList[0:ic])) + config.init
 
             for useDepBreakingInstrs in ([False, True] if config.depBreakingInstrs else [False]):
-               if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
+               if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break
 
                depBreakingInstrs = ''
                if useDepBreakingInstrs:
@@ -1103,7 +1104,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                   htmlReports.append('<h4>With additional dependency-breaking instructions</h4>\n')
 
                for repType in ['unrollOnly', 'loopSmall', 'loopBig']:
-                  if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
+                  if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break
 
                   paddingTypes = ['']
                   if ((repType != 'unrollOnly') and (uopsMITE is not None) and (not uopsMS) and (math.ceil(32.0/instrLen) * uopsMITE > 18)
@@ -1138,7 +1139,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                         else:
                            loopCount = 100
                            unrollCount *= 10
-                        if minTP < sys.maxint and minTP > 100:
+                        if minTP < sys.maxsize and minTP > 100:
                            unrollCount = 1
                            loopCount = 10
 
@@ -1162,7 +1163,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                      #if any('PORT' in e for e in result):
                      #   maxPortUops = max(v/(len(e)-9) for e,v in result.items() if e.startswith('UOPS_PORT') and not '4' in e)
                      #   if maxPortUops * .98 > result['Core cycles']:
-                     #      print 'More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles'])
+                     #      print('More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles']))
                      #       #invalid = True
 
                      #if not invalid:
@@ -1174,7 +1175,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                      else:
                         minTP_loop = min(minTP_loop, cycles)
 
-                     if ic == 1 and (minTP == sys.maxint or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
+                     if ic == 1 and (minTP == sys.maxsize or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
                         minConfig = config
                         minTP_single = min(minTP_single, cycles)
 
@@ -1217,7 +1218,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
 
             htmlReports.append('</div>')
 
-      if minTP < sys.maxint:
+      if minTP < sys.maxsize:
          return TPResult(minTP, minTP_loop, minTP_noLoop, minTP_noDepBreaking_noLoop, minTP_single, uops, uopsFused, uopsMITE, uopsMS, divCycles, ILD_stalls,
                          complexDec, nAvailableSimpleDecoders, minConfig, ports_dict)
 
@@ -1246,7 +1247,7 @@ def getBasicLatencies(instrNodeList):
    movsxResult = runExperiment(instrNodeDict['MOVSXD (R64, R32)'], 'MOVSX RAX, EAX')
    movsxCycles = int(round(movsxResult['Core cycles']))
    if movsxCycles != 1:
-      print 'Latency of MOVSX must be 1'
+      print('Latency of MOVSX must be 1')
       sys.exit()
    basicLatency['MOVSX'] = movsxCycles
 
@@ -1275,7 +1276,7 @@ def getBasicLatencies(instrNodeList):
       testSetResult = runExperiment(None, 'TEST AL, AL; SET' + flag[0] + ' AL')
       testSetCycles = int(round(testSetResult['Core cycles']))
       if not testSetCycles == 2:
-         print 'Latencies of TEST and SET' + flag[0] + ' must be 1'
+         print('Latencies of TEST and SET' + flag[0] + ' must be 1')
          sys.exit()
       basicLatency['SET' + flag[0]] = 1
       basicLatency['TEST'] = 1
@@ -1297,7 +1298,7 @@ def getBasicLatencies(instrNodeList):
       result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
       basicLatency[instr] = int(round(result['Core cycles']))
 
-   if filter(lambda x: x.findall('[@iclass="VANDPS"]'), instrNodeList):
+   if any(x.findall('[@iclass="VANDPS"]') for x in instrNodeList):
       for instr in ['VANDPS', 'VANDPD', 'VORPS', 'VORPD', 'VPAND', 'VPOR']:
          result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, XMM)'], instr + ' XMM1, XMM1, XMM1')
          basicLatency[instr] = int(round(result['Core cycles']))
@@ -1310,7 +1311,7 @@ def getBasicLatencies(instrNodeList):
          result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
          basicLatency[instr] = int(round(result['Core cycles']))
 
-   if filter(lambda x: x.findall('[@extension="AVX512EVEX"]'), instrNodeList):
+   if any(x.findall('[@extension="AVX512EVEX"]') for x in instrNodeList):
       kmovq_result = runExperiment(instrNodeDict['KMOVQ (K, K)'], 'KMOVQ K1, K1')
       basicLatency['KMOVQ'] = int(round(kmovq_result['Core cycles']))
 
@@ -1321,7 +1322,7 @@ def getBasicLatencies(instrNodeList):
          basicLatency['VMOVUPS_' + regType + '_' + 'K'] = vmovups_cycles
 
          if not vmovups_uops == 1:
-            print 'VMOVUPS must have exactly 1 uop'
+            print('VMOVUPS must have exactly 1 uop')
             sys.exit()
 
          vpmovq2m_result = runExperiment(instrNodeDict['VPMOVQ2M (K, ' + regType + ')'],
@@ -1337,7 +1338,7 @@ def getBasicLatencies(instrNodeList):
       mov_10movsx_mov_result = runExperiment(None, 'mov ' + reg + ', [r14];' + ';'.join(10*['MOVSX R12, R12w']) + '; mov [r14], ' + reg , unrollCount=100)
       basicLatency['MOV_10MOVSX_MOV_'+str(memWidth)] = int(round(mov_10movsx_mov_result['Core cycles']))
 
-   print 'Basic Latencies: ' + str(basicLatency)
+   print('Basic Latencies: ' + str(basicLatency))
 
 # Returns a dict {opNode: instr}, s.t. opNode is both read and written, and instr breaks the dependency
 # Returns a list of dependency breaking instructions for operands that are both read and written (with the exception of ignoreOperand, if specified).
@@ -1541,8 +1542,8 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep):
 
          init = ['MOV RAX, ' + dividend]
          init += ['MOV RBX, ' + divisor]
-         for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RBX']
-         for i in range(64, 64+getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
+         for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RBX']
+         for i in range(64, 64+getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
 
          if instrNode.attrib['iclass'] in ['DIVSS', 'DIVPS', 'DIVSD', 'DIVPD']:
             init += ['MOVUP' + dataType + ' XMM1, [R14+64]']
@@ -1671,7 +1672,7 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep):
          if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM'
 
          init = ['MOV RAX, ' + arg]
-         for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
+         for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
 
          targetReg = regType + '0'
          sourceBaseReg = regType + '1'
@@ -1778,7 +1779,7 @@ def getLatConfigsFromMemToReg(instrNode, instrI, memOpNode, targetReg, addrReg,
             if memOpNode.attrib['width'] != chainOpNode1.attrib['width']: continue
             if memOpNode.attrib.get('VSIB', '') != chainOpNode1.attrib.get('VSIB', ''): continue
 
-            for chainOpNode2 in filter(lambda x: targetReg in x.text.split(','), chainInstrNode.findall('./operand[@type="reg"][@w="1"]')):
+            for chainOpNode2 in [x for x in chainInstrNode.findall('./operand[@type="reg"][@w="1"]') if targetReg in x.text.split(',')]:
                if chainOpNode2.attrib.get('optional', '') == '1': continue
                chainsInstr = getInstrInstanceFromNode(chainInstrNode, [targetReg], [targetReg], True, {int(chainOpNode2.attrib['idx']):targetReg}).asm
                result.append(LatConfig(instrI, chainInstrs=chainsInstr, chainLatency=1))
@@ -1971,7 +1972,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
          else:
             if len(regs2) == 1:
                reg2 = sortRegs(regs2)[0]
-               otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg2), regs1)
+               otherRegs = [x for x in regs1 if getCanonicalReg(x) != getCanonicalReg(reg2)]
                if otherRegs:
                   reg1 = sortRegs(otherRegs)[0]
                else:
@@ -1988,7 +1989,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
                            reg2 = r
                            break
                else:
-                  otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg1), regs2)
+                  otherRegs = [x for x in regs2 if getCanonicalReg(x) != getCanonicalReg(reg1)]
                   if otherRegs:
                      reg2 = sortRegs(otherRegs)[0]
 
@@ -2053,7 +2054,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
                   chainInstrInt, chainLatencyInt = getChainInstrForVectorRegs(instrNode, reg2, reg1, cRep, 'Int')
                   configList.append(LatConfig(instrI, chainInstrs=chainInstrInt, chainLatency=chainLatencyInt))
             else:
-               print 'invalid reg prefix: ' + reg1Prefix
+               print('invalid reg prefix: ' + reg1Prefix)
                return None
          else:
             configList.isUpperBound = True
@@ -2143,7 +2144,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
                configList.extend(getLatConfigsFromMemToReg(instrNode, instrI, targetNode, reg, addrReg, cRep))
          else:
             # ToDo
-            print 'unsupported reg to mem'
+            print('unsupported reg to mem')
             return None
    elif startNode.attrib['type'] == 'flags':
       #################
@@ -2225,7 +2226,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
 
          if suppressedStart:
             if not regs.issubset(GPRegs):
-               print 'read from suppressed mem to non-GPR reg not yet supported'
+               print('read from suppressed mem to non-GPR reg not yet supported')
                return None
 
          instrI = getInstrInstanceFromNode(instrNode, [addrReg, indexReg, 'R12'], [addrReg, indexReg, 'R12'], useDistinctRegs, {targetNodeIdx:reg},
@@ -2358,11 +2359,11 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
             subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
             iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
          except subprocess.CalledProcessError as e:
-            print "Error: " + e.output
+            print('Error: ' + e.output)
             return None
 
          if '!' in iaca_lat or not 'Latency' in iaca_lat:
-            print "IACA error"
+            print('IACA error')
             return None
 
          latency = iaca_lat.split('\n')[3].split()[1]
@@ -2444,7 +2445,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                   latConfigLists = getLatConfigLists(instrNode, opNode1, opNode2, useDistinctRegs, addrMem, tpDict)
                   if latConfigLists is None: continue
 
-                  minLat = sys.maxint
+                  minLat = sys.maxsize
                   maxLat = 0
 
                   minLatIsUpperBound = False
@@ -2453,7 +2454,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                   configHtmlReports = []
 
                   for latConfigList in latConfigLists:
-                     minLatForCurList = sys.maxint
+                     minLatForCurList = sys.maxsize
 
                      if not any((latConfig.init or latConfig.instrI.regMemInit) for latConfig in latConfigList.latConfigs):
                         # Test different register values for read-only registers
@@ -2463,7 +2464,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                            readOnlyRegOpNodeIdx = int(readOnlyRegOpNode.attrib['idx'])
                            for latConfig in list(latConfigList.latConfigs):
                               if not readOnlyRegOpNodeIdx in latConfig.instrI.opRegDict:
-                                 print 'readOnlyRegOpNodeIdx not found in opRegDict'
+                                 print('readOnlyRegOpNodeIdx not found in opRegDict')
                                  continue
                               reg = latConfig.instrI.opRegDict[readOnlyRegOpNodeIdx]
                               if (not reg in GPRegs) or (reg in High8Regs) or (reg in globalDoNotWriteRegs) or (reg in specialRegs): continue
@@ -2551,8 +2552,8 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                            else:
                               latConfig.chainInstrs += 'VPCMPD {0}, {1}, {1}, 7;'.format(maskReg, 'XMM15')
 
-                     mlDP = sys.maxint
-                     mlnoDP = sys.maxint
+                     mlDP = sys.maxsize
+                     mlnoDP = sys.maxsize
 
                      for latConfig in latConfigList.latConfigs:
                         configI += 1
@@ -2576,7 +2577,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                         configHtmlReports.append('</ul>\n')
 
                         if not measurementResult:
-                           print 'no result found'
+                           print('no result found')
                            continue
 
                         cycles = measurementResult['Core cycles']
@@ -2869,7 +2870,7 @@ def main():
    else:
       cpu = cpuid.CPUID()
       arch = cpuid.micro_arch(cpu)
-      print cpuid.get_basic_info(cpu)
+      print(cpuid.get_basic_info(cpu))
       if arch == 'unknown':
          exit(1)
 
@@ -2906,7 +2907,7 @@ def main():
    try:
       subprocess.check_output('mkdir -p /tmp/ramdisk; sudo mount -t tmpfs -o size=100M none /tmp/ramdisk/', shell=True)
    except subprocess.CalledProcessError as e:
-      print "Could not create ramdisk " + e.output
+      print('Could not create ramdisk ' + e.output.decode())
       exit(1)
 
    XMLRoot = ET.parse(args.input).getroot()
@@ -2957,7 +2958,7 @@ def main():
    else:
       for i, instrNode in enumerate(instrNodeList):
          #if not 'RCR (R64, 1)' in instrNode.attrib['string']: continue
-         print 'Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
+         print('Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')
 
          htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Throughput and Uops' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
 
@@ -2968,7 +2969,7 @@ def main():
          if hasExplMemOp: htmlReports.append('<h2 id="nonIndexedAddr">With a non-indexed addressing mode</h2>\n')
 
          tpResult = getThroughputAndUops(instrNode, True, False, htmlReports)
-         print instrNode.attrib['string'] + " - tp: " + str(tpResult)
+         print(instrNode.attrib['string'] + " - tp: " + str(tpResult))
 
          if tpResult:
             tpDict[instrNode] = tpResult
@@ -3005,7 +3006,7 @@ def main():
       with open('tp_' + arch + '.pickle', 'wb') as f:
          pickle.dump((tpDict, tpDictSameReg, tpDictIndexedAddr, tpDictNoInteriteration), f)
 
-   num_ports = len(tpDict.values()[0].unblocked_ports)
+   num_ports = len(list(tpDict.values())[0].unblocked_ports)
 
    ########################
    # Latency
@@ -3023,13 +3024,13 @@ def main():
    elif not useIACA or iacaVersion == '2.1':
       for i, instrNode in enumerate(instrNodeList):
          #if not 'DIV' in instrNode.attrib['string']: continue
-         print 'Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
+         print('Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')
 
          htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Latency' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
          lat = getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports)
 
          if lat is not None:
-            if debugOutput: print instrNode.attrib['iform'] + ': ' + str(lat)
+            if debugOutput: print(instrNode.attrib['iform'] + ': ' + str(lat))
             latencyDict[instrNode] = lat
             writeHtmlFile('html-lat/'+arch, instrNode, instrNode.attrib['string'], ''.join(htmlReports))
       with open('lat_' + arch + '.pickle', 'wb') as f:
@@ -3080,21 +3081,21 @@ def main():
             # their throughput is limited to 1 per cycle; thus, they are disallowed by the TP_noDepBreaking_noLoop check above
             disallowedBlockingInstrs.remove(instrNodeDict['MOVD (R32, XMM)'])
 
-      print 'disallowedBlockingInstrs'
+      print('disallowedBlockingInstrs')
       for instrNode in disallowedBlockingInstrs:
-         print '  ' + str(instrNode.attrib['string'])
+         print('  ' + str(instrNode.attrib['string']))
 
-      print 'tpDict'
+      print('tpDict')
       for instr, tpResult in tpDict.items():
-         print '  ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports)
+         print('  ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports))
 
       # we cannot start higher than .79 as IACA has .2 uops on each port for a port usage of, e.g., 1*p1256
       # using uops_dict instead can be problematic because in IACA the uops on the individual ports do not always add up to this value
       oneUopInstrs = [instr for instr, tpResult in tpDict.items() if instr not in disallowedBlockingInstrs and .79 < sum([v for v in tpResult.unblocked_ports.values() if v>.1]) < 1.11]
 
-      print 'oneUopInstrs'
+      print('oneUopInstrs')
       for instrNode in oneUopInstrs:
-         print '  ' + str(instrNode.attrib['string'])
+         print('  ' + str(instrNode.attrib['string']))
       # dicts from port combination to a set of instructions (either not containing AVX or SSE instructions bec. of transition penalty) that always uses these ports
       blockingInstructionsDictNonAVX_set = {}
       blockingInstructionsDictNonSSE_set = {}
@@ -3102,7 +3103,7 @@ def main():
       for instrNode in oneUopInstrs:
          usedPorts = frozenset({p for p, x in tpDict[instrNode].unblocked_ports.items() if x>0.1})
          if usedPorts:
-            print instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]')))
+            print(instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]'))))
 
             if not isSSEInstr(instrNode):
                if not usedPorts in blockingInstructionsDictNonSSE_set: blockingInstructionsDictNonSSE_set[usedPorts] = set()
@@ -3118,10 +3119,10 @@ def main():
       blockingInstructionsDictNonSSE = {comb: next(iter(sorted(instr_set, key=sort_key))) for comb, instr_set in blockingInstructionsDictNonSSE_set.items()}
 
       #for comb, instr_set in blockingInstructionsDictNonAVX_set.items():
-      #   print comb
-      #   print [x.attrib['string'] for x in sorted(instr_set, key=sort_key)]
+      #   print(comb)
+      #   print([x.attrib['string'] for x in sorted(instr_set, key=sort_key)])
 
-      #print str(blockingInstructionsDictNonAVX.items())
+      #print(str(blockingInstructionsDictNonAVX.items()))
 
       if isIntelCPU():
          # mov to mem has always two uops: store address and store data; there is no instruction that uses just one of them
@@ -3138,26 +3139,26 @@ def main():
          if storeAddressPorts not in blockingInstructionsDictNonAVX: blockingInstructionsDictNonAVX[storeAddressPorts] = movMemInstrNode
          if storeAddressPorts not in blockingInstructionsDictNonSSE: blockingInstructionsDictNonSSE[storeAddressPorts] = movMemInstrNode
 
-      print 'Non-AVX:'
+      print('Non-AVX:')
       for k,v in blockingInstructionsDictNonAVX.items():
-         print str(k) + ': ' + v.attrib['iform']
-      print 'Non-SSE:'
+         print(str(k) + ': ' + v.attrib['iform'])
+      print('Non-SSE:')
       for k,v in blockingInstructionsDictNonSSE.items():
-         print str(k) + ': ' + v.attrib['iform']
+         print(str(k) + ': ' + v.attrib['iform'])
 
       sortedPortCombinationsNonAVX = sorted(blockingInstructionsDictNonAVX.keys(), key=lambda x:(len(x), sorted(x)))
       sortedPortCombinationsNonSSE = sorted(blockingInstructionsDictNonSSE.keys(), key=lambda x:(len(x), sorted(x)))
-      print 'sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX)
+      print('sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX))
 
-      for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (tpDict[x].config.preInstrNodes, x.attrib['string']))):
+      for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (len(tpDict[x].config.preInstrNodes), x.attrib['string']))):
          #if not 'CVTPD2PI' in instrNode.attrib['string']: continue
 
-         print 'Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
+         print('Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')
 
          htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Port Usage' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>']
 
          for useDistinctRegs in ([True, False] if instrNode in tpDictSameReg else [True]):
-            for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]):
+            for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]):               
                tpResult = None
 
                if not useDistinctRegs:
@@ -3176,7 +3177,7 @@ def main():
 
                # use abs because on, e.g., IVB port usages might be smaller in the second half of the experiments if replays happen
                used_ports = {p for p, x in tpResult.unblocked_ports.items() if abs(x)>0.05}
-               if debugOutput: print instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports)
+               if debugOutput: print(instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports))
 
                if not isAVXInstr(instrNode):
                   blockingInstrs = blockingInstructionsDictNonAVX
@@ -3218,13 +3219,13 @@ def main():
                      blockInstrRep = min(blockInstrRep, 100)
                      uopsOnBlockedPorts = getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockingInstrs[combination], blockInstrRep, combination, tpResult.config, htmlReports)
                      if uopsOnBlockedPorts is None:
-                        print 'no uops on blocked ports: ' + str(combination)
+                        print('no uops on blocked ports: ' + str(combination))
                         continue
 
                      uopsOnBlockedPorts -= prevUopsOnCombination
 
                      if rem_uops < uopsOnBlockedPorts:
-                        print 'More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts)
+                        print('More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts))
 
                      if uopsOnBlockedPorts <= 0: continue
 
@@ -3338,8 +3339,8 @@ def main():
             try:
                resultNode.attrib['TP_ports'+suffix] = "%.2f" % getTP_LP(portUsageWithDivList)
             except ValueError as err:
-               print 'Could not solve LP for ' + instrNode.attrib['string'] + ':'
-               print err
+               print('Could not solve LP for ' + instrNode.attrib['string'] + ':')
+               print(err)
 
    with open(args.output, "w") as f:
       reparsed = XMLRoot
@@ -3358,7 +3359,7 @@ def main():
    except subprocess.CalledProcessError:
       exit(1)
 
-   print 'Total number of microbenchmarks: ' + str(nExperiments)
+   print('Total number of microbenchmarks: ' + str(nExperiments))
 
 
 if __name__ == "__main__":
diff --git a/tools/cpuBench/mergeXML.py b/tools/cpuBench/mergeXML.py
index a46f120..e81b229 100755
--- a/tools/cpuBench/mergeXML.py
+++ b/tools/cpuBench/mergeXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
 import argparse
@@ -20,7 +21,7 @@ def main():
 
    for instrNode1 in root1.iter('instruction'):
       if instrNode1.attrib['string'] not in instrNode2Dict:
-         print 'no matching entry found for ' + instrNode1.attrib['string']
+         print('no matching entry found for ' + instrNode1.attrib['string'])
          continue
       for instrNode2 in instrNode2Dict[instrNode1.attrib['string']]:
          for archNode2 in instrNode2.iter('architecture'):
diff --git a/tools/cpuBench/utils.py b/tools/cpuBench/utils.py
index 7657d1a..a29862c 100755
--- a/tools/cpuBench/utils.py
+++ b/tools/cpuBench/utils.py
@@ -137,7 +137,7 @@ def getLatencyTableEntry(measurementNode):
    if measurementNode is None or measurementNode.find('./latency') is None:
       return None
 
-   minLat = sys.maxint
+   minLat = sys.maxsize
    maxLat = 0
    minLatUB = False
    maxLatUB = False