python 3

2026-01-08 13:20:12 +01:00 · 2021-03-13 21:04:52 +01:00
parent ca7f63370b
commit 313aa5ee30
19 changed files with 275 additions and 268 deletions
--- a/tools/CPUID/cpuid.py
+++ b/tools/CPUID/cpuid.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-# Copyright (C) 2019 Andreas Abel
+# Copyright (C) 2021 Andreas Abel
 #
 # This file was modified from https://github.com/flababah/cpuid.py
 #
@@ -408,7 +408,7 @@ def get_cache_info(cpu):
         parameters.append('Physical Line partitions (P): ' + str(P))
         parameters.append('Ways of associativity (W): ' + str(W))
         parameters.append('Number of Sets (S): ' + str(S))
-         parameters.append('Cache Size: ' + str(W*P*L*S/1024) + ' kB')
+         parameters.append('Cache Size: ' + str(W*P*L*S//1024) + ' kB')

         if get_bit(d, 0): parameters.append('WBINVD/INVD is not guaranteed to act upon lower level caches of non-originating threads sharing this cache')
         else: parameters.append('WBINVD/INVD from threads sharing this cache acts upon lower level caches for threads sharing this cache')
@@ -447,7 +447,7 @@ def get_cache_info(cpu):

      cacheInfo['L1D'] = {
         'lineSize': L1DcLineSize,
-         'nSets': L1DcSize*1024/L1DcAssoc/L1DcLineSize,
+         'nSets': L1DcSize*1024//L1DcAssoc//L1DcLineSize,
         'assoc': L1DcAssoc
      }

@@ -463,7 +463,7 @@ def get_cache_info(cpu):

      cacheInfo['L1I'] = {
         'lineSize': L1IcLineSize,
-         'nSets': L1IcSize*1024/L1IcAssoc/L1IcLineSize,
+         'nSets': L1IcSize*1024//L1IcAssoc//L1IcLineSize,
         'assoc': L1IcAssoc
      }

@@ -484,7 +484,7 @@ def get_cache_info(cpu):
      elif c_15_12 == 0xC: L2Assoc = 64
      elif c_15_12 == 0xD: L2Assoc = 96
      elif c_15_12 == 0xE: L2Assoc = 128
-      elif c_15_12 == 0x2: L2Assoc = L2Size*1024/L2LineSize
+      elif c_15_12 == 0x2: L2Assoc = L2Size*1024//L2LineSize

      log.info('  L2LineSize: ' + str(L2LineSize) + ' B')
      log.info('  L2LinesPerTag: ' + str(L2LinesPerTag))
@@ -493,7 +493,7 @@ def get_cache_info(cpu):

      cacheInfo['L2'] = {
         'lineSize': L2LineSize,
-         'nSets': L2Size*1024/L2Assoc/L2LineSize,
+         'nSets': L2Size*1024//L2Assoc//L2LineSize,
         'assoc': L2Assoc
      }

@@ -519,11 +519,11 @@ def get_cache_info(cpu):
      log.info('  L3LineSize: ' + str(L3LineSize) + ' B')
      log.info('  L3LinesPerTag: ' + str(L3LinesPerTag))
      log.info('  L3Assoc: ' + str(L3Assoc))
-      log.info('  L3Size: ' + str(L3Size/1024) + ' MB')
+      log.info('  L3Size: ' + str(L3Size//1024) + ' MB')

      cacheInfo['L3'] = {
         'lineSize': L3LineSize,
-         'nSets': L3Size*1024/L3Assoc/L3LineSize,
+         'nSets': L3Size*1024//L3Assoc//L3LineSize,
         'assoc': L3Assoc
      }

@@ -551,13 +551,13 @@ if __name__ == "__main__":
                yield (eax, regs)
                eax += 1

-    print " ".join(x.ljust(8) for x in ("CPUID", "A", "B", "C", "D")).strip()
+    print(' '.join(x.ljust(8) for x in ('CPUID', 'A', 'B', 'C', 'D')).strip())
    for eax, regs in valid_inputs():
-        print "%08x" % eax, " ".join("%08x" % reg for reg in regs)
+        print('%08x' % eax, ' '.join('%08x' % reg for reg in regs))

-    print ''
-    print get_basic_info(cpuid)
+    print('')
+    print(get_basic_info(cpuid))

-    print '\nCache information:'
+    print('\nCache information:')
    get_cache_info(cpuid)

--- a/tools/CacheAnalyzer/cacheGraph.py
+++ b/tools/CacheAnalyzer/cacheGraph.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 from itertools import count
 from collections import namedtuple, OrderedDict

@@ -73,7 +74,7 @@ def main():
      _, nbDict = getAgesOfBlocks(blocks, args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice,
                                  clearHL=(not args.noClearHL), wbinvd=(not args.noWbinvd), returnNbResults=True, maxAge=args.maxAge,
                                  nMeasurements=args.nMeasurements, agg=args.agg)
-      for event in sorted(e for e in nbDict.values()[0][0].keys() if 'HIT' in e or 'MISS' in e):
+      for event in sorted(e for e in list(nbDict.values())[0][0].keys() if 'HIT' in e or 'MISS' in e):
         traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks]
         title = 'Access Sequence: ' + (args.seq_init + ' ' + args.seq).replace('?','').strip() + ' <n fresh blocks> <block>?'
         html.append(getPlotlyGraphDiv(title, '# of fresh blocks', event, traces))
@@ -82,7 +83,7 @@ def main():

   with open(args.output ,'w') as f:
      f.write('\n'.join(html))
-      print 'Graph written to ' + args.output
+      print('Graph written to ' + args.output)


 if __name__ == "__main__":
--- a/tools/CacheAnalyzer/cacheInfo.py
+++ b/tools/CacheAnalyzer/cacheInfo.py
@@ -1,6 +1,6 @@
-#!/usr/bin/python
-import argparse
+#!/usr/bin/env python3

+import argparse
 from cacheLib import *

 import logging
@@ -16,11 +16,11 @@ def main():

   cpuidInfo = getCpuidCacheInfo()

-   print ''
-   print getCacheInfo(1)
-   print getCacheInfo(2)
+   print('')
+   print(getCacheInfo(1))
+   print(getCacheInfo(2))
   if 'L3' in cpuidInfo:
-      print getCacheInfo(3)
+      print(getCacheInfo(3))


 if __name__ == "__main__":
--- a/tools/CacheAnalyzer/cacheLib.py
+++ b/tools/CacheAnalyzer/cacheLib.py
@@ -1,4 +1,3 @@
-#!/usr/bin/python
 from itertools import count
 from collections import namedtuple

@@ -79,11 +78,11 @@ class CacheInfo:

   def __str__(self):
      return '\n'.join(['L' + str(self.level) + ':',
-                        '  Size: ' + str(self.size/1024) + ' kB',
+                        '  Size: ' + str(self.size//1024) + ' kB',
                        '  Associativity: ' + str(self.assoc),
                        '  Line Size: ' + str(self.lineSize) + ' B',
                        '  Number of sets' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.nSets),
-                        '  Way size' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.waySize/1024) + ' kB',
+                        '  Way size' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.waySize//1024) + ' kB',
                       ('  Number of CBoxes: ' + str(self.nCboxes) if self.nCboxes is not None else ''),
                       ('  Number of slices: ' + str(self.nSlices) if self.nSlices is not None else '')])

@@ -134,13 +133,13 @@ def getCacheInfo(level):
            assoc = cpuidInfo['assoc']
            nSets = cpuidInfo['nSets']

-            stride = 2**((lineSize*nSets/getNCBoxUnits())-1).bit_length() # smallest power of two larger than lineSize*nSets/nCBoxUnits
+            stride = 2**((lineSize*nSets//getNCBoxUnits())-1).bit_length() # smallest power of two greater than or equal to lineSize*nSets/nCBoxUnits
            ms = findMaximalNonEvictingL3SetInCBox(0, stride, assoc, 0)
            log.debug('Maximal non-evicting L3 set: ' + str(len(ms)) + ' ' + str(ms))
            nCboxes = getNCBoxUnits()
            nSlices = nCboxes * int(math.ceil(float(len(ms))/assoc))

-            getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets/nSlices, nSlices, nCboxes)
+            getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets//nSlices, nSlices, nCboxes)
      return getCacheInfo.L3CacheInfo
   else:
      raise ValueError('invalid level')
@@ -376,9 +375,9 @@ def getAddresses(level, wayID, cacheSetList, cBox=1, cSlice=0):
                  L3SetToWayIDMap[cBox][cSlice][L3Set][i] = addr
         if not wayID in L3SetToWayIDMap[cBox][cSlice][L3Set]:
            if getCacheInfo(3).nSlices == getNCBoxUnits():
-               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, L3SetToWayIDMap[cBox][cSlice][L3Set].values())))
+               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
            else:
-               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, L3SetToWayIDMap[cBox][cSlice][L3Set].values())))
+               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
         addresses.append(L3SetToWayIDMap[cBox][cSlice][L3Set][wayID])

      return addresses
@@ -404,16 +403,16 @@ def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False):
      for s in cacheSetsStr.split(','):
         if '-' in s:
            first, last = s.split('-')[:2]
-            cacheSetList += range(int(first), int(last)+1)
+            cacheSetList += list(range(int(first), int(last)+1))
         else:
            cacheSetList.append(int(s))
   else:
      nSets = getCacheInfo(level).nSets
      if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes):
         nHLSets = getCacheInfo(level-1).nSets
-         cacheSetList = range(nHLSets, nSets)
+         cacheSetList = list(range(nHLSets, nSets))
      else:
-         cacheSetList = range(0, nSets)
+         cacheSetList = list(range(0, nSets))
   return cacheSetList


@@ -509,7 +508,7 @@ def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0,

 def printNB(nb_result):
   for r in nb_result.items():
-      print r[0] + ': ' + str(r[1])
+      print(r[0] + ': ' + str(r[1]))


 def hasL3Conflicts(addresses, clearHLAddrList, codeOffset):
--- a/tools/CacheAnalyzer/cacheSeq.py
+++ b/tools/CacheAnalyzer/cacheSeq.py
@@ -1,12 +1,12 @@
-#!/usr/bin/python
-from itertools import count, cycle, islice
-from collections import namedtuple, OrderedDict
+#!/usr/bin/env python3

 import argparse
 import sys
+from itertools import count, cycle, islice
+from collections import namedtuple, OrderedDict

-from cacheLib import *
 import cacheSim
+from cacheLib import *

 import logging
 log = logging.getLogger(__name__)
@@ -37,7 +37,7 @@ def main():
      policyClass = cacheSim.AllPolicies[args.sim]
      seq = args.seq_init + (' ' + args.seq) * args.loop
      hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets) / args.loop
-      print 'Hits: ' + str(hits)
+      print('Hits: ' + str(hits))
   else:
      nb = runCacheExperiment(args.level, args.seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL),
                              doNotUseOtherCBoxes=args.noUseOtherCBoxes, loop=args.loop, wbinvd=(not args.noWbinvd), nMeasurements=args.nMeasurements, agg=args.agg)
--- a/tools/CacheAnalyzer/cacheSim.py
+++ b/tools/CacheAnalyzer/cacheSim.py
@@ -1,9 +1,7 @@
-#!/usr/bin/python
 import random

 from itertools import count
 from numpy import median
-
 from cacheLib import *

 import logging
@@ -85,7 +83,7 @@ class PLRUSim(ReplPolicySim):
   def updateIndexBits(self, accIndex):
      lastIdx = accIndex
      for level in reversed(range(0, len(self.bits))):
-         curIdx = lastIdx/2
+         curIdx = lastIdx//2
         self.bits[level][curIdx] = 1 - (lastIdx % 2)
         lastIdx = curIdx

@@ -111,7 +109,7 @@ AllRandPLRUVariants = {

 class LRU_PLRU4Sim(ReplPolicySim):
   def __init__(self, assoc):
-      self.PLRUs = [PLRUSim(4, linearInit=True) for _ in range(0, assoc/4)]
+      self.PLRUs = [PLRUSim(4, linearInit=True) for _ in range(0, assoc//4)]
      self.PLRUOrdered = list(self.PLRUs) # from MRU to LRU

   def acc(self, block):
@@ -299,9 +297,9 @@ CommonPolicies = {
   'SRRIP': AllDetQLRUVariants['QLRU_H00_M2_R0_U0_UMO'],
 }

-AllDetPolicies = dict(CommonPolicies.items() + AllDetQLRUVariants.items())
-AllRandPolicies = dict(AllRandQLRUVariants.items() + AllRandPLRUVariants.items())
-AllPolicies = dict(AllDetPolicies.items() + AllRandPolicies.items())
+AllDetPolicies = dict(list(CommonPolicies.items()) + list(AllDetQLRUVariants.items()))
+AllRandPolicies = dict(list(AllRandQLRUVariants.items()) + list(AllRandPLRUVariants.items()))
+AllPolicies = dict(list(AllDetPolicies.items()) + list(AllRandPolicies.items()))


 def parseCacheSetsStrSim(cacheSetsStr):
@@ -312,7 +310,7 @@ def parseCacheSetsStrSim(cacheSetsStr):
   for s in cacheSetsStr.split(','):
      if '-' in s:
         first, last = s.split('-')[:2]
-         cacheSetList += range(int(first), int(last)+1)
+         cacheSetList += list(range(int(first), int(last)+1))
      else:
         cacheSetList.append(int(s))

@@ -381,8 +379,8 @@ def getPermutations(policySimClass, assoc):
   initAges = getAges(initBlocks, seq, policySimClass, assoc)

   accSeqStr = 'Access sequence: <wbinvd> ' + seq
-   print accSeqStr
-   print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}'
+   print(accSeqStr)
+   print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}')

   blocks = ['B' + str(i) for i in range(0, assoc)]
   baseSeq = ' '.join(initBlocks + blocks)
@@ -390,8 +388,8 @@ def getPermutations(policySimClass, assoc):
   ages = getAges(blocks, baseSeq, policySimClass, assoc)

   accSeqStr = 'Access sequence: <wbinvd> ' + baseSeq
-   print accSeqStr
-   print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}'
+   print(accSeqStr)
+   print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}')

   blocksSortedByAge = [a[0] for a in sorted(ages.items(), key=lambda x: -x[1])] # most recent block first

@@ -408,5 +406,5 @@ def getPermutations(policySimClass, assoc):
            break
         perm[assoc-permAge] = bi

-      print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))
+      print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)))

--- a/tools/CacheAnalyzer/hitMiss.py
+++ b/tools/CacheAnalyzer/hitMiss.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import sys

@@ -32,10 +33,10 @@ def main():
      seq = re.sub('[?!]', '', ' '.join([args.seq_init, args.seq])).strip() + '?'
      hits = cacheSim.getHits(seq, policyClass, args.simAssoc, args.sets)
      if hits > 0:
-         print 'HIT'
+         print('HIT')
         exit(1)
      else:
-         print 'MISS'
+         print('MISS')
         exit(0)
   else:
      setCount = len(parseCacheSetsStr(args.level, True, args.sets))
@@ -43,10 +44,10 @@ def main():
      nb = runCacheExperiment(args.level, seq, initSeq=args.seq_init, cacheSets=args.sets, cBox=args.cBox, cSlice=args.slice, clearHL=(not args.noClearHL),
                              loop=args.loop, wbinvd=(not args.noWbinvd))
      if nb['L' + str(args.level) + '_HIT']/setCount > .5:
-         print 'HIT'
+         print('HIT')
         exit(1)
      else:
-         print 'MISS'
+         print('MISS')
         exit(0)


--- a/tools/CacheAnalyzer/permPolicy.py
+++ b/tools/CacheAnalyzer/permPolicy.py
@@ -1,20 +1,20 @@
-#!/usr/bin/python
-from itertools import count
-from collections import namedtuple, OrderedDict
+#!/usr/bin/env python3

 import argparse
 import math
 import os
+import plotly.graph_objects as go
 import re
 import subprocess
 import sys

+from itertools import count
+from collections import namedtuple, OrderedDict
 from plotly.offline import plot
-import plotly.graph_objects as go

+import cacheSim
 from cacheLib import *
 from cacheGraph import *
-import cacheSim

 import logging
 log = logging.getLogger(__name__)
@@ -36,8 +36,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
                                         cBox=cBox, cSlice=cSlice)

      accSeqStr = 'Access sequence: <wbinvd> ' + seq
-      print accSeqStr
-      print 'Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}'
+      print(accSeqStr)
+      print('Ages: {' + ', '.join(b + ': ' + str(initAges[b]) for b in initBlocks) + '}')

      event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent)
      traces = [(b, [nb[event] for nb in nbDict[b]]) for b in initBlocks]
@@ -52,8 +52,8 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
                                  cBox=cBox, cSlice=cSlice)

   accSeqStr = 'Access sequence: <wbinvd> ' + baseSeq
-   print accSeqStr
-   print 'Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}'
+   print(accSeqStr)
+   print('Ages: {' + ', '.join(b + ': ' + str(ages[b]) for b in blocks) + '}')

   event = (hitEvent if hitEvent in next(iter(nbDict.items()))[1][0] else missEvent)
   traces = [(b, [nb[event] for nb in nbDict[b]]) for b in blocks]
@@ -77,7 +77,7 @@ def getPermutations(level, html, cacheSets=None, getInitialAges=True, maxAge=Non
            break
         perm[assoc-permAge] = bi

-      print u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm))
+      print(u'\u03A0_' + str(permI) + ' = ' + str(tuple(perm)))


 def main():
--- a/tools/CacheAnalyzer/replPolicy.py
+++ b/tools/CacheAnalyzer/replPolicy.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import random
 import sys
@@ -23,7 +24,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s
      seq = initSeq + ' '.join(seqPrefix)
      actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements)
      sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets)
-      print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)
+      print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim))
      if sim != actual:
         break

@@ -32,7 +33,7 @@ def findSmallCounterexample(policy, initSeq, level, sets, cBox, cSlice, assoc, s
      seq = initSeq + ' '.join(tmpPrefix)
      actual = getActualHits(seq, level, sets, cBox, cSlice, nMeasurements)
      sim = cacheSim.getHits(seq, cacheSim.AllPolicies[policy], assoc, sets)
-      print 'seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim)
+      print('seq:' + seq + ', actual: ' + str(actual) + ', sim: ' + str(sim))
      if sim != actual:
         seqPrefix = tmpPrefix

@@ -115,7 +116,7 @@ def main():

   for seq in seqList:
      fullSeq = ((args.initSeq + ' ') if args.initSeq else '') + seq
-      print fullSeq
+      print(fullSeq)

      html += ['<tr><td>' + fullSeq + '</td>']
      actualHits = set([getActualHits(fullSeq, args.level, args.sets, cBox, args.slice, args.nMeasurements) for _ in range(0, args.rep)])
@@ -151,14 +152,14 @@ def main():
      html += ['</tr>']

      if not args.randPolicies and not args.best:
-         print 'Possible policies: ' + ', '.join(possiblePolicies)
+         print('Possible policies: ' + ', '.join(possiblePolicies))
         if not possiblePolicies: break

   if not args.randPolicies and args.findCtrEx:
-      print ''
-      print 'Counter example(s): '
+      print('')
+      print('Counter example(s):')
      for p, ctrEx in counterExamples.items():
-         print '  ' + p + ': ' + ctrEx
+         print('  ' + p + ': ' + ctrEx)

   html += ['</table>', '</body>', '</html>']

@@ -166,10 +167,10 @@ def main():
      f.write('\n'.join(html))

   if not args.randPolicies and not args.best:
-      print 'Possible policies: ' + ', '.join(possiblePolicies)
+      print('Possible policies: ' + ', '.join(possiblePolicies))
   else:
      for p, d in reversed(sorted(dists.items(), key=lambda d: d[1])):
-         print p + ': ' + str(d)
+         print(p + ': ' + str(d))


 if __name__ == "__main__":
--- a/tools/CacheAnalyzer/setDueling.py
+++ b/tools/CacheAnalyzer/setDueling.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import random

@@ -31,7 +32,7 @@ def main():
   nCBoxes = max(1, getNCBoxUnits())
   nSlicesPerCBox = 1
   if getCacheInfo(3).nSlices:
-      nSlicesPerCBox = getCacheInfo(3).nSlices / getCacheInfo(3).nCboxes
+      nSlicesPerCBox = getCacheInfo(3).nSlices // getCacheInfo(3).nCboxes

   seqLength = (args.length if args.length is not None else assoc+1)
   seq = ' '.join('B' + str(i) + '?' for i in range(0, seqLength))
@@ -42,7 +43,7 @@ def main():
   html = ['<html>', '<head>', '<title>' + title + '</title>', '<script src="https://cdn.plot.ly/plotly-latest.min.js">', '</script>', '</head>', '<body>']
   html += ['<h3>' + title + '</h3>']

-   setsForSlice = {cBox: {cSlice: range(0,nL3Sets) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
+   setsForSlice = {cBox: {cSlice: list(range(0,nL3Sets)) for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}
   L3HitsDict = {cBox: {cSlice: [[] for s in range(0, nL3Sets)]  for cSlice in range(0, nSlicesPerCBox)} for cBox in range(0, nCBoxes)}

   prevOti = ''
@@ -69,11 +70,11 @@ def main():
                                                 nMeasurements=args.nMeasurements, agg='med')

                     if nb['L1_MISS'] < seqLength - .2:
-                        print 'Hit in L1'
+                        print('Hit in L1')
                        continue

                     if nb['L2_MISS'] < seqLength - .2:
-                        print 'Hit in L2'
+                        print('Hit in L2')
                        continue

                     L3Hits.append(nb['L3_HIT'])
@@ -121,7 +122,7 @@ def main():

   with open(args.output ,'w') as f:
      f.write('\n'.join(html))
-      print 'Output written to ' + args.output
+      print('Output written to ' + args.output)


 if __name__ == "__main__":
--- a/tools/CacheAnalyzer/strideGraph.py
+++ b/tools/CacheAnalyzer/strideGraph.py
@@ -1,9 +1,9 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import argparse
 import math
-
-from plotly.offline import plot
 import plotly.graph_objects as go
+from plotly.offline import plot

 from cacheLib import *

@@ -28,9 +28,9 @@ def main():
   while pt <= args.endSize*1024:
      tickvals.append(pt)
      for x in ([int(math.pow(2, math.log(pt, 2) + i/16.0)) for i in range(0,16)] if pt < args.endSize*1024 else [pt]):
-         print x/1024
+         print(x//1024)
         xValues.append(str(x))
-         addresses = range(0, x, args.stride)
+         addresses = list(range(0, x, args.stride))
         nAddresses.append(len(addresses))
         ec = getCodeForAddressLists([AddressList(addresses, False, False, False)], wbinvd=True)
         nbDicts.append(runNanoBench(code=ec.code, init=ec.init, oneTimeInit=ec.oneTimeInit))
@@ -57,7 +57,7 @@ def main():

   with open(args.output ,'w') as f:
      f.write('\n'.join(html))
-      print 'Graph written to ' + args.output
+      print('Graph written to ' + args.output)

 if __name__ == "__main__":
    main()
--- a/tools/cpuBench/addAMDDocToXML.py
+++ b/tools/cpuBench/addAMDDocToXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 from collections import namedtuple
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
@@ -69,13 +70,13 @@ def main():
      iclassAsmDict.setdefault(re.sub('{.*} ', '', asm), set()).add(instrNode)

   #for x in set(op for de in docList for op in de.operands):
-   #   print x
+   #   print(x)

   xmlToDocDict = dict()

   for de in sorted(docEntrySet):
      if de.mnemonic not in iclassAsmDict:
-         print 'no XML entry found for ' + str(de)
+         print('no XML entry found for ' + str(de))

      xmlFound = False
      for instrNode in iclassAsmDict[de.mnemonic]:
@@ -135,15 +136,15 @@ def main():
            elif (set(de.operands) == {None}) and (set(xmlToDocDict[instrNode].operands) != {None}):
               pass
            else:
-               print 'duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de))
+               print('duplicate entry for ' + instrNode.attrib['string'] + ' found: ' + str(list(xmlToDocDict[instrNode])) + ', ' + str(list(de)))
         else:
            xmlFound = True
            xmlToDocDict[instrNode] = de

      if not xmlFound:
-         print 'no matching XML entry found for ' + str(de)
+         print('no matching XML entry found for ' + str(de))

-   print 'Found data for ' + str(len(xmlToDocDict)) + ' instruction variants'
+   print('Found data for ' + str(len(xmlToDocDict)) + ' instruction variants')

   for instrNode, de in xmlToDocDict.items():
      archNode = instrNode.find('./architecture[@name="{}"]'.format(args.arch))
--- a/tools/cpuBench/addDocToXML.py
+++ b/tools/cpuBench/addDocToXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 from collections import namedtuple
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
@@ -43,9 +44,9 @@ def main():
                  matchingDEs.remove(de)

         if len(matchingDEs) == 0:
-            print 'No matching iform: ' + iform
+            print('No matching iform: ' + iform)
         elif len(matchingDEs) > 1:
-            print 'Multiple matching iforms: ' + iform
+            print('Multiple matching iforms: ' + iform)
         else:
            de = next(iter(matchingDEs))

--- a/tools/cpuBench/addURLsToXML.py
+++ b/tools/cpuBench/addURLsToXML.py
@@ -1,8 +1,9 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 import argparse
 import re
-import urllib
+import urllib.request
 from xml.dom import minidom
 from utils import *

@@ -12,7 +13,7 @@ def main():
   parser.add_argument("output", help="Output XML file")
   args = parser.parse_args()

-   html = urllib.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*')
+   html = urllib.request.urlopen('https://www.felixcloutier.com/x86/').read().decode('utf-8').replace(u'\u2013', '-').replace(u'\u2217', '*')
   lines = re.findall('href="\./(.*?)">(.*?)</a>.*?</td><td>(.*?)</td>', html) # Example: ('ADC.html', 'ADC', 'Add with Carry'),
   lineDict = {(line[0],line[1]):line for line in lines}

@@ -128,7 +129,7 @@ def main():
               matchingLines.append(line)

      if len(matchingLines) > 1:
-         print 'Duplicate link found for ' + iclass
+         print('Duplicate link found for ' + iclass)
         exit(1)

      instrNode.attrib['url'] = 'uops.info/html-instr/' + canonicalizeInstrString(instrNode.attrib['string']) + '.html'
--- a/tools/cpuBench/compareMeasurementsToOther.py
+++ b/tools/cpuBench/compareMeasurementsToOther.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 import argparse
 import sys
@@ -66,13 +67,13 @@ def main():
            else:
               portsDiff = True
               nPortsDiff += 1
-               if args.verbose: print 'PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts)
+               if args.verbose: print('PortsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mPorts, otherPorts))
         else:
            nPortsMeasurementOnly += 1
      else:
         if otherPorts:
            nPortsOtherOnly += 1
-            if args.verbose: print 'PortsOtherOnly: ' + instrNode.attrib['string']
+            if args.verbose: print('PortsOtherOnly: ' + instrNode.attrib['string'])

      otherUops = [v for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('uops') and v.replace('.','',1).isdigit()]
      mUops = ([v for a,v in measurementNode.attrib.items() if a.startswith('uops') and not 'retire_slots' in a] if measurementNode is not None else [])
@@ -86,13 +87,13 @@ def main():
               nUopsEqPortsDiff += int(portsDiff)
            else:
               nUopsDiff += 1
-               if args.verbose: print 'UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops)
+               if args.verbose: print('UopsDiff: {} - {} - {}'.format(instrNode.attrib['string'], mUops, otherUops))
         else:
            nUopsMeasurementOnly += 1
      else:
         if otherUops:
            nUopsOtherOnly += 1
-            if args.verbose: print 'UopsOtherOnly: ' + instrNode.attrib['string']
+            if args.verbose: print('UopsOtherOnly: ' + instrNode.attrib['string'])


      otherLatencies = [float(v) for m in nonMeasurementNodes for a,v in m.attrib.items() if a.startswith('latency') and v.replace('.','',1).isdigit()]
@@ -113,54 +114,54 @@ def main():
                     nLatUBClose += 1
               else:
                  nLatUBIncorrect += 1
-                  if args.verbose: print 'LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)
+                  if args.verbose: print('LatUBIncorrect: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies))
            else:
               nLatNoUB += 1
               if maxLat in otherLatencies:
                  nLatNoUBMaxEq += 1
               else:
                  nLatNoUBMaxDiff += 1
-                  if args.verbose: print 'LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies)
+                  if args.verbose: print('LatNoUBMaxDiff: {} - {} - {}'.format(instrNode.attrib['string'], maxLat, otherLatencies))
         else:
            nLatMeasurementOnly += 1
      else:
         if otherLatencies:
            nLatOtherOnly += 1
-            if args.verbose: print 'LatOtherOnly: ' + instrNode.attrib['string']
+            if args.verbose: print('LatOtherOnly: ' + instrNode.attrib['string'])

-   print 'Ports:'
-   print '  Measurement data only: ' + str(nPortsMeasurementOnly)
-   print '  Other data only: ' + str(nPortsOtherOnly)
-   print '  Both: ' + str(nPortsBoth)
-   print '    Eq: ' + str(nPortsEq)
-   print '    Diff: ' + str(nPortsDiff)
-   print ''
+   print('Ports:')
+   print('  Measurement data only: ' + str(nPortsMeasurementOnly))
+   print('  Other data only: ' + str(nPortsOtherOnly))
+   print('  Both: ' + str(nPortsBoth))
+   print('    Eq: ' + str(nPortsEq))
+   print('    Diff: ' + str(nPortsDiff))
+   print('')

-   print 'Uops:'
-   print '  Measurement data only: ' + str(nUopsMeasurementOnly)
-   print '  Other data only: ' + str(nUopsOtherOnly)
-   print '  Both: ' + str(nUopsBoth)
-   print '    Eq: ' + str(nUopsEq)
-   print '      PortsEq: ' + str(nUopsEqPortsEq)
-   print '      PortsDiff: ' + str(nUopsEqPortsDiff)
-   print '    Diff: ' + str(nUopsDiff)
-   print ''
+   print('Uops:')
+   print('  Measurement data only: ' + str(nUopsMeasurementOnly))
+   print('  Other data only: ' + str(nUopsOtherOnly))
+   print('  Both: ' + str(nUopsBoth))
+   print('    Eq: ' + str(nUopsEq))
+   print('      PortsEq: ' + str(nUopsEqPortsEq))
+   print('      PortsDiff: ' + str(nUopsEqPortsDiff))
+   print('    Diff: ' + str(nUopsDiff))
+   print('')

-   print 'Latency:'
-   print '  Measurement data only: ' + str(nLatMeasurementOnly)
-   print '  Other data only: ' + str(nLatOtherOnly)
-   print '  Both: ' + str(nLatBoth)
-   print '    Exact: ' + str(nLatNoUB)
-   print '      Eq (Max): ' + str(nLatNoUBMaxEq)
-   print '      Diff (Max): ' + str(nLatNoUBMaxDiff)
-   print '    Upper Bound: ' + str(nLatUB)
-   print '      Correct: ' + str(nLatUBCorrect)
-   print '        Exact: ' + str(nLatUBExact)
-   print '        Close: ' + str(nLatUBClose)
-   print '      Incorrect: ' + str(nLatUBIncorrect)
-   print ''
+   print('Latency:')
+   print('  Measurement data only: ' + str(nLatMeasurementOnly))
+   print('  Other data only: ' + str(nLatOtherOnly))
+   print('  Both: ' + str(nLatBoth))
+   print('    Exact: ' + str(nLatNoUB))
+   print('      Eq (Max): ' + str(nLatNoUBMaxEq))
+   print('      Diff (Max): ' + str(nLatNoUBMaxDiff))
+   print('    Upper Bound: ' + str(nLatUB))
+   print('      Correct: ' + str(nLatUBCorrect))
+   print('        Exact: ' + str(nLatUBExact))
+   print('        Close: ' + str(nLatUBClose))
+   print('      Incorrect: ' + str(nLatUBIncorrect))
+   print('')

-   print 'Throughput:'
+   print('Throughput:')
   for TP_m, TP_o in [('TP', 'TP'), ('TP_ports', 'TP'), ('TP', 'TP_ports'), ('TP_ports', 'TP_ports')]:
      nTPMeasurementOnly = 0
      nTPOtherOnly = 0
@@ -184,28 +185,28 @@ def main():
                  nTPEq += 1
               else:
                  nTPDiff += 1
-                  if args.verbose: print 'TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)
+                  if args.verbose: print('TPDiff ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs))
               diff = min(abs(float(m)-float(o)) for o in otherTPs for m in mTPs)
               if diff <= .1:
                  nTPClose += 1
               else:
                  nTPNotClose += 1
-                  if args.verbose: print 'TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs)
+                  if args.verbose: print('TPNotClose ({} (measurements) - {} (other)): {} - {} - {}'.format(TP_m, TP_o, instrNode.attrib['string'], mTPs, otherTPs))
            else:
               nTPMeasurementOnly += 1
         else:
            if otherTPs:
               nTPOtherOnly += 1
-               if args.verbose: print 'TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string'])
+               if args.verbose: print('TPOtherOnly ({} (measurements) - {} (other)): {}'.format(TP_m, TP_o, instrNode.attrib['string']))

-      print '  {} (measurements) - {} (other):'.format(TP_m, TP_o)
-      print '    Measurement data only: ' + str(nTPMeasurementOnly)
-      print '    Other data only: ' + str(nTPOtherOnly)
-      print '    Both: ' + str(nTPBoth)
-      print '      Eq: ' + str(nTPEq)
-      print '      Diff: ' + str(nTPDiff)
-      print '      Close: ' + str(nTPClose)
-      print '      NotClose: ' + str(nTPNotClose)
+      print('  {} (measurements) - {} (other):'.format(TP_m, TP_o))
+      print('    Measurement data only: ' + str(nTPMeasurementOnly))
+      print('    Other data only: ' + str(nTPOtherOnly))
+      print('    Both: ' + str(nTPBoth))
+      print('      Eq: ' + str(nTPEq))
+      print('      Diff: ' + str(nTPDiff))
+      print('      Close: ' + str(nTPClose))
+      print('      NotClose: ' + str(nTPNotClose))

 if __name__ == "__main__":
    main()
--- a/tools/cpuBench/compareXML.py
+++ b/tools/cpuBench/compareXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
 import argparse
@@ -29,7 +30,7 @@ def main():
   for instrStr in sorted(instrNodeDict1):
      instrNode1 = instrNodeDict1[instrStr]
      if not instrStr in instrNodeDict2:
-         print 'No matching entry found for ' + instrStr
+         print('No matching entry found for ' + instrStr)
         continue
      instrNode2 = instrNodeDict2[instrStr]
      for mNode1 in instrNode1.findall('./architecture[@name="' + args.arch1 + '"]/measurement'):
@@ -40,44 +41,44 @@ def main():

               if tp1 != tp2:
                  tpDiff += 1
-                  print instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2)
+                  print(instrStr + ' - TP1: ' + str(tp1) + ' - TP2: ' + str(tp2))

            if args.lat:
               for latNode1, latNode2 in zip(mNode1.findall('./latency'), mNode2.findall('./latency')):
-                  latStr1 = ET.tostring(latNode1, encoding='utf-8').strip()
-                  latStr2 = ET.tostring(latNode2, encoding='utf-8').strip()
+                  latStr1 = ET.tostring(latNode1, encoding='utf-8').decode().strip()
+                  latStr2 = ET.tostring(latNode2, encoding='utf-8').decode().strip()
                  if latStr1 != latStr2:
                     latDiff += 1
-                     print instrStr
-                     print '  ' + latStr1
-                     print '  ' + latStr2
+                     print(instrStr)
+                     print('  ' + latStr1)
+                     print('  ' + latStr2)

            if args.ports:
               p1 = mNode1.attrib.get('ports', '')
               p2 = mNode2.attrib.get('ports', '')
               if p1 != p2:
                  portsDiff += 1
-                  print instrStr + ' - P1: ' + p1 + ' - P2: ' + p2
+                  print(instrStr + ' - P1: ' + p1 + ' - P2: ' + p2)

            if not args.TP and not args.lat and not args.ports:
-               xmlStr1 = ET.tostring(mNode1, encoding='utf-8').strip()
-               xmlStr2 = ET.tostring(mNode2, encoding='utf-8').strip()
+               xmlStr1 = ET.tostring(mNode1, encoding='utf-8').decode().strip()
+               xmlStr2 = ET.tostring(mNode2, encoding='utf-8').decode().strip()

               if xmlStr1 != xmlStr2:
-                  print '-------------------------------'
-                  print instrStr
-                  print xmlStr1
-                  print xmlStr2
-                  print '-------------------------------'
+                  print('-------------------------------')
+                  print(instrStr)
+                  print(xmlStr1)
+                  print(xmlStr2)
+                  print('-------------------------------')

   if args.TP:
-      print 'TPDiff: ' + str(tpDiff)
+      print('TPDiff: ' + str(tpDiff))

   if args.lat:
-      print 'LatDiff: ' + str(latDiff)
+      print('LatDiff: ' + str(latDiff))

   if args.ports:
-      print 'portsDiff: ' + str(portsDiff)
+      print('portsDiff: ' + str(portsDiff))

 if __name__ == "__main__":
    main()
--- a/tools/cpuBench/cpuBench.py
+++ b/tools/cpuBench/cpuBench.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 from xml.etree.ElementTree import Element, SubElement, Comment, tostring
 from xml.dom import minidom
@@ -79,7 +80,7 @@ def getIndexReg(instrNode, opNode):
 # registers that are not used as implicit registers should come first; RAX (and parts of it) should come last, as some instructions have special encodings for that
 # prefer low registers to high registers
 def sortRegs(regsList):
-   return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, map(int, re.findall('\d+',r)), r))
+   return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, list(map(int, re.findall(r'\d+',r))), r))


 # Initialize registers and memory
@@ -115,7 +116,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
               init += ['MOV {}, 0'.format(reg)]
            elif 'MM' in regPrefix and xtype.startswith('f'):
               init += ['MOV RAX, 0x4000000040000000']
-               for i in range(0, getRegSize(reg)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
+               for i in range(0, getRegSize(reg)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']

               if isAVXInstr(instrNode):
                  init += ['VMOVUPD ' + reg + ', [R14]']
@@ -128,7 +129,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
         elif opNode.attrib['type'] == 'mem':
            if xtype.startswith('f'):
               init += ['MOV RAX, 0x4000000040000000']
-               for i in range(0, int(opNode.attrib['width'])/8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX']
+               for i in range(0, int(opNode.attrib['width'])//8, 8): init += ['MOV [R14+' + str(i+memOffset) + '], RAX']

      for opNode in instrNode.findall('./operand[@type="mem"]'):
         if opNode.attrib.get('suppressed', '0') == '1': continue
@@ -179,7 +180,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
   initObjFile = None
   lateInitObjFile=None
   if initCode:
-      if debugOutput: print 'init: ' + initCode
+      if debugOutput: print('init: ' + initCode)
      objFile = '/tmp/ramdisk/init.o'
      if useLateInit:
         lateInitObjFile = objFile
@@ -191,7 +192,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
      localHtmlReports.append('<li>Init: <pre>' + re.sub(';[ \t]*(.)', r';\n\1', initCode) + '</pre></li>\n')

   localHtmlReports.append('<li><a href="javascript:;" onclick="this.outerHTML = \'<pre>' + nanoBenchCmd + '</pre>\'">Show nanoBench command</a></li>\n')
-   if debugOutput: print nanoBenchCmd
+   if debugOutput: print(nanoBenchCmd)

   setNanoBenchParameters(unrollCount=unrollCount, loopCount=loopCount, warmUpCount=warmUpCount, basicMode=basicMode)

@@ -223,19 +224,19 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,

   if maxRepeat>0:
      if any(v<-0.05 for v in ret.values()):
-         print 'Repeating experiment because there was a value < 0'
+         print('Repeating experiment because there was a value < 0')
         return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1)

      #sumPortUops = sum(v for e,v in ret.items() if 'PORT' in e and not '4' in e)
      #if (sumPortUops % 1) > .2 and (sumPortUops % 1) < .8:
-      #   print 'Repeating experiment because the sum of the port usages is not an integer'
-      #   print ret
+      #   print('Repeating experiment because the sum of the port usages is not an integer')
+      #   print(ret)
      #   return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=basicMode, htmlReports=htmlReports, maxRepeat=maxRepeat-1)

      if any('PORT' in e for e in ret):
         maxPortUops = max(v/(len(e)-9) for e,v in ret.items() if 'PORT' in e)
         if maxPortUops * .98 > ret['Core cycles']:
-            print 'Repeating experiment because there were more uops on a port than core cycles'
+            print('Repeating experiment because there were more uops on a port than core cycles')
            return runExperiment(instrNode, instrCode, init=init, unrollCount=unrollCount, loopCount=loopCount, basicMode=True, htmlReports=htmlReports, maxRepeat=maxRepeat-1)

   if htmlReports is not None:
@@ -250,10 +251,10 @@ def writeFile(fileName, content):

 def getMachineCode(objFile):
   try:
-      machineCode = subprocess.check_output(['objdump', '-M', 'intel', '-d', objFile])
+      machineCode = subprocess.check_output(['objdump', '-M', 'intel', '-d', objFile]).decode()
      return machineCode.partition('<.text>:\n')[2]
   except subprocess.CalledProcessError as e:
-      print "Error (getMachineCode): " + str(e)
+      print('Error (getMachineCode): ' + str(e))


 def getCodeLength(asmCode):
@@ -420,7 +421,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
                     ignoreRegs |= set(doNotWriteRegs)|globalDoNotWriteRegs|set(opRegDict.values())
                  if operandNode.attrib.get('r', '0') == '1':
                     ignoreRegs |= set(doNotReadRegs)|writtenRegs|readRegs|set(opRegDict.values())
-                  regsList = filter(lambda x: not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs), regsList)
+                  regsList = [x for x in regsList if not any(getCanonicalReg(x) == getCanonicalReg(y) for y in ignoreRegs)]
               if not regsList:
                  return None;
               reg = sortRegs(regsList)[0]
@@ -507,7 +508,7 @@ def getInstrInstanceFromNode(instrNode, doNotWriteRegs=None, doNotReadRegs=None,
 def createIacaAsmFile(fileName, prefixInstr, prefixRep, instr):
   asm = '.intel_syntax noprefix\n .byte 0x0F, 0x0B; mov ebx, 111; .byte 0x64, 0x67, 0x90\n'
   if prefixInstr:
-      for i in xrange(prefixRep):
+      for i in range(prefixRep):
         asm += prefixInstr + "\n"
   asm += instr + "\n"
   asm += "1:\n"
@@ -521,9 +522,9 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
   readRegs = instrInstance.readRegs
   writtenRegs = instrInstance.writtenRegs

-   if debugOutput: print '  instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs)
+   if debugOutput: print('  instr: ' + instr + 'rR: ' + str(readRegs) + ', wR: ' + str(writtenRegs))
   blockInstrsList = getIndependentInstructions(blockInstrNode, True, False, writtenRegs|readRegs, writtenRegs|readRegs, 64)
-   if debugOutput: print '  bIL: ' + str(blockInstrsList)
+   if debugOutput: print('  bIL: ' + str(blockInstrsList))

   htmlReports.append('<hr><h3>With blocking instructions for port' +
                     ('s {' if len(blockedPorts)>1 else ' ') +
@@ -537,11 +538,11 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
         subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
-         iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
+         iacaOut = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
      except subprocess.CalledProcessError as e:
-         print "Error: " + e.output
+         print('Error: ' + e.output.decode())
         return None

      if not iacaOut or ' !' in iacaOut or ' X' in iacaOut or ' 0X' in iacaOut or not 'Total Num Of Uops' in iacaOut:
-         print "IACA error"
+         print('IACA error')
         return None

      allPortsLine = re.search('\| Cycles \|.*', iacaOut).group(0)
@@ -584,7 +585,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr

      blockInstrAsm = ';'.join(islice(cycle(x.asm for x in blockInstrsList), blockInstrRep))

-      unrollCount = 1000/blockInstrRep # make sure that instrs. fit into icache
+      unrollCount = 1000//blockInstrRep # make sure that instrs. fit into icache
      if isAMDCPU(): unrollCount = max(unrollCount, 100) # ZEN+ sometimes undercounts FP usage if code is short


@@ -596,7 +597,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr

      if float(measurementResult['Core cycles']) < -10:
         #something went wrong; this happens for example on HSW with long sequences of JMP instructions
-         if debugOutput: print "Core cycles < -10 in getUopsOnBlockedPorts"
+         if debugOutput: print('Core cycles < -10 in getUopsOnBlockedPorts')

      if sum(u for p, u in measurementResult.items() if ('UOPS_PORT' in p or 'FpuPipeAssignment.Total' in p)) < blockInstrRep-.5:
         # something went wrong; fewer uops on ports than blockInstrRep
@@ -643,7 +644,7 @@ def getIndependentInstructions(instrNode, useDistinctRegs, useIndexedAddr, doNot

      maxMemWidth = 0
      for memNode in instrNode.findall('./operand[@type="mem"][@w="1"]'):
-         maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0'))/8)
+         maxMemWidth = max(maxMemWidth, int(memNode.attrib.get('width', '0')) // 8)
      offset += maxMemWidth

      independentInstructions.append(instrI)
@@ -694,17 +695,17 @@ def getThroughputIacaNoInteriteration(instrNode, htmlReports):
      subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
-      iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
+      iaca_tp = subprocess.check_output(iacaCMDLine + (['-analysis', 'THROUGHPUT'] if iacaVersion=='2.1' else []) + ['-no_interiteration', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
   except subprocess.CalledProcessError as e:
-      print "Error: " + e.output
+      print('Error: ' + e.output.decode())
      return None

   if debugOutput:
-      print instrNode.attrib['iform'] + ' - NoInteriteration'
-      print iaca_tp
+      print(instrNode.attrib['iform'] + ' - NoInteriteration')
+      print(iaca_tp)

   htmlReports.append('<pre>' + iaca_tp + '</pre>\n')

   if not iaca_tp or ' !' in iaca_tp or ' X' in iaca_tp or ' 0X' in iaca_tp or not 'Total Num Of Uops' in iaca_tp:
-      print "IACA error"
+      print('IACA error')
      return None

   cycles = float(iaca_tp.split('\n')[3].split()[2])
@@ -958,7 +959,7 @@ def getTPConfigsForDiv(instrNode):
         if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM'

         config.init = ['MOV RAX, ' + arg]
-         for i in range(0, getRegSize(regType)/8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX']
+         for i in range(0, getRegSize(regType)//8, 8): config.init += ['MOV [R14+' + str(i) + '], RAX']

         targetRegIdx = min(int(opNode.attrib['idx']) for opNode in instrNode.findall('./operand') if opNode.text and regType in opNode.text)
         if memDivisor:
@@ -997,11 +998,11 @@ TPResult = namedtuple('TPResult', ['TP', 'TP_loop', 'TP_noLoop', 'TP_noDepBreaki
 def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports):
   configs = getTPConfigs(instrNode, useDistinctRegs, useIndexedAddr)

-   minTP = sys.maxint
-   minTP_loop = sys.maxint
-   minTP_noLoop = sys.maxint
-   minTP_noDepBreaking_noLoop = sys.maxint
-   minTP_single = sys.maxint
+   minTP = sys.maxsize
+   minTP_loop = sys.maxsize
+   minTP_noLoop = sys.maxsize
+   minTP_noDepBreaking_noLoop = sys.maxsize
+   minTP_single = sys.maxsize

   if useIACA:
      config = configs[0] # consider only first config as IACA does not seem to consider different values in registers
@@ -1024,17 +1025,17 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
-               iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
+               iaca_out = subprocess.check_output(iacaCMDLine + ['/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
            except subprocess.CalledProcessError as e:
-               logging.warn('Error: ' + e.output)
-               if minTP != sys.maxint:
-                  htmlReports.append('<pre>' + e.output + '</pre>\n')
+               logging.warning('Error: ' + e.output.decode())
+               if minTP != sys.maxsize:
+                  htmlReports.append('<pre>' + e.output.decode() + '</pre>\n')
                  continue # on SNB, IACA 2.2 crashes on only some (larger) inputs
               else:
                  return None

            if not iaca_out or ' ! ' in iaca_out or ' X ' in iaca_out or ' 0X ' in iaca_out or not 'Total Num Of Uops' in iaca_out:
-               print "IACA error"
+               print('IACA error')
               return None

-            print instrNode.attrib['iform'] + ' - throughput'
+            print(instrNode.attrib['iform'] + ' - throughput')

            htmlReports.append('<pre>' + iaca_out + '</pre>\n')

@@ -1087,7 +1088,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
         instrIList = config.independentInstrs
         instrLen = getCodeLength(instrIList[0].asm)
         for ic in sorted(set([1, min(4, len(instrIList)), min(8, len(instrIList)), len(instrIList)])):
-            if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
+            if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break

            if len(instrIList) > 1: htmlReports.append('<h3 style="margin-left: 25px">With ' + str(ic) + ' independent instruction' + ('s' if ic>1 else '') + '</h3>\n')
            htmlReports.append('<div style="margin-left: 50px">\n')
@@ -1095,7 +1096,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
            init = list(chain.from_iterable(i.regMemInit for i in instrIList[0:ic])) + config.init

            for useDepBreakingInstrs in ([False, True] if config.depBreakingInstrs else [False]):
-               if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
+               if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break

               depBreakingInstrs = ''
               if useDepBreakingInstrs:
@@ -1103,7 +1104,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                  htmlReports.append('<h4>With additional dependency-breaking instructions</h4>\n')

               for repType in ['unrollOnly', 'loopSmall', 'loopBig']:
-                  if minTP_noLoop < sys.maxint and minTP_loop < sys.maxint and minTP_noLoop > 100 and minTP_loop > 100: break
+                  if minTP_noLoop < sys.maxsize and minTP_loop < sys.maxsize and minTP_noLoop > 100 and minTP_loop > 100: break

                  paddingTypes = ['']
                  if ((repType != 'unrollOnly') and (uopsMITE is not None) and (not uopsMS) and (math.ceil(32.0/instrLen) * uopsMITE > 18)
@@ -1138,7 +1139,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                        else:
                           loopCount = 100
                           unrollCount *= 10
-                        if minTP < sys.maxint and minTP > 100:
+                        if minTP < sys.maxsize and minTP > 100:
                           unrollCount = 1
                           loopCount = 10

@@ -1162,7 +1163,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                     #if any('PORT' in e for e in result):
                     #   maxPortUops = max(v/(len(e)-9) for e,v in result.items() if e.startswith('UOPS_PORT') and not '4' in e)
                     #   if maxPortUops * .98 > result['Core cycles']:
-                     #      print 'More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles'])
+                     #      print('More uops on ports than cycles, uops: {}, cycles: {}'.format(maxPortUops, result['Core cycles']))
                     #       #invalid = True

                     #if not invalid:
@@ -1174,7 +1175,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
                     else:
                        minTP_loop = min(minTP_loop, cycles)

-                     if ic == 1 and (minTP == sys.maxint or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
+                     if ic == 1 and (minTP == sys.maxsize or cycles == minTP) and not useDepBreakingInstrs and repType == 'unrollOnly':
                        minConfig = config
                        minTP_single = min(minTP_single, cycles)

@@ -1217,7 +1218,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports

            htmlReports.append('</div>')

-      if minTP < sys.maxint:
+      if minTP < sys.maxsize:
         return TPResult(minTP, minTP_loop, minTP_noLoop, minTP_noDepBreaking_noLoop, minTP_single, uops, uopsFused, uopsMITE, uopsMS, divCycles, ILD_stalls,
                         complexDec, nAvailableSimpleDecoders, minConfig, ports_dict)

@@ -1246,7 +1247,7 @@ def getBasicLatencies(instrNodeList):
   movsxResult = runExperiment(instrNodeDict['MOVSXD (R64, R32)'], 'MOVSX RAX, EAX')
   movsxCycles = int(round(movsxResult['Core cycles']))
   if movsxCycles != 1:
-      print 'Latency of MOVSX must be 1'
+      print('Latency of MOVSX must be 1')
      sys.exit()
   basicLatency['MOVSX'] = movsxCycles

@@ -1275,7 +1276,7 @@ def getBasicLatencies(instrNodeList):
      testSetResult = runExperiment(None, 'TEST AL, AL; SET' + flag[0] + ' AL')
      testSetCycles = int(round(testSetResult['Core cycles']))
      if not testSetCycles == 2:
-         print 'Latencies of TEST and SET' + flag[0] + ' must be 1'
+         print('Latencies of TEST and SET' + flag[0] + ' must be 1')
         sys.exit()
      basicLatency['SET' + flag[0]] = 1
      basicLatency['TEST'] = 1
@@ -1297,7 +1298,7 @@ def getBasicLatencies(instrNodeList):
      result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
      basicLatency[instr] = int(round(result['Core cycles']))

-   if filter(lambda x: x.findall('[@iclass="VANDPS"]'), instrNodeList):
+   if any(x.findall('[@iclass="VANDPS"]') for x in instrNodeList):
      for instr in ['VANDPS', 'VANDPD', 'VORPS', 'VORPD', 'VPAND', 'VPOR']:
         result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, XMM)'], instr + ' XMM1, XMM1, XMM1')
         basicLatency[instr] = int(round(result['Core cycles']))
@@ -1310,7 +1311,7 @@ def getBasicLatencies(instrNodeList):
         result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
         basicLatency[instr] = int(round(result['Core cycles']))

-   if filter(lambda x: x.findall('[@extension="AVX512EVEX"]'), instrNodeList):
+   if any(x.findall('[@extension="AVX512EVEX"]') for x in instrNodeList):
      kmovq_result = runExperiment(instrNodeDict['KMOVQ (K, K)'], 'KMOVQ K1, K1')
      basicLatency['KMOVQ'] = int(round(kmovq_result['Core cycles']))

@@ -1321,7 +1322,7 @@ def getBasicLatencies(instrNodeList):
         basicLatency['VMOVUPS_' + regType + '_' + 'K'] = vmovups_cycles

         if not vmovups_uops == 1:
-            print 'VMOVUPS must have exactly 1 uop'
+            print('VMOVUPS must have exactly 1 uop')
            sys.exit()

         vpmovq2m_result = runExperiment(instrNodeDict['VPMOVQ2M (K, ' + regType + ')'],
@@ -1337,7 +1338,7 @@ def getBasicLatencies(instrNodeList):
      mov_10movsx_mov_result = runExperiment(None, 'mov ' + reg + ', [r14];' + ';'.join(10*['MOVSX R12, R12w']) + '; mov [r14], ' + reg , unrollCount=100)
      basicLatency['MOV_10MOVSX_MOV_'+str(memWidth)] = int(round(mov_10movsx_mov_result['Core cycles']))

-   print 'Basic Latencies: ' + str(basicLatency)
+   print('Basic Latencies: ' + str(basicLatency))

 # Returns a dict {opNode: instr}, s.t. opNode is both read and written, and instr breaks the dependency
 # Returns a list of dependency breaking instructions for operands that are both read and written (with the exception of ignoreOperand, if specified).
@@ -1541,8 +1542,8 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep):

         init = ['MOV RAX, ' + dividend]
         init += ['MOV RBX, ' + divisor]
-         for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RBX']
-         for i in range(64, 64+getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
+         for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RBX']
+         for i in range(64, 64+getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']

         if instrNode.attrib['iclass'] in ['DIVSS', 'DIVPS', 'DIVSD', 'DIVPD']:
            init += ['MOVUP' + dataType + ' XMM1, [R14+64]']
@@ -1671,7 +1672,7 @@ def getDivLatConfigLists(instrNode, opNode1, opNode2, cRep):
         if 'ZMM' in instrNode.attrib['iform']: regType = 'ZMM'

         init = ['MOV RAX, ' + arg]
-         for i in range(0, getRegSize(regType)/8, 8): init += ['MOV [R14+' + str(i) + '], RAX']
+         for i in range(0, getRegSize(regType)//8, 8): init += ['MOV [R14+' + str(i) + '], RAX']

         targetReg = regType + '0'
         sourceBaseReg = regType + '1'
@@ -1778,7 +1779,7 @@ def getLatConfigsFromMemToReg(instrNode, instrI, memOpNode, targetReg, addrReg,
            if memOpNode.attrib['width'] != chainOpNode1.attrib['width']: continue
            if memOpNode.attrib.get('VSIB', '') != chainOpNode1.attrib.get('VSIB', ''): continue

-            for chainOpNode2 in filter(lambda x: targetReg in x.text.split(','), chainInstrNode.findall('./operand[@type="reg"][@w="1"]')):
+            for chainOpNode2 in [x for x in chainInstrNode.findall('./operand[@type="reg"][@w="1"]') if targetReg in x.text.split(',')]:
               if chainOpNode2.attrib.get('optional', '') == '1': continue
               chainsInstr = getInstrInstanceFromNode(chainInstrNode, [targetReg], [targetReg], True, {int(chainOpNode2.attrib['idx']):targetReg}).asm
               result.append(LatConfig(instrI, chainInstrs=chainsInstr, chainLatency=1))
@@ -1971,7 +1972,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
         else:
            if len(regs2) == 1:
               reg2 = sortRegs(regs2)[0]
-               otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg2), regs1)
+               otherRegs = [x for x in regs1 if getCanonicalReg(x) != getCanonicalReg(reg2)]
               if otherRegs:
                  reg1 = sortRegs(otherRegs)[0]
               else:
@@ -1988,7 +1989,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
                           reg2 = r
                           break
               else:
-                  otherRegs = filter(lambda x: getCanonicalReg(x) != getCanonicalReg(reg1), regs2)
+                  otherRegs = [x for x in regs2 if getCanonicalReg(x) != getCanonicalReg(reg1)]
                  if otherRegs:
                     reg2 = sortRegs(otherRegs)[0]

@@ -2053,7 +2054,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
                  chainInstrInt, chainLatencyInt = getChainInstrForVectorRegs(instrNode, reg2, reg1, cRep, 'Int')
                  configList.append(LatConfig(instrI, chainInstrs=chainInstrInt, chainLatency=chainLatencyInt))
            else:
-               print 'invalid reg prefix: ' + reg1Prefix
+               print('invalid reg prefix: ' + reg1Prefix)
               return None
         else:
            configList.isUpperBound = True
@@ -2143,7 +2144,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
               configList.extend(getLatConfigsFromMemToReg(instrNode, instrI, targetNode, reg, addrReg, cRep))
         else:
            # ToDo
-            print 'unsupported reg to mem'
+            print('unsupported reg to mem')
            return None
   elif startNode.attrib['type'] == 'flags':
      #################
@@ -2225,7 +2226,7 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem

         if suppressedStart:
            if not regs.issubset(GPRegs):
-               print 'read from suppressed mem to non-GPR reg not yet supported'
+               print('read from suppressed mem to non-GPR reg not yet supported')
               return None

         instrI = getInstrInstanceFromNode(instrNode, [addrReg, indexReg, 'R12'], [addrReg, indexReg, 'R12'], useDistinctRegs, {targetNodeIdx:reg},
@@ -2358,11 +2359,11 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
            subprocess.check_output(['as', '/tmp/ramdisk/asm.s', '-o', '/tmp/ramdisk/asm.o'])
-            iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT)
+            iaca_lat = subprocess.check_output(iacaCMDLine + ['-analysis', 'LATENCY', '/tmp/ramdisk/asm.o'], stderr=subprocess.STDOUT).decode()
         except subprocess.CalledProcessError as e:
-            print "Error: " + e.output
+            print('Error: ' + e.output.decode())
            return None

         if '!' in iaca_lat or not 'Latency' in iaca_lat:
-            print "IACA error"
+            print('IACA error')
            return None

         latency = iaca_lat.split('\n')[3].split()[1]
@@ -2444,7 +2445,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                  latConfigLists = getLatConfigLists(instrNode, opNode1, opNode2, useDistinctRegs, addrMem, tpDict)
                  if latConfigLists is None: continue

-                  minLat = sys.maxint
+                  minLat = sys.maxsize
                  maxLat = 0

                  minLatIsUpperBound = False
@@ -2453,7 +2454,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                  configHtmlReports = []

                  for latConfigList in latConfigLists:
-                     minLatForCurList = sys.maxint
+                     minLatForCurList = sys.maxsize

                     if not any((latConfig.init or latConfig.instrI.regMemInit) for latConfig in latConfigList.latConfigs):
                        # Test different register values for read-only registers
@@ -2463,7 +2464,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                           readOnlyRegOpNodeIdx = int(readOnlyRegOpNode.attrib['idx'])
                           for latConfig in list(latConfigList.latConfigs):
                              if not readOnlyRegOpNodeIdx in latConfig.instrI.opRegDict:
-                                 print 'readOnlyRegOpNodeIdx not found in opRegDict'
+                                 print('readOnlyRegOpNodeIdx not found in opRegDict')
                                 continue
                              reg = latConfig.instrI.opRegDict[readOnlyRegOpNodeIdx]
                              if (not reg in GPRegs) or (reg in High8Regs) or (reg in globalDoNotWriteRegs) or (reg in specialRegs): continue
@@ -2551,8 +2552,8 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                           else:
                              latConfig.chainInstrs += 'VPCMPD {0}, {1}, {1}, 7;'.format(maskReg, 'XMM15')

-                     mlDP = sys.maxint
-                     mlnoDP = sys.maxint
+                     mlDP = sys.maxsize
+                     mlnoDP = sys.maxsize

                     for latConfig in latConfigList.latConfigs:
                        configI += 1
@@ -2576,7 +2577,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
                        configHtmlReports.append('</ul>\n')

                        if not measurementResult:
-                           print 'no result found'
+                           print('no result found')
                           continue

                        cycles = measurementResult['Core cycles']
@@ -2869,7 +2870,7 @@ def main():
   else:
      cpu = cpuid.CPUID()
      arch = cpuid.micro_arch(cpu)
-      print cpuid.get_basic_info(cpu)
+      print(cpuid.get_basic_info(cpu))
      if arch == 'unknown':
         exit(1)

@@ -2906,7 +2907,7 @@ def main():
   try:
      subprocess.check_output('mkdir -p /tmp/ramdisk; sudo mount -t tmpfs -o size=100M none /tmp/ramdisk/', shell=True)
   except subprocess.CalledProcessError as e:
-      print "Could not create ramdisk " + e.output
+      print('Could not create ramdisk ' + e.output.decode(errors='replace'))
      exit(1)

   XMLRoot = ET.parse(args.input).getroot()
@@ -2957,7 +2958,7 @@ def main():
   else:
      for i, instrNode in enumerate(instrNodeList):
         #if not 'RCR (R64, 1)' in instrNode.attrib['string']: continue
-         print 'Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
+         print('Measuring throughput for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')

         htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Throughput and Uops' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']

@@ -2968,7 +2969,7 @@ def main():
         if hasExplMemOp: htmlReports.append('<h2 id="nonIndexedAddr">With a non-indexed addressing mode</h2>\n')

         tpResult = getThroughputAndUops(instrNode, True, False, htmlReports)
-         print instrNode.attrib['string'] + " - tp: " + str(tpResult)
+         print(instrNode.attrib['string'] + " - tp: " + str(tpResult))

         if tpResult:
            tpDict[instrNode] = tpResult
@@ -3005,7 +3006,7 @@ def main():
      with open('tp_' + arch + '.pickle', 'wb') as f:
         pickle.dump((tpDict, tpDictSameReg, tpDictIndexedAddr, tpDictNoInteriteration), f)

-   num_ports = len(tpDict.values()[0].unblocked_ports)
+   num_ports = len(list(tpDict.values())[0].unblocked_ports)

   ########################
   # Latency
@@ -3023,13 +3024,13 @@ def main():
   elif not useIACA or iacaVersion == '2.1':
      for i, instrNode in enumerate(instrNodeList):
         #if not 'DIV' in instrNode.attrib['string']: continue
-         print 'Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
+         print('Measuring latencies for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')

         htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Latency' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>\n<hr>\n']
         lat = getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports)

         if lat is not None:
-            if debugOutput: print instrNode.attrib['iform'] + ': ' + str(lat)
+            if debugOutput: print(instrNode.attrib['iform'] + ': ' + str(lat))
            latencyDict[instrNode] = lat
            writeHtmlFile('html-lat/'+arch, instrNode, instrNode.attrib['string'], ''.join(htmlReports))
      with open('lat_' + arch + '.pickle', 'wb') as f:
@@ -3080,21 +3081,21 @@ def main():
            # their throughput is limited to 1 per cycle; thus, they are disallowed by the TP_noDepBreaking_noLoop check above
            disallowedBlockingInstrs.remove(instrNodeDict['MOVD (R32, XMM)'])

-      print 'disallowedBlockingInstrs'
+      print('disallowedBlockingInstrs')
      for instrNode in disallowedBlockingInstrs:
-         print '  ' + str(instrNode.attrib['string'])
+         print('  ' + str(instrNode.attrib['string']))

-      print 'tpDict'
+      print('tpDict')
      for instr, tpResult in tpDict.items():
-         print '  ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports)
+         print('  ' + str(instr.attrib['string']) + ' ' + str(tpResult.unblocked_ports))

      # we cannot start higher than .79 as IACA has .2 uops on each port for a port usage of, e.g., 1*p1256
      # using uops_dict instead can be problematic because in IACA the uops on the individual ports do not always add up to this value
      oneUopInstrs = [instr for instr, tpResult in tpDict.items() if instr not in disallowedBlockingInstrs and .79 < sum([v for v in tpResult.unblocked_ports.values() if v>.1]) < 1.11]

-      print 'oneUopInstrs'
+      print('oneUopInstrs')
      for instrNode in oneUopInstrs:
-         print '  ' + str(instrNode.attrib['string'])
+         print('  ' + str(instrNode.attrib['string']))
      # dicts from port combination to a set of instructions (either not containing AVX or SSE instructions bec. of transition penalty) that always uses these ports
      blockingInstructionsDictNonAVX_set = {}
      blockingInstructionsDictNonSSE_set = {}
@@ -3102,7 +3103,7 @@ def main():
      for instrNode in oneUopInstrs:
         usedPorts = frozenset({p for p, x in tpDict[instrNode].unblocked_ports.items() if x>0.1})
         if usedPorts:
-            print instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]')))
+            print(instrNode.attrib['iform'] + ': ' + str(usedPorts) + ' ' + str(len(instrNode.findall('./operand[@suppressed="1"]'))))

            if not isSSEInstr(instrNode):
               if not usedPorts in blockingInstructionsDictNonSSE_set: blockingInstructionsDictNonSSE_set[usedPorts] = set()
@@ -3118,10 +3119,10 @@ def main():
      blockingInstructionsDictNonSSE = {comb: next(iter(sorted(instr_set, key=sort_key))) for comb, instr_set in blockingInstructionsDictNonSSE_set.items()}

      #for comb, instr_set in blockingInstructionsDictNonAVX_set.items():
-      #   print comb
-      #   print [x.attrib['string'] for x in sorted(instr_set, key=sort_key)]
+      #   print(comb)
+      #   print([x.attrib['string'] for x in sorted(instr_set, key=sort_key)])

-      #print str(blockingInstructionsDictNonAVX.items())
+      #print(str(blockingInstructionsDictNonAVX.items()))

      if isIntelCPU():
         # mov to mem has always two uops: store address and store data; there is no instruction that uses just one of them
@@ -3138,26 +3139,26 @@ def main():
         if storeAddressPorts not in blockingInstructionsDictNonAVX: blockingInstructionsDictNonAVX[storeAddressPorts] = movMemInstrNode
         if storeAddressPorts not in blockingInstructionsDictNonSSE: blockingInstructionsDictNonSSE[storeAddressPorts] = movMemInstrNode

-      print 'Non-AVX:'
+      print('Non-AVX:')
      for k,v in blockingInstructionsDictNonAVX.items():
-         print str(k) + ': ' + v.attrib['iform']
-      print 'Non-SSE:'
+         print(str(k) + ': ' + v.attrib['iform'])
+      print('Non-SSE:')
      for k,v in blockingInstructionsDictNonSSE.items():
-         print str(k) + ': ' + v.attrib['iform']
+         print(str(k) + ': ' + v.attrib['iform'])

      sortedPortCombinationsNonAVX = sorted(blockingInstructionsDictNonAVX.keys(), key=lambda x:(len(x), sorted(x)))
      sortedPortCombinationsNonSSE = sorted(blockingInstructionsDictNonSSE.keys(), key=lambda x:(len(x), sorted(x)))
-      print 'sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX)
+      print('sortedPortCombinations: ' + str(sortedPortCombinationsNonAVX))

-      for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (tpDict[x].config.preInstrNodes, x.attrib['string']))):
+      for i, instrNode in enumerate(sorted(tpDict.keys(), key=lambda x: (len(tpDict[x].config.preInstrNodes), x.attrib['string']))):
         #if not 'CVTPD2PI' in instrNode.attrib['string']: continue

-         print 'Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')'
+         print('Measuring port usage for ' + instrNode.attrib['string'] + ' (' + str(i) + '/' + str(len(instrNodeList)) + ')')

         htmlReports = ['<h1>' + instrNode.attrib['string'] + ' - Port Usage' + (' (IACA '+iacaVersion+')' if useIACA else '') + '</h1>']

         for useDistinctRegs in ([True, False] if instrNode in tpDictSameReg else [True]):
-            for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]):
+            for useIndexedAddr in ([False, True] if useDistinctRegs and (instrNode in tpDictIndexedAddr) else [False]):               
               tpResult = None

               if not useDistinctRegs:
@@ -3176,7 +3177,7 @@ def main():

               # use abs because on, e.g., IVB port usages might be smaller in the second half of the experiments if replays happen
               used_ports = {p for p, x in tpResult.unblocked_ports.items() if abs(x)>0.05}
-               if debugOutput: print instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports)
+               if debugOutput: print(instrNode.attrib['string'] + ' - used ports: ' + str(used_ports) + ', dict: ' + str(tpResult.unblocked_ports))

               if not isAVXInstr(instrNode):
                  blockingInstrs = blockingInstructionsDictNonAVX
@@ -3218,13 +3219,13 @@ def main():
                     blockInstrRep = min(blockInstrRep, 100)
                     uopsOnBlockedPorts = getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockingInstrs[combination], blockInstrRep, combination, tpResult.config, htmlReports)
                     if uopsOnBlockedPorts is None:
-                        print 'no uops on blocked ports: ' + str(combination)
+                        print('no uops on blocked ports: ' + str(combination))
                        continue

                     uopsOnBlockedPorts -= prevUopsOnCombination

                     if rem_uops < uopsOnBlockedPorts:
-                        print 'More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts)
+                        print('More uops on ports than total uops, combination: ' + str(combination) + ', ' + str(uopsOnBlockedPorts))

                     if uopsOnBlockedPorts <= 0: continue

@@ -3338,8 +3339,8 @@ def main():
            try:
               resultNode.attrib['TP_ports'+suffix] = "%.2f" % getTP_LP(portUsageWithDivList)
            except ValueError as err:
-               print 'Could not solve LP for ' + instrNode.attrib['string'] + ':'
-               print err
+               print('Could not solve LP for ' + instrNode.attrib['string'] + ':')
+               print(err)

   with open(args.output, "w") as f:
      reparsed = XMLRoot
@@ -3358,7 +3359,7 @@ def main():
   except subprocess.CalledProcessError:
      exit(1)

-   print 'Total number of microbenchmarks: ' + str(nExperiments)
+   print('Total number of microbenchmarks: ' + str(nExperiments))


 if __name__ == "__main__":
--- a/tools/cpuBench/mergeXML.py
+++ b/tools/cpuBench/mergeXML.py
@@ -1,4 +1,5 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
+
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
 import argparse
@@ -20,7 +21,7 @@ def main():

   for instrNode1 in root1.iter('instruction'):
      if instrNode1.attrib['string'] not in instrNode2Dict:
-         print 'no matching entry found for ' + instrNode1.attrib['string']
+         print('no matching entry found for ' + instrNode1.attrib['string'])
         continue
      for instrNode2 in instrNode2Dict[instrNode1.attrib['string']]:
         for archNode2 in instrNode2.iter('architecture'):
--- a/tools/cpuBench/utils.py
+++ b/tools/cpuBench/utils.py
@@ -137,7 +137,7 @@ def getLatencyTableEntry(measurementNode):
   if measurementNode is None or measurementNode.find('./latency') is None:
      return None

-   minLat = sys.maxint
+   minLat = sys.maxsize
   maxLat = 0
   minLatUB = False
   maxLatUB = False