new caching structure with support for distribution

This commit is contained in:
Julian Hammer
2020-10-28 16:29:23 +01:00
parent b5b1a1f2b2
commit 9d2ea8603f
6 changed files with 73 additions and 76 deletions

View File

@@ -12,11 +12,7 @@ from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG, from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG,
MachineModel, reduce_to_section) MachineModel, reduce_to_section)
MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
)
LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/')
DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
SUPPORTED_ARCHS = [ SUPPORTED_ARCHS = [
'SNB', 'SNB',
'IVB', 'IVB',

View File

@@ -7,6 +7,8 @@ import re
import string import string
from copy import deepcopy from copy import deepcopy
from itertools import product from itertools import product
import hashlib
from pathlib import Path
import ruamel.yaml import ruamel.yaml
from ruamel.yaml.compat import StringIO from ruamel.yaml.compat import StringIO
@@ -49,7 +51,7 @@ class MachineModel(object):
yaml = self._create_yaml_object() yaml = self._create_yaml_object()
if arch: if arch:
self._arch = arch.lower() self._arch = arch.lower()
self._path = utils.find_file(self._arch + '.yml') self._path = utils.find_datafile(self._arch + '.yml')
# check if file is cached # check if file is cached
cached = self._get_cached(self._path) if not lazy else False cached = self._get_cached(self._path) if not lazy else False
if cached: if cached:
@@ -314,18 +316,22 @@ class MachineModel(object):
:type filepath: str :type filepath: str
:returns: cached DB if existing, `False` otherwise :returns: cached DB if existing, `False` otherwise
""" """
hashname = self._get_hashname(filepath) p = Path(filepath)
cachepath = utils.exists_cached_file(hashname + '.pickle') # 1. companion cachefile: same location, with '.' prefix and '.pickle' suffix
if cachepath: companion_cachefile = p.with_name('.' + p.name).with_suffix('.pickle')
# Check if modification date of DB is older than cached version if companion_cachefile.exists():
if os.path.getmtime(filepath) < os.path.getmtime(cachepath): if companion_cachefile.stat().st_mtime > p.stat().st_mtime:
# load cached version # companion file up-to-date
with open(cachepath, 'rb') as f: with companion_cachefile.open('rb') as f:
cached_db = pickle.load(f) return pickle.load(f)
return cached_db
else: # 2. home cachefile: ~/.osaca/cache/<sha512hash>.pickle
# DB newer than cached version --> delete cached file and return False hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
os.remove(cachepath) home_cachefile = (Path(utils.CACHE_DIR) / hexhash).with_suffix('.pickle')
if home_cachefile.exists():
# home file (must be up-to-date, due to equal hash)
with home_cachefile.open('rb') as f:
return pickle.load(f)
return False return False
def _write_in_cache(self, filepath, data): def _write_in_cache(self, filepath, data):
@@ -337,14 +343,25 @@ class MachineModel(object):
:param data: :class:`MachineModel` to store :param data: :class:`MachineModel` to store
:type data: :class:`dict` :type data: :class:`dict`
""" """
hashname = self._get_hashname(filepath) p = Path(filepath)
filepath = os.path.join(utils.CACHE_DIR, hashname + '.pickle') # 1. companion cachefile: same location, with '.' prefix and '.pickle' suffix
with open(filepath, 'wb') as f: companion_cachefile = p.with_name('.' + p.name).with_suffix('.pickle')
pickle.dump(data, f) if os.access(companion_cachefile.parent, os.W_OK):
with companion_cachefile.open('wb') as f:
pickle.dump(data, f)
return
def _get_hashname(self, name): # 2. home cachefile: ~/.osaca/cache/<sha512hash>.pickle
"""Returns unique hashname for machine model""" hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
return base64.b64encode(name.encode()).decode() cache_dir = Path(utils.CACHE_DIR)
try:
os.makedirs(cache_dir, exist_ok=True)
except OSError:
return
home_cachefile = (cache_dir / hexhash).with_suffix('.pickle')
if os.access(home_cachefile.parent, os.W_OK):
with home_cachefile.open('wb') as f:
pickle.dump(data, f)
def _get_key(self, name, operands): def _get_key(self, name, operands):
"""Get unique instruction form key for dict DB.""" """Get unique instruction form key for dict DB."""

View File

@@ -26,7 +26,7 @@ class ISASemantics(object):
def __init__(self, isa, path_to_yaml=None): def __init__(self, isa, path_to_yaml=None):
self._isa = isa.lower() self._isa = isa.lower()
path = utils.find_file('isa/' + self._isa + '.yml') if not path_to_yaml else path_to_yaml path = path_to_yaml or utils.find_datafile('isa/' + self._isa + '.yml')
self._isa_model = MachineModel(path_to_yaml=path) self._isa_model = MachineModel(path_to_yaml=path)
if self._isa == 'x86': if self._isa == 'x86':
self._parser = ParserX86ATT() self._parser = ParserX86ATT()

View File

@@ -1,28 +1,17 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os.path import os.path
from pathlib import Path
import hashlib
# Search order: user-local data dir first, then the packaged data dir.
DATA_DIRS = [os.path.expanduser('~/.osaca/data'),
             os.path.join(os.path.dirname(__file__), 'data')]
CACHE_DIR = os.path.expanduser('~/.osaca/cache')


def find_datafile(name):
    """
    Check for existence of name in user or package data folders and return path.

    :param name: file name (may include a relative subpath, e.g. 'isa/x86.yml')
    :returns: first existing path from ``DATA_DIRS``
    :raises FileNotFoundError: if ``name`` exists in none of the data folders
    """
    # 'directory' instead of 'dir' to avoid shadowing the builtin
    for directory in DATA_DIRS:
        path = os.path.join(directory, name)
        if os.path.exists(path):
            return path
    raise FileNotFoundError("Could not find {!r} in {!r}.".format(name, DATA_DIRS))
def exists_cached_file(name):
"""Check for existence of file in cache dir. Returns path if it exists and False otherwise."""
if not os.path.exists(CACHE_DIR):
os.makedirs(CACHE_DIR)
return False
search_paths = [CACHE_DIR]
for dir in search_paths:
path = os.path.join(dir, name)
if os.path.exists(path):
return path
return False

View File

@@ -33,7 +33,7 @@ class TestFrontend(unittest.TestCase):
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml') path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
) )
self.machine_model_tx2 = MachineModel( self.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml') arch='tx2.yml'
) )
self.semantics_csx = ArchSemantics( self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml') self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')

View File

@@ -20,48 +20,43 @@ class TestSemanticTools(unittest.TestCase):
MODULE_DATA_DIR = os.path.join( MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/' os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
) )
USER_DATA_DIR = os.path.join(os.path.expanduser('~'), '.osaca/')
@classmethod
def setUpClass(cls):
    """Parse the sample kernels and build the machine/ISA models once per class."""
    # set up parser and kernels
    cls.parser_x86 = ParserX86ATT()
    cls.parser_AArch64 = ParserAArch64()
    with open(cls._find_file('kernel_x86.s')) as f:
        cls.code_x86 = f.read()
    with open(cls._find_file('kernel_aarch64.s')) as f:
        cls.code_AArch64 = f.read()
    cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), 'x86')
    cls.kernel_AArch64 = reduce_to_section(
        cls.parser_AArch64.parse_file(cls.code_AArch64), 'aarch64'
    )

    # set up machine models
    cls.machine_model_csx = MachineModel(
        path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'csx.yml')
    )
    cls.machine_model_tx2 = MachineModel(
        path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'tx2.yml')
    )
    cls.semantics_csx = ArchSemantics(
        cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/x86.yml')
    )
    cls.semantics_tx2 = ArchSemantics(
        cls.machine_model_tx2,
        path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/aarch64.yml'),
    )
    cls.machine_model_zen = MachineModel(arch='zen1')

    # annotate kernels in place (iterate elements directly instead of
    # `for i in range(len(...))`; same objects are passed to the assigners)
    for instruction_form in cls.kernel_x86:
        cls.semantics_csx.assign_src_dst(instruction_form)
        cls.semantics_csx.assign_tp_lt(instruction_form)
    for instruction_form in cls.kernel_AArch64:
        cls.semantics_tx2.assign_src_dst(instruction_form)
        cls.semantics_tx2.assign_tp_lt(instruction_form)
########### ###########
# Tests # Tests