new caching structure with support for distribution

This commit is contained in:
Julian Hammer
2020-10-28 16:29:23 +01:00
parent b5b1a1f2b2
commit 9d2ea8603f
6 changed files with 73 additions and 76 deletions

View File

@@ -12,11 +12,7 @@ from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG,
MachineModel, reduce_to_section)
MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
)
LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/')
DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
SUPPORTED_ARCHS = [
'SNB',
'IVB',

View File

@@ -7,6 +7,8 @@ import re
import string
from copy import deepcopy
from itertools import product
import hashlib
from pathlib import Path
import ruamel.yaml
from ruamel.yaml.compat import StringIO
@@ -49,7 +51,7 @@ class MachineModel(object):
yaml = self._create_yaml_object()
if arch:
self._arch = arch.lower()
self._path = utils.find_file(self._arch + '.yml')
self._path = utils.find_datafile(self._arch + '.yml')
# check if file is cached
cached = self._get_cached(self._path) if not lazy else False
if cached:
@@ -314,18 +316,22 @@ class MachineModel(object):
:type filepath: str
:returns: cached DB if existing, `False` otherwise
"""
hashname = self._get_hashname(filepath)
cachepath = utils.exists_cached_file(hashname + '.pickle')
if cachepath:
# Check if modification date of DB is older than cached version
if os.path.getmtime(filepath) < os.path.getmtime(cachepath):
# load cached version
with open(cachepath, 'rb') as f:
cached_db = pickle.load(f)
return cached_db
else:
# DB newer than cached version --> delete cached file and return False
os.remove(cachepath)
p = Path(filepath)
# 1. companion cachefile: same location, with '.' prefix and '.pickle' suffix
companion_cachefile = p.with_name('.' + p.name).with_suffix('.pickle')
if companion_cachefile.exists():
if companion_cachefile.stat().st_mtime > p.stat().st_mtime:
# companion file up-to-date
with companion_cachefile.open('rb') as f:
return pickle.load(f)
# 2. home cachefile: ~/.osaca/cache/<sha256hash>.pickle
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
home_cachefile = (Path(utils.CACHE_DIR) / hexhash).with_suffix('.pickle')
if home_cachefile.exists():
# home file (must be up-to-date, due to equal hash)
with home_cachefile.open('rb') as f:
return pickle.load(f)
return False
def _write_in_cache(self, filepath, data):
@@ -337,14 +343,25 @@ class MachineModel(object):
:param data: :class:`MachineModel` to store
:type data: :class:`dict`
"""
hashname = self._get_hashname(filepath)
filepath = os.path.join(utils.CACHE_DIR, hashname + '.pickle')
with open(filepath, 'wb') as f:
pickle.dump(data, f)
p = Path(filepath)
# 1. companion cachefile: same location, with '.' prefix and '.pickle' suffix
companion_cachefile = p.with_name('.' + p.name).with_suffix('.pickle')
if os.access(companion_cachefile.parent, os.W_OK):
with companion_cachefile.open('wb') as f:
pickle.dump(data, f)
return
def _get_hashname(self, name):
"""Returns unique hashname for machine model"""
return base64.b64encode(name.encode()).decode()
# 2. home cachefile: ~/.osaca/cache/<sha256hash>.pickle
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
cache_dir = Path(utils.CACHE_DIR)
try:
os.makedirs(cache_dir, exist_ok=True)
except OSError:
return
home_cachefile = (cache_dir / hexhash).with_suffix('.pickle')
if os.access(home_cachefile.parent, os.W_OK):
with home_cachefile.open('wb') as f:
pickle.dump(data, f)
def _get_key(self, name, operands):
"""Get unique instruction form key for dict DB."""

View File

@@ -26,7 +26,7 @@ class ISASemantics(object):
def __init__(self, isa, path_to_yaml=None):
self._isa = isa.lower()
path = utils.find_file('isa/' + self._isa + '.yml') if not path_to_yaml else path_to_yaml
path = path_to_yaml or utils.find_datafile('isa/' + self._isa + '.yml')
self._isa_model = MachineModel(path_to_yaml=path)
if self._isa == 'x86':
self._parser = ParserX86ATT()

View File

@@ -1,28 +1,17 @@
#!/usr/bin/env python3
import os.path
from pathlib import Path
import hashlib
# Folders searched for data files, in priority order: user folder first, then package folder.
DATA_DIRS = [os.path.expanduser('~/.osaca/data'),
             os.path.join(os.path.dirname(__file__), 'data')]
# Per-user folder holding cache files.
CACHE_DIR = os.path.expanduser('~/.osaca/cache')


def find_datafile(name):
    """Check for existence of name in user or package data folders and return path.

    :param name: file name relative to a data folder (e.g. ``'isa/x86.yml'``)
    :type name: str
    :returns: path of the first existing match, trying the folders in `DATA_DIRS` order
    :raises FileNotFoundError: if none of the folders in `DATA_DIRS` contains `name`
    """
    # `data_dir` instead of `dir`, so the builtin of that name is not shadowed
    for data_dir in DATA_DIRS:
        path = os.path.join(data_dir, name)
        if os.path.exists(path):
            return path
    raise FileNotFoundError("Could not find {!r} in {!r}.".format(name, DATA_DIRS))


def find_file(name):
    """Former name of :func:`find_datafile`; kept so existing callers keep working."""
    return find_datafile(name)


def exists_cached_file(name):
    """Check for existence of file in cache dir. Returns path if it exists and False otherwise.

    If the cache folder itself is missing it is created and `False` is returned.

    :param name: file name inside `CACHE_DIR`
    :type name: str
    :returns: path of the cached file if it exists, `False` otherwise
    """
    if not os.path.exists(CACHE_DIR):
        # exist_ok guards against another process creating the folder in between
        os.makedirs(CACHE_DIR, exist_ok=True)
        return False
    path = os.path.join(CACHE_DIR, name)
    if os.path.exists(path):
        return path
    return False

View File

@@ -33,7 +33,7 @@ class TestFrontend(unittest.TestCase):
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
)
self.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
arch='tx2.yml'
)
self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')

View File

@@ -20,48 +20,43 @@ class TestSemanticTools(unittest.TestCase):
MODULE_DATA_DIR = os.path.join(
os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
)
USER_DATA_DIR = os.path.join(os.path.expanduser('~'), '.osaca/')
@classmethod
def setUpClass(cls):
    """Build the parsers, kernels, machine models and semantics used by the tests.

    Everything is stored as class attributes, so it is created once for the
    whole test case instead of once per test method.
    """
    # set up parser and kernels
    cls.parser_x86 = ParserX86ATT()
    cls.parser_AArch64 = ParserAArch64()
    with open(cls._find_file('kernel_x86.s')) as f:
        cls.code_x86 = f.read()
    with open(cls._find_file('kernel_aarch64.s')) as f:
        cls.code_AArch64 = f.read()
    cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), 'x86')
    cls.kernel_AArch64 = reduce_to_section(
        cls.parser_AArch64.parse_file(cls.code_AArch64), 'aarch64'
    )

    # set up machine models
    cls.machine_model_csx = MachineModel(
        path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'csx.yml')
    )
    cls.machine_model_tx2 = MachineModel(
        path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'tx2.yml')
    )
    cls.semantics_csx = ArchSemantics(
        cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/x86.yml')
    )
    cls.semantics_tx2 = ArchSemantics(
        cls.machine_model_tx2,
        path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, 'isa/aarch64.yml'),
    )
    # loaded by architecture name rather than by explicit path
    cls.machine_model_zen = MachineModel(arch='zen1')

    # annotate every instruction form of both kernels
    for instruction_form in cls.kernel_x86:
        cls.semantics_csx.assign_src_dst(instruction_form)
        cls.semantics_csx.assign_tp_lt(instruction_form)
    for instruction_form in cls.kernel_AArch64:
        cls.semantics_tx2.assign_src_dst(instruction_form)
        cls.semantics_tx2.assign_tp_lt(instruction_form)
###########
# Tests