diff --git a/python_appimage/appimage/relocate.py b/python_appimage/appimage/relocate.py index 668a71e..2914c92 100644 --- a/python_appimage/appimage/relocate.py +++ b/python_appimage/appimage/relocate.py @@ -77,7 +77,7 @@ _excluded_libs = None def patch_binary(path, libdir, recursive=True): - '''Patch the RPATH of a binary and and fetch its dependencies + '''Patch the RPATH of a binary and fetch its dependencies ''' global _excluded_libs diff --git a/python_appimage/manylinux/__init__.py b/python_appimage/manylinux/__init__.py new file mode 100644 index 0000000..12c94ad --- /dev/null +++ b/python_appimage/manylinux/__init__.py @@ -0,0 +1,8 @@ +from .config import Arch, LinuxTag, PythonImpl, PythonVersion +from .download import Downloader +from .extract import ImageExtractor, PythonExtractor + + +__all__ = ['Arch', 'Downloader', 'ImageExtractor', 'LinuxTag', + 'PythonExtractor', 'PythonImpl', 'PythonVersion'] + diff --git a/python_appimage/manylinux/config.py b/python_appimage/manylinux/config.py new file mode 100644 index 0000000..beafba1 --- /dev/null +++ b/python_appimage/manylinux/config.py @@ -0,0 +1,81 @@ +from enum import auto, Enum +import platform +from typing import NamedTuple, Union + + +__all__ = ['Arch', 'LinuxTag', 'PythonImpl', 'PythonVersion'] + + +class Arch(Enum): + '''Supported platform architectures.''' + AARCH64 = auto() + I686 = auto() + X86_64 = auto() + + def __str__(self): + return self.name.lower() + + @classmethod + def from_host(cls) -> 'Arch': + return cls.from_str(platform.machine()) + + @classmethod + def from_str(cls, value) -> 'Arch': + for arch in cls: + if value == str(arch): + return arch + else: + raise NotImplementedError(value) + + +class LinuxTag(Enum): + '''Supported platform tags.''' + MANYLINUX_1 = auto() + MANYLINUX_2010 = auto() + MANYLINUX_2014 = auto() + MANYLINUX_2_24 = auto() + MANYLINUX_2_28 = auto() + + def __str__(self): + tag = self.name.lower() + if self in (LinuxTag.MANYLINUX_1, LinuxTag.MANYLINUX_2010, 
LinuxTag.MANYLINUX_2014): + return tag.replace('_', '') + else: + return tag + + @classmethod + def from_str(cls, value) -> 'LinuxTag': + for tag in cls: + if value == str(tag): + return tag + else: + raise NotImplementedError(value) + + +class PythonImpl(Enum): + '''Supported Python implementations.''' + CPYTHON = auto() + + +class PythonVersion(NamedTuple): + '''''' + + major: int + minor: int + patch: Union[int, str] + + @classmethod + def from_str(cls, value: str) -> 'PythonVersion': + major, minor, patch = value.split('.', 2) + try: + patch = int(patch) + except ValueError: + pass + return cls(int(major), int(minor), patch) + + def long(self) -> str: + return f'{self.major}.{self.minor}.{self.patch}' + + def short(self) -> str: + return f'{self.major}.{self.minor}' diff --git a/python_appimage/manylinux/download.py b/python_appimage/manylinux/download.py new file mode 100644 index 0000000..6373931 --- /dev/null +++ b/python_appimage/manylinux/download.py @@ -0,0 +1,145 @@ +import collections +from dataclasses import dataclass, field +import glob +import hashlib +import json +from pathlib import Path +import requests +import shutil +import tempfile +from typing import List, Optional + +from .config import Arch, LinuxTag +from ..utils.log import debug, log + + +CHUNK_SIZE = 8189 + +SUCCESS = 200 + + +class DownloadError(Exception): + pass + +class TarError(Exception): + pass + + +@dataclass(frozen=True) +class Downloader: + + '''Manylinux tag.''' + tag: LinuxTag + + '''Platform architecture.''' + arch: Optional[Arch] = None + + '''Docker image.''' + image: str = field(init=False) + + '''Authentication token.''' + token: str = field(init=False) + + + def __post_init__(self): + # Set host arch if not explictly specified. + if self.arch is None: + arch = Arch.from_host() + object.__setattr__(self, 'arch', arch) + + # Set image name. 
+ image = f'{self.tag}_{self.arch}' + object.__setattr__(self, 'image', image) + + + def download( + self, + destination: Optional[Path]=None, + tag: Optional[str] = 'latest'): + + destination = destination or Path(self.image) + + # Authenticate to quay.io. + repository = f'pypa/{self.image}' + url = 'https://quay.io/v2/auth' + url = f'{url}?service=quay.io&scope=repository:{repository}:pull' + debug('GET', url) + r = requests.request('GET', url) + if r.status_code == SUCCESS: + object.__setattr__(self, 'token', r.json()['token']) + else: + raise DownloadError(r.status_code, r.text, r.headers) + + # Fetch image manifest. + repository = f'pypa/{self.image}' + url = f'https://quay.io/v2/{repository}/manifests/{tag}' + headers = { + 'Authorization': f'Bearer {self.token}', + 'Accept': 'application/vnd.docker.distribution.manifest.v2+json' + } + debug('GET', url) + r = requests.request('GET', url, headers=headers) + if r.status_code == SUCCESS: + image_digest = r.headers['Docker-Content-Digest'].split(':', 1)[-1] + manifest = r.json() + else: + raise DownloadError(r.status_code, r.text, r.headers) + + # Check missing layers to download. + required = [layer['digest'].split(':', 1)[-1] for layer in + manifest['layers']] + is_missing = lambda hash_: \ + not (destination / f'layers/{hash_}.tar.gz').exists() + missing = tuple(filter(is_missing, required)) + + # Fetch missing layers. 
+ with tempfile.TemporaryDirectory() as tmpdir: + workdir = Path(tmpdir) + for i, hash_ in enumerate(missing): + log('DOWNLOAD', f'{self.image} ({tag}) ' + f'[{i + 1} / {len(missing)}]') + + filename = f'{hash_}.tar.gz' + url = f'https://quay.io/v2/{repository}/blobs/sha256:{hash_}' + debug('GET', url) + r = requests.request('GET', url, headers=headers, stream=True) + if r.status_code == SUCCESS: + debug('STREAM', filename) + else: + raise DownloadError(r.status_code, r.text, r.headers) + + hasher = hashlib.sha256() + tmp = workdir / 'layer.tgz' + with open(tmp, "wb") as f: + for chunk in r.iter_content(CHUNK_SIZE): + if chunk: + f.write(chunk) + hasher.update(chunk) + + h = hasher.hexdigest() + if h != hash_: + raise DownloadError( + f'bad hash (expected {hash_}, found {h})' + ) + layers_dir = destination / 'layers' + layers_dir.mkdir(exist_ok=True, parents=True) + shutil.move(tmp, layers_dir / filename) + + tags_dir = destination / 'tags' + tags_dir.mkdir(exist_ok=True, parents=True) + with open(tags_dir / f'{tag}.json', "w") as f: + json.dump({'digest': image_digest, 'layers': required}, f) + + # Remove unused layers. 
+ required = set(required) + for tag in glob.glob(str(destination / 'tags/*.json')): + with open(tag) as f: + tag = json.load(f) + required |= set(tag["layers"]) + required = [f'{hash_}.tar.gz' for hash_ in required] + + for layer in glob.glob(str(destination / 'layers/*.tar.gz')): + layer = Path(layer) + if layer.name not in required: + debug('REMOVE', f'{self.image} [layer/{layer.stem}]') + layer.unlink() diff --git a/python_appimage/manylinux/extract.py b/python_appimage/manylinux/extract.py new file mode 100644 index 0000000..6202aa7 --- /dev/null +++ b/python_appimage/manylinux/extract.py @@ -0,0 +1,327 @@ +from dataclasses import dataclass, field +from distutils.version import LooseVersion +import glob +import json +import os +import re +from pathlib import Path +import shutil +import stat +import subprocess +from typing import Dict, List, NamedTuple, Optional, Union + +from .config import Arch, PythonImpl, PythonVersion +from ..utils.deps import ensure_excludelist, EXCLUDELIST +from ..utils.log import debug, log + + +@dataclass(frozen=True) +class PythonExtractor: + '''Python extractor from an extracted Manylinux image.''' + + arch: Arch + '''Target architecture''' + + prefix: Path + '''Target image path''' + + tag: str + '''Python binary tag''' + + + excludelist: Optional[Path] = None + '''Exclude list for shared libraries.''' + + patchelf: Optional[Path] = None + '''Patchelf executable.''' + + + excluded: List[str] = field(init=False) + '''Excluded shared libraries.''' + + impl: PythonImpl = field(init=False) + '''Python implementation''' + + library_path: List[str] = field(init=False) + '''Search paths for libraries (LD_LIBRARY_PATH)''' + + python_prefix: Path = field(init=False) + '''Python installation prefix''' + + version: PythonVersion = field(init=False) + '''Python version''' + + + def __post_init__(self): + # Locate Python installation. 
+ link = os.readlink(self.prefix / f'opt/python/{self.tag}') + if not link.startswith('/'): + raise NotImplementedError() + object.__setattr__(self, 'python_prefix', self.prefix / link[1:]) + + # Parse implementation and version. + head, tail = Path(link).name.split('-', 1) + if head == 'cpython': + impl = PythonImpl.CPYTHON + version = PythonVersion.from_str(tail) + else: + raise NotImplementedError() + object.__setattr__(self, 'impl', impl) + object.__setattr__(self, 'version', version) + + # Set libraries search path. + paths = [] + if self.arch in (Arch.AARCH64, Arch.X86_64): + paths.append(self.prefix / 'lib64') + elif self.arch == Arch.I686: + paths.append(self.prefix / 'lib') + else: + raise NotImplementedError() + paths.append(self.prefix / 'usr/local/lib') + + ssl = glob.glob(str(self.prefix / 'opt/_internal/openssl-*')) + if ssl: + paths.append(Path(ssl[0]) / 'lib') + + object.__setattr__(self, 'library_path', paths) + + # Set excluded libraries. + if self.excludelist: + excludelist = Path(self.excludelist) + else: + ensure_excludelist() + excludelist = Path(EXCLUDELIST) + excluded = [] + with excludelist.open() as f: + for line in f: + line = line.strip() + if line and not line.startswith('#'): + excluded.append(line) + object.__setattr__(self, 'excluded', excluded) + + # Set patchelf, if not provided. + if self.patchelf is None: + paths = ( + Path(__file__).parent / 'bin', + Path.home() / '.local/bin' + ) + for path in paths: + patchelf = path / 'patchelf' + if patchelf.exists(): + break + else: + raise NotImplementedError() + object.__setattr__(self, 'patchelf', patchelf) + else: + assert(self.patchelf.exists()) + + + def extract(self, destination): + '''Extract Python runtime.''' + + python = f'python{self.version.short()}' + runtime = f'bin/{python}' + packages = f'lib/{python}' + pip = f'bin/pip{self.version.short()}' + + # Locate include files. 
+ include = glob.glob(str(self.python_prefix / 'include/*')) + if include: + include = Path(include[0]).name + include = f'include/{include}' + else: + raise NotImplementedError() + + # Clone Python runtime. + (destination / 'bin').mkdir(exist_ok=True, parents=True) + shutil.copy(self.python_prefix / runtime, destination / runtime) + + short = Path(destination / f'bin/python{self.version.major}') + short.unlink(missing_ok=True) + short.symlink_to(python) + short = Path(destination / 'bin/python') + short.unlink(missing_ok=True) + short.symlink_to(f'python{self.version.major}') + + # Clone pip wrapper. + with open(self.python_prefix / pip) as f: + f.readline() # Skip shebang. + body = f.read() + + with open(destination / pip, 'w') as f: + f.write('#! /bin/sh\n') + f.write(' '.join(( + '"exec"', + f'"$(dirname $(readlink -f ${0}))/{python}"', + '"$0"', + '"$@"\n' + ))) + f.write(body) + shutil.copymode(self.python_prefix / pip, destination / pip) + + short = Path(destination / f'bin/pip{self.version.major}') + short.unlink(missing_ok=True) + short.symlink_to(f'pip{self.version.short()}') + short = Path(destination / 'bin/pip') + short.unlink(missing_ok=True) + short.symlink_to(f'pip{self.version.major}') + + # Clone Python packages. + for folder in (packages, include): + shutil.copytree(self.python_prefix / folder, destination / folder, + symlinks=True, dirs_exist_ok=True) + + # Remove some clutters. + shutil.rmtree(destination / packages / 'test', ignore_errors=True) + for root, dirs, files in os.walk(destination / packages): + root = Path(root) + for d in dirs: + if d == '__pycache__': + shutil.rmtree(root / d, ignore_errors=True) + for f in files: + if f.endswith('.pyc'): + (root / f).unlink() + + # Map binary dependencies. 
+ libs = self.ldd(self.python_prefix / f'bin/{python}') + path = Path(self.python_prefix / f'{packages}/lib-dynload') + for module in glob.glob(str(path / "*.so")): + l = self.ldd(module) + libs.update(l) + + # Copy and patch binary dependencies. + libdir = destination / 'lib' + for (name, src) in libs.items(): + dst = libdir / name + shutil.copy(src, dst, follow_symlinks=True) + # Some libraries are read-only, which prevents overriding the + # destination directory. Below, we change the permission of + # destination files to read-write (for the owner). + mode = dst.stat().st_mode + if not (mode & stat.S_IWUSR): + mode = mode | stat.S_IWUSR + dst.chmod(mode) + + self.set_rpath(dst, '$ORIGIN') + + # Patch RPATHs of binary modules. + path = Path(destination / f'{packages}/lib-dynload') + for module in glob.glob(str(path / "*.so")): + src = Path(module) + dst = os.path.relpath(libdir, src.parent) + self.set_rpath(src, f'$ORIGIN/{dst}') + + # Patch RPATHs of Python runtime. + src = destination / runtime + dst = os.path.relpath(libdir, src.parent) + self.set_rpath(src, f'$ORIGIN/{dst}') + + # Copy SSL certificates (i.e. clone certifi). + certs = self.prefix / 'opt/_internal/certs.pem' + if certs.is_symlink(): + dst = self.prefix / str(certs.readlink())[1:] + certifi = dst.parent + assert(certifi.name == 'certifi') + site_packages = certifi.parent + assert(site_packages.name == 'site-packages') + + for src in glob.glob(str(site_packages / 'certifi*')): + src = Path(src) + dst = destination / f'{packages}/site-packages/{src.name}' + if not dst.exists(): + shutil.copytree(src, dst, symlinks=True) + else: + raise NotImplementedError() + + # Copy Tcl & Tk data. 
+ tcltk_src = self.prefix / 'usr/local/lib' + tx_version = [] + for match in glob.glob(str(tcltk_src / 'tk*')): + path = Path(match) + if path.is_dir(): + tx_version.append(LooseVersion(path.name[2:])) + tx_version.sort() + tx_version = tx_version[-1] + + tcltk_dir = Path(destination / 'usr/share/tcltk') + tcltk_dir.mkdir(exist_ok=True, parents=True) + + for tx in ('tcl', 'tk'): + name = f'{tx}{tx_version}' + src = tcltk_src / name + dst = tcltk_dir / name + shutil.copytree(src, dst, symlinks=True, dirs_exist_ok=True) + + + def ldd(self, target: Path) -> Dict[str, Path]: + '''Cross-platform implementation of ldd, using readelf.''' + + pattern = re.compile(r'[(]NEEDED[)]\s+Shared library:\s+\[([^\]]+)\]') + dependencies = dict() + + def recurse(target: Path): + result = subprocess.run(f'readelf -d {target}', shell=True, + check=True, capture_output=True) + stdout = result.stdout.decode() + matches = pattern.findall(stdout) + + for match in matches: + if (match not in dependencies) and (match not in self.excluded): + path = self.locate_library(match) + dependencies[match] = path + subs = recurse(path) + + recurse(target) + return dependencies + + + def locate_library(self, name: str) -> Path: + '''Locate a library given its qualified name.''' + + for dirname in self.library_path: + path = dirname / name + if path.exists(): + return path + else: + raise FileNotFoundError(name) + + + def set_rpath(self, target, rpath): + cmd = f'{self.patchelf} --print-rpath {target}' + result = subprocess.run(cmd, shell=True, check=True, + capture_output=True) + current_rpath = result.stdout.decode().strip() + if current_rpath != rpath: + cmd = f"{self.patchelf} --set-rpath '{rpath}' {target}" + subprocess.run(cmd, shell=True, check=True, capture_output=True) + + +@dataclass(frozen=True) +class ImageExtractor: + '''Manylinux image extractor from layers.''' + + prefix: Path + '''Manylinux image prefix.''' + + tag: Optional[str] = 'latest' + '''Manylinux image tag.''' + + + def 
extract(self, destination: Path): + '''Extract Manylinux image.''' + + with open(self.prefix / f'tags/{self.tag}.json') as f: + meta = json.load(f) + layers = meta['layers'] + + for layer in layers: + debug('EXTRACT', f'{layer}.tar.gz') + + filename = self.prefix / f'layers/{layer}.tar.gz' + cmd = ' && '.join(( + f'mkdir -p {destination}', + f'tar -xzf {filename} -C {destination}', + f'chmod u+rw -R {destination}' + )) + process = subprocess.run(cmd, shell=True, check=True, + capture_output=True) diff --git a/python_appimage/utils/deps.py b/python_appimage/utils/deps.py index 7b01357..3fa3113 100644 --- a/python_appimage/utils/deps.py +++ b/python_appimage/utils/deps.py @@ -9,28 +9,30 @@ from .tmp import TemporaryDirectory from .url import urlretrieve -__all__ = ['APPIMAGETOOL', 'EXCLUDELIST', 'PATCHELF', 'PREFIX', - 'ensure_appimagetool', 'ensure_excludelist', 'ensure_patchelf'] - - _ARCH = platform.machine() +_CACHE_DIR = os.path.expanduser('~/.cache/python-appimage') + PREFIX = os.path.abspath(os.path.dirname(__file__) + '/..') '''Package installation prefix''' -APPIMAGETOOL_DIR = os.path.expanduser('~/.local/bin') +APPIMAGETOOL_DIR = os.path.join(_CACHE_DIR, 'bin') '''Location of the appimagetool binary''' APPIMAGETOOL_VERSION = '12' '''Version of the appimagetool binary''' -EXCLUDELIST = PREFIX + '/data/excludelist' +EXCLUDELIST = os.path.join(_CACHE_DIR, 'share/excludelist') '''AppImage exclusion list''' -PATCHELF = os.path.expanduser('~/.local/bin/patchelf') +PATCHELF = os.path.join(_CACHE_DIR, 'bin/patchelf') '''Location of the PatchELF binary''' +PATCHELF_VERSION = '0.14.3' +'''Version of the patchelf binary''' + + def ensure_appimagetool(dry=False): '''Fetch appimagetool from the web if not available locally ''' @@ -91,19 +93,18 @@ def ensure_patchelf(): if os.path.exists(PATCHELF): return False - iarch = 'i386' if _ARCH == 'i686' else _ARCH - appimage = 'patchelf-{0:}.AppImage'.format(iarch) - baseurl = 
'https://github.com/niess/patchelf.appimage/releases/download' + tgz = '-'.join(('patchelf', PATCHELF_VERSION, _ARCH)) + '.tar.gz' + baseurl = 'https://github.com/NixOS/patchelf' log('INSTALL', 'patchelf from %s', baseurl) dirname = os.path.dirname(PATCHELF) patchelf = dirname + '/patchelf' make_tree(dirname) with TemporaryDirectory() as tmpdir: - urlretrieve(os.path.join(baseurl, 'rolling', appimage), appimage) - os.chmod(appimage, stat.S_IRWXU) - system(('./' + appimage, '--appimage-extract')) - copy_file('squashfs-root/usr/bin/patchelf', patchelf) + urlretrieve(os.path.join(baseurl, 'releases', 'download', + PATCHELF_VERSION, tgz), tgz) + system(('tar', 'xzf', tgz)) + copy_file('bin/patchelf', patchelf) os.chmod(patchelf, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) return True diff --git a/python_appimage/utils/url.py b/python_appimage/utils/url.py index 8f24c6a..0cdd9c8 100644 --- a/python_appimage/utils/url.py +++ b/python_appimage/utils/url.py @@ -32,6 +32,10 @@ def urlretrieve(url, filename=None): else: debug('DOWNLOAD', '%s as %s', url, filename) + parent_directory = os.path.dirname(filename) + if parent_directory and not os.path.exists(parent_directory): + os.makedirs(parent_directory) + if _urlretrieve is None: data = urllib2.urlopen(url).read() with open(filename, 'w') as f: