Dnia 2015-04-16, o godz. 10:38:22 Brian Dolbec napisał(a): > On Sun, 5 Apr 2015 12:08:31 +0200 > Michał Górny wrote: > > > The squashdelta module provides syncing via SquashFS snapshots. For > > the initial sync, a complete snapshot is fetched and placed in > > /var/cache/portage/squashfs. On subsequent sync operations, deltas are > > fetched from the mirror and used to reconstruct the newest snapshot. > > > > The distfile fetching logic is reused to fetch the remote files > > and verify their checksums. Additionally, the sha512sum.txt file > > should be OpenPGP-verified after fetching but this is currently > > unimplemented. > > > > After fetching, Portage tries to (re-)mount the SquashFS in repository > > location. > > --- > > cnf/repos.conf | 4 + > > pym/portage/sync/modules/squashdelta/README | 124 > > +++++++++++++ pym/portage/sync/modules/squashdelta/__init__.py | > > 37 ++++ .../sync/modules/squashdelta/squashdelta.py | 192 > > +++++++++++++++++++++ 4 files changed, 357 insertions(+) > > create mode 100644 pym/portage/sync/modules/squashdelta/README > > create mode 100644 pym/portage/sync/modules/squashdelta/__init__.py > > create mode 100644 > > pym/portage/sync/modules/squashdelta/squashdelta.py > > > > diff --git a/cnf/repos.conf b/cnf/repos.conf > > index 1ca98ca..062fc0d 100644 > > --- a/cnf/repos.conf > > +++ b/cnf/repos.conf > > @@ -6,3 +6,7 @@ location = /usr/portage > > sync-type = rsync > > sync-uri = rsync://rsync.gentoo.org/gentoo-portage > > auto-sync = yes > > + > > +# for daily squashfs snapshots > > +#sync-type = squashdelta > > +#sync-uri = mirror://gentoo/../snapshots/squashfs > > > > > > > diff --git a/pym/portage/sync/modules/squashdelta/__init__.py > > b/pym/portage/sync/modules/squashdelta/__init__.py new file mode > > 100644 index 0000000..1a17dea > > --- /dev/null > > +++ b/pym/portage/sync/modules/squashdelta/__init__.py > > @@ -0,0 +1,37 @@ > > +# vim:fileencoding=utf-8:noet > > +# (c) 2015 Michał Górny > > +# Distributed under the terms of the GNU General Public License v2 > > + > > +from portage.sync.config_checks import CheckSyncConfig > > + > > + > > +DEFAULT_CACHE_LOCATION = '/var/cache/portage/squashfs' > > + > > + > > +class CheckSquashDeltaConfig(CheckSyncConfig): > > + def __init__(self, repo, logger): > > + CheckSyncConfig.__init__(self, repo, logger) > > + self.checks.append('check_cache_location') > > + > > + def check_cache_location(self): > > + # TODO: make it configurable when Portage is fixed > > to support > > + # arbitrary config variables > > + pass > > + > > + > > +module_spec = { > > + 'name': 'squashdelta', > > + 'description': 'Syncing SquashFS images using SquashDeltas', > > + 'provides': { > > + 'squashdelta-module': { > > + 'name': "squashdelta", > > + 'class': "SquashDeltaSync", > > + 'description': 'Syncing SquashFS images > > using SquashDeltas', > > + 'functions': ['sync', 'new', 'exists'], > > + 'func_desc': { > > + 'sync': 'Performs the sync of the > > repository', > > + }, > > + 'validate_config': CheckSquashDeltaConfig, > > + } > > + } > > +} > > diff --git a/pym/portage/sync/modules/squashdelta/squashdelta.py > > b/pym/portage/sync/modules/squashdelta/squashdelta.py new file mode > > 100644 index 0000000..a0dfc46 > > --- /dev/null > > +++ b/pym/portage/sync/modules/squashdelta/squashdelta.py > > @@ -0,0 +1,192 @@ > > +# vim:fileencoding=utf-8:noet > > +# (c) 2015 Michał Górny > > +# Distributed under the terms of the GNU General Public License v2 > > + > > +import errno > > +import io > > +import logging > > +import os > > +import os.path > > +import re > > + > > +import portage > > +from portage.package.ebuild.fetch import fetch > > +from portage.sync.syncbase import SyncBase > > + > > +from . import DEFAULT_CACHE_LOCATION > > + > > + > > +class SquashDeltaSync(SyncBase): > > > OK, I see a small mistake. You are subclassing SyncBase which does not > stub out a new() and you do not define one here. But you export a new() > in the module-spec above. Fixed. I removed them from [func_desc], and apparently forgot to do so from [functions]. > > + short_desc = "Repository syncing using SquashFS deltas" > > + > > + @staticmethod > > + def name(): > > + return "SquashDeltaSync" > > + > > + def __init__(self): > > + super(SquashDeltaSync, self).__init__( > > + 'squashmerge', > > 'dev-util/squashmerge') + > > + def sync(self, **kwargs): > > + self._kwargs(kwargs) > > + my_settings = portage.config(clone = self.settings) > > + cache_location = DEFAULT_CACHE_LOCATION > > + > > + # override fetching location > > + my_settings['DISTDIR'] = cache_location > > + > > + # make sure we append paths correctly > > + base_uri = self.repo.sync_uri > > + if not base_uri.endswith('/'): > > + base_uri += '/' > > + > > + def my_fetch(fn, **kwargs): > > + kwargs['try_mirrors'] = 0 > > + return fetch([base_uri + fn], my_settings, > > **kwargs) + > > + # fetch sha512sum.txt > > + sha512_path = os.path.join(cache_location, > > 'sha512sum.txt') > > + try: > > + os.unlink(sha512_path) > > + except OSError: > > + pass > > + if not my_fetch('sha512sum.txt'): > > + return (1, False) > > + > > + if 'webrsync-gpg' in my_settings.features: > > + # TODO: GPG signature verification > > + pass > > + > > + # sha512sum.txt parsing > > + with io.open(sha512_path, 'r', encoding='utf8') as f: > > + data = f.readlines() > > + > > + repo_re = re.compile(self.repo.name + '-(.*)$') > > + # current tag > > + current_re = re.compile('current:', re.IGNORECASE) > > + # checksum > > + checksum_re = re.compile('^([a-f0-9]{128})\s+(.*)$', > > re.IGNORECASE) + > > + def iter_snapshots(lines): > > + for l in lines: > > + m = current_re.search(l) > > + if m: > > + for s in l[m.end():].split(): > > + yield s > > + > > + def iter_checksums(lines): > > + for l in lines: > > + m = checksum_re.match(l) > > + if m: > > + yield (m.group(2), { > > + 'size': None, > > + 'SHA512': m.group(1), > > + }) > > + > > + # look for current indicator > > + for s in iter_snapshots(data): > > + m = repo_re.match(s) > > + if m: > > + new_snapshot = m.group(0) + '.sqfs' > > + new_version = m.group(1) > > + break > > + else: > > + logging.error('Unable to find current > > snapshot in sha512sum.txt') > > + return (1, False) > > + new_path = os.path.join(cache_location, new_snapshot) > > + > > + # get digests > > + my_digests = dict(iter_checksums(data)) > > + > > + # try to find a local snapshot > > + old_version = None > > + current_path = os.path.join(cache_location, > > + self.repo.name + '-current.sqfs') > > + try: > > + old_snapshot = os.readlink(current_path) > > + except OSError: > > + pass > > + else: > > + m = repo_re.match(old_snapshot) > > + if m and old_snapshot.endswith('.sqfs'): > > + old_version = m.group(1)[:-5] > > + old_path = > > os.path.join(cache_location, old_snapshot) + > > + if old_version is not None: > > + if old_version == new_version: > > + logging.info('Snapshot up-to-date, > > verifying integrity.') > > + else: > > + # attempt to update > > + delta_path = None > > + expected_delta = '%s-%s-%s.sqdelta' > > % ( > > + self.repo.name, > > old_version, new_version) > > + if expected_delta not in my_digests: > > + logging.warning('No delta > > for %s->%s, fetching new snapshot.' > > + % > > (old_version, new_version)) > > + else: > > + delta_path = > > os.path.join(cache_location, expected_delta) + > > + if not > > my_fetch(expected_delta, digests = my_digests): > > + return (4, False) > > + if not self.has_bin: > > + return (5, False) > > + > > + ret = > > portage.process.spawn([self.bin_command, > > + old_path, > > delta_path, new_path], **self.spawn_kwargs) > > + if ret != os.EX_OK: > > + > > logging.error('Merging the delta failed') > > + return (6, False) > > + > > + # pass-through to > > verification and cleanup + > > + # fetch full snapshot or verify the one we have > > + if not my_fetch(new_snapshot, digests = my_digests): > > + return (2, False) > > + > > + # create/update -current symlink > > + # using external ln for two reasons: > > + # 1. clean --force (unlike python's unlink+symlink) > > + # 2. easy userpriv (otherwise we'd have to lchown()) > > + ret = portage.process.spawn(['ln', '-s', '-f', > > new_snapshot, current_path], > > + **self.spawn_kwargs) > > + if ret != os.EX_OK: > > + logging.error('Unable to set -current > > symlink') > > + retrurn (3, False) > > + > > + # remove old snapshot > > + if old_version is not None and old_version != > > new_version: > > + try: > > + os.unlink(old_path) > > + except OSError as e: > > + logging.warning('Unable to unlink > > old snapshot: ' + str(e)) > > + if delta_path is not None: > > + try: > > + os.unlink(delta_path) > > + except OSError as e: > > + logging.warning('Unable to > > unlink old delta: ' + str(e)) > > + try: > > + os.unlink(sha512_path) > > + except OSError as e: > > + logging.warning('Unable to unlink > > sha512sum.txt: ' + str(e)) + > > + mount_cmd = ['mount', current_path, > > self.repo.location] > > + can_mount = True > > + if os.path.ismount(self.repo.location): > > + # need to umount old snapshot > > + ret = portage.process.spawn(['umount', '-l', > > self.repo.location]) > > + if ret != os.EX_OK: > > + logging.warning('Unable to unmount > > old SquashFS after update') > > + can_mount = False > > + else: > > + try: > > + os.makedirs(self.repo.location) > > + except OSError as e: > > + if e.errno != errno.EEXIST: > > + raise > > + > > + if can_mount: > > + ret = portage.process.spawn(mount_cmd) > > + if ret != os.EX_OK: > > + logging.warning('Unable to > > (re-)mount SquashFS after update') + > > + return (0, True) > > Overall the code itself looks decent. Aside from the small mistake > mentioned inline, my only concern is the sheer size of the sync(). It > is 162 lines and embeds 2 private functions. This code could easily be > broken up into several smaller task functions. It would make reading > the main sync() logic easier as well as the smaller task sections. I > am not a fan of the long winded functions and scripts present in > portage (this by no means is in the same category as many of those). > But I certainly don't want to let more of that in if I can help it. And > aim to reduce it while I'm the lead. Will try. > Ok, so the only data variable you wanted to add to the repos.conf was > the cache location? Yes, right now just that, I think. Maybe in the future we'd use per-repo OpenPGP verification switch. Something like: openpgp-verify = [no|yes|required] With 'yes' meaning 'verify if signed', and 'required' meaning 'refuse if not signed'. > I'll work on adding the gkeys integration in the gkeys branch I started > for the gpg verification. I see no point in porting the code from > emerge-webrsync's bash to python only to be replaced by gkeys in the > very near future. Please stub out a function & call for it when you > address the above issues. I'll fill in the code for it. Ok. -- Best regards, Michał Górny