From: "Michał Górny" <mgorny@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Subject: Re: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure
Date: Fri, 04 Oct 2019 07:53:34 +0200 [thread overview]
Message-ID: <7aad9f446dcfb3da376d57f65a4b2b6c9a5171aa.camel@gentoo.org> (raw)
In-Reply-To: <CAAr7Pr-V6CNvDiwL7PziQx5CLDNO110WFva+pzN2TUHfTOgaeQ@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 8892 bytes --]
On Thu, 2019-10-03 at 22:01 -0700, Alec Warner wrote:
> On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <mgorny@gentoo.org> wrote:
>
> > Add support for the subset of GLEP 75 needed by Gentoo Infra. This
> > includes fetching and parsing layout.conf, and support for flat layout
> > and filename-hash layout with cutoffs being multiples of 4.
> >
> > Bug: https://bugs.gentoo.org/646898
> > Signed-off-by: Michał Górny <mgorny@gentoo.org>
> > ---
> > lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
> > 1 file changed, 135 insertions(+), 4 deletions(-)
> >
> > Changes in v2: switched to a more class-based layout to make the code
> > reusable in emirrordist.
> >
> > diff --git a/lib/portage/package/ebuild/fetch.py
> > b/lib/portage/package/ebuild/fetch.py
> > index 227bf45ae..18e3d390a 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -7,12 +7,15 @@ __all__ = ['fetch']
> >
> > import errno
> > import io
> > +import itertools
> > +import json
> > import logging
> > import random
> > import re
> > import stat
> > import sys
> > import tempfile
> > +import time
> >
> > from collections import OrderedDict
> >
> > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
> > 'portage.package.ebuild.doebuild:doebuild_environment,' + \
> > '_doebuild_spawn',
> > 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> > +
> > 'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> > + 'portage.util._urlopen:urlopen',
> > )
> >
> > from portage import os, selinux, shutil, _encodings, \
> > _movefile, _shell_quote, _unicode_encode
> > from portage.checksum import (get_valid_checksum_keys, perform_md5,
> > verify_all,
> > - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> > + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> > + checksum_str)
> > from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> > - GLOBAL_CONFIG_PATH
> > + GLOBAL_CONFIG_PATH, CACHE_PATH
> > from portage.data import portage_gid, portage_uid, secpass,
> > userpriv_groups
> > from portage.exception import FileNotFound, OperationNotPermitted, \
> > PortageException, TryAgain
> > @@ -253,6 +259,130 @@ _size_suffix_map = {
> > 'Y' : 80,
> > }
> >
> > +
> > +class FlatLayout(object):
> > + def get_path(self, filename):
> > + return filename
> > +
> > +
> > +class FilenameHashLayout(object):
> > + def __init__(self, algo, cutoffs):
> > + self.algo = algo
> > + self.cutoffs = [int(x) for x in cutoffs.split(':')]
> > +
> > + def get_path(self, filename):
> > + fnhash = checksum_str(filename.encode('utf8'), self.algo)
> > + ret = ''
> > + for c in self.cutoffs:
> > + assert c % 4 == 0
> >
>
> I'm not quite sure what this assert is doing. I'm not super in favor of
> asserts (I'd rather see an exception like raise FooError("...")), but if you
> are going to use it please use something like:
>
> assert c %4 == 0, "Some description of why we put this assert here so if it
> fires we can do something useful."
The cutoff is already checked in validate_structure(). Maybe I could
delegate that check to this class to make things clearer.
>
> > + c = c // 4
> > + ret += fnhash[:c] + '/'
> > + fnhash = fnhash[c:]
> > + return ret + filename
> > +
> > +
> > +class MirrorLayoutConfig(object):
> > + """
> > + Class to read layout.conf from a mirror.
> > + """
> > +
> > + def __init__(self):
> > + self.structure = ()
> > +
> > + def read_from_file(self, f):
> > + cp = SafeConfigParser()
> > + read_configs(cp, [f])
> > + vals = []
> > + for i in itertools.count():
> > + try:
> > + vals.append(tuple(cp.get('structure', '%d'
> > % i).split()))
> > + except NoOptionError:
> > + break
> > + self.structure = tuple(vals)
> > +
> > + def serialize(self):
> > + return self.structure
> > +
> > + def deserialize(self, data):
> > + self.structure = data
> > +
> > + @staticmethod
> > + def validate_structure(val):
> > + if val == ('flat',):
> > + return True
> > + if val[0] == 'filename-hash' and len(val) == 3:
> > + if val[1] not in get_valid_checksum_keys():
> > + return False
> > + # validate cutoffs
> > + for c in val[2].split(':'):
> > + try:
> > + c = int(c)
> > + except ValueError:
> > + break
> > + else:
> > + if c % 4 != 0:
> > + break
> > + else:
> > + return True
> > + return False
> > + return False
> > +
> > + def get_best_supported_layout(self):
> > + for val in self.structure:
> > + if self.validate_structure(val):
> > + if val[0] == 'flat':
> > + return FlatLayout()
> > + elif val[0] == 'filename-hash':
> > + return FilenameHashLayout(val[1],
> > val[2])
> > + else:
> > + # fallback
> > + return FlatLayout()
> > +
> > +
> > +def get_mirror_url(mirror_url, filename, eroot):
> > + """
> > + Get correct fetch URL for a given file, accounting for mirror
> > + layout configuration.
> > +
> > + @param mirror_url: Base URL to the mirror (without '/distfiles')
> > + @param filename: Filename to fetch
> > + @param eroot: EROOT to use for the cache file
> > + @return: Full URL to fetch
> > + """
> > +
> > + mirror_conf = MirrorLayoutConfig()
> > +
> > + cache_file = os.path.join(eroot, CACHE_PATH,
> > 'mirror-metadata.json')
> > + try:
> > + with open(cache_file, 'r') as f:
> > + cache = json.load(f)
> > + except (IOError, ValueError):
> > + cache = {}
> > +
> > + ts, data = cache.get(mirror_url, (0, None))
> > + # refresh at least daily
> > + if ts >= time.time() - 86400:
> > + mirror_conf.deserialize(data)
> > + else:
> > + try:
> > + f = urlopen(mirror_url + '/distfiles/layout.conf')
> > + try:
> > + data = io.StringIO(f.read().decode('utf8'))
> > + finally:
> > + f.close()
> > +
> > + mirror_conf.read_from_file(data)
> > + except IOError:
> > + pass
> > +
> > + cache[mirror_url] = (time.time(), mirror_conf.serialize())
> > + with open(cache_file, 'w') as f:
> > + json.dump(cache, f)
> > +
> > + return (mirror_url + "/distfiles/" +
> > +
> > mirror_conf.get_best_supported_layout().get_path(filename))
> > +
> > +
> > def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> > locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
> > allow_missing_digests=True):
> > @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> > for myfile, myuri in file_uri_tuples:
> > if myfile not in filedict:
> > filedict[myfile]=[]
> > - for y in range(0,len(locations)):
> > -
> > filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> > + for l in locations:
> > + filedict[myfile].append(get_mirror_url(l,
> > myfile,
> > + mysettings["EROOT"]))
> > if myuri is None:
> > continue
> > if myuri[:9]=="mirror://":
> > --
> > 2.23.0
> >
> >
> >
--
Best regards,
Michał Górny
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 618 bytes --]
prev parent reply other threads:[~2019-10-04 5:53 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-03 16:36 [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure Michał Górny
2019-10-04 5:01 ` Alec Warner
2019-10-04 5:53 ` Michał Górny [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7aad9f446dcfb3da376d57f65a4b2b6c9a5171aa.camel@gentoo.org \
--to=mgorny@gentoo.org \
--cc=gentoo-portage-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox