On Thu, Oct 3, 2019 at 9:37 AM Michał Górny wrote: > Add a support for the subset of GLEP 75 needed by Gentoo Infra. This > includes fetching and parsing layout.conf, and support for flat layout > and filename-hash layout with cutoffs being multiplies of 4. > > Bug: https://bugs.gentoo.org/646898 > Signed-off-by: Michał Górny > --- > lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++- > 1 file changed, 135 insertions(+), 4 deletions(-) > > Changes in v2: switched to a more classy layout to make the code > reusable in emirrordist. > > diff --git a/lib/portage/package/ebuild/fetch.py > b/lib/portage/package/ebuild/fetch.py > index 227bf45ae..18e3d390a 100644 > --- a/lib/portage/package/ebuild/fetch.py > +++ b/lib/portage/package/ebuild/fetch.py > @@ -7,12 +7,15 @@ __all__ = ['fetch'] > > import errno > import io > +import itertools > +import json > import logging > import random > import re > import stat > import sys > import tempfile > +import time > > from collections import OrderedDict > > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(), > 'portage.package.ebuild.doebuild:doebuild_environment,' + \ > '_doebuild_spawn', > 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs', > + > 'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError', > + 'portage.util._urlopen:urlopen', > ) > > from portage import os, selinux, shutil, _encodings, \ > _movefile, _shell_quote, _unicode_encode > from portage.checksum import (get_valid_checksum_keys, perform_md5, > verify_all, > - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter) > + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter, > + checksum_str) > from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \ > - GLOBAL_CONFIG_PATH > + GLOBAL_CONFIG_PATH, CACHE_PATH > from portage.data import portage_gid, portage_uid, secpass, > userpriv_groups > from portage.exception import FileNotFound, OperationNotPermitted, \ > PortageException, 
TryAgain > @@ -253,6 +259,130 @@ _size_suffix_map = { > 'Y' : 80, > } > > + > +class FlatLayout(object): > + def get_path(self, filename): > + return filename > + > + > +class FilenameHashLayout(object): > + def __init__(self, algo, cutoffs): > + self.algo = algo > + self.cutoffs = [int(x) for x in cutoffs.split(':')] > + > + def get_path(self, filename): > + fnhash = checksum_str(filename.encode('utf8'), self.algo) > + ret = '' > + for c in self.cutoffs: > + assert c % 4 == 0 > I'm not quite sure what this assert is doing. I'm not super in favor of asserts (I'd rather see an exception like raise FooError("...")), but if you are going to use it, please use something like: assert c % 4 == 0, "Some description of why we put this assert here so if it fires we can do something useful." + c = c // 4 > + ret += fnhash[:c] + '/' > + fnhash = fnhash[c:] > + return ret + filename > + > + > +class MirrorLayoutConfig(object): > + """ > + Class to read layout.conf from a mirror. > + """ > + > + def __init__(self): > + self.structure = () > + > + def read_from_file(self, f): > + cp = SafeConfigParser() > + read_configs(cp, [f]) > + vals = [] > + for i in itertools.count(): > + try: > + vals.append(tuple(cp.get('structure', '%d' > % i).split())) > + except NoOptionError: > + break > + self.structure = tuple(vals) > + > + def serialize(self): > + return self.structure > + > + def deserialize(self, data): > + self.structure = data > + > + @staticmethod > + def validate_structure(val): > + if val == ('flat',): > + return True > + if val[0] == 'filename-hash' and len(val) == 3: > + if val[1] not in get_valid_checksum_keys(): > + return False > + # validate cutoffs > + for c in val[2].split(':'): > + try: > + c = int(c) > + except ValueError: > + break > + else: > + if c % 4 != 0: > + break > + else: > + return True > + return False > + return False > + > + def get_best_supported_layout(self): > + for val in self.structure: > + if self.validate_structure(val): > + if val[0] == 'flat': > 
+ return FlatLayout() > + elif val[0] == 'filename-hash': > + return FilenameHashLayout(val[1], > val[2]) > + else: > + # fallback > + return FlatLayout() > + > + > +def get_mirror_url(mirror_url, filename, eroot): > + """ > + Get correct fetch URL for a given file, accounting for mirror > + layout configuration. > + > + @param mirror_url: Base URL to the mirror (without '/distfiles') > + @param filename: Filename to fetch > + @param eroot: EROOT to use for the cache file > + @return: Full URL to fetch > + """ > + > + mirror_conf = MirrorLayoutConfig() > + > + cache_file = os.path.join(eroot, CACHE_PATH, > 'mirror-metadata.json') > + try: > + with open(cache_file, 'r') as f: > + cache = json.load(f) > + except (IOError, ValueError): > + cache = {} > + > + ts, data = cache.get(mirror_url, (0, None)) > + # refresh at least daily > + if ts >= time.time() - 86400: > + mirror_conf.deserialize(data) > + else: > + try: > + f = urlopen(mirror_url + '/distfiles/layout.conf') > + try: > + data = io.StringIO(f.read().decode('utf8')) > + finally: > + f.close() > + > + mirror_conf.read_from_file(data) > + except IOError: > + pass > + > + cache[mirror_url] = (time.time(), mirror_conf.serialize()) > + with open(cache_file, 'w') as f: > + json.dump(cache, f) > + > + return (mirror_url + "/distfiles/" + > + > mirror_conf.get_best_supported_layout().get_path(filename)) > + > + > def fetch(myuris, mysettings, listonly=0, fetchonly=0, > locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None, > allow_missing_digests=True): > @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, > for myfile, myuri in file_uri_tuples: > if myfile not in filedict: > filedict[myfile]=[] > - for y in range(0,len(locations)): > - > filedict[myfile].append(locations[y]+"/distfiles/"+myfile) > + for l in locations: > + filedict[myfile].append(get_mirror_url(l, > myfile, > + mysettings["EROOT"])) > if myuri is None: > continue > if myuri[:9]=="mirror://": > -- > 2.23.0 > > >