On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <mgorny@gentoo.org> wrote:
Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for the flat layout
and the filename-hash layout with cutoffs that are multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <mgorny@gentoo.org>
---
 lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

Changes in v2: switched to a class-based layout to make the code
reusable in emirrordist.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..18e3d390a 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -7,12 +7,15 @@ __all__ = ['fetch']

 import errno
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time

 from collections import OrderedDict

@@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.package.ebuild.doebuild:doebuild_environment,' + \
                '_doebuild_spawn',
        'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+       'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
+       'portage.util._urlopen:urlopen',
 )

 from portage import os, selinux, shutil, _encodings, \
        _movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
-       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+       checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
-       GLOBAL_CONFIG_PATH
+       GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
        PortageException, TryAgain
@@ -253,6 +259,130 @@ _size_suffix_map = {
        'Y' : 80,
 }

+
+class FlatLayout(object):
+       def get_path(self, filename):
+               return filename
+
+
+class FilenameHashLayout(object):
+       def __init__(self, algo, cutoffs):
+               self.algo = algo
+               self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+       def get_path(self, filename):
+               fnhash = checksum_str(filename.encode('utf8'), self.algo)
+               ret = ''
+               for c in self.cutoffs:
+                       assert c % 4 == 0

I'm not quite sure what this assert is doing. I'm not super in favor of asserts (I'd rather see an exception, e.g. raise FooError("...")), but if you are going to use one, please use something like:

assert c % 4 == 0, "Some description of why we put this assert here, so that if it fires we can do something useful."

+                       c = c // 4
+                       ret += fnhash[:c] + '/'
+                       fnhash = fnhash[c:]
+               return ret + filename
+
+
+class MirrorLayoutConfig(object):
+       """
+       Class to read layout.conf from a mirror.
+       """
+
+       def __init__(self):
+               self.structure = ()
+
+       def read_from_file(self, f):
+               cp = SafeConfigParser()
+               read_configs(cp, [f])
+               vals = []
+               for i in itertools.count():
+                       try:
+                               vals.append(tuple(cp.get('structure', '%d' % i).split()))
+                       except NoOptionError:
+                               break
+               self.structure = tuple(vals)
+
+       def serialize(self):
+               return self.structure
+
+       def deserialize(self, data):
+               self.structure = data
+
+       @staticmethod
+       def validate_structure(val):
+               if val == ('flat',):
+                       return True
+               if val[0] == 'filename-hash' and len(val) == 3:
+                       if val[1] not in get_valid_checksum_keys():
+                               return False
+                       # validate cutoffs
+                       for c in val[2].split(':'):
+                               try:
+                                       c = int(c)
+                               except ValueError:
+                                       break
+                               else:
+                                       if c % 4 != 0:
+                                               break
+                       else:
+                               return True
+                       return False
+               return False
+
+       def get_best_supported_layout(self):
+               for val in self.structure:
+                       if self.validate_structure(val):
+                               if val[0] == 'flat':
+                                       return FlatLayout()
+                               elif val[0] == 'filename-hash':
+                                       return FilenameHashLayout(val[1], val[2])
+               else:
+                       # fallback
+                       return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+       """
+       Get correct fetch URL for a given file, accounting for mirror
+       layout configuration.
+
+       @param mirror_url: Base URL to the mirror (without '/distfiles')
+       @param filename: Filename to fetch
+       @param eroot: EROOT to use for the cache file
+       @return: Full URL to fetch
+       """
+
+       mirror_conf = MirrorLayoutConfig()
+
+       cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+       try:
+               with open(cache_file, 'r') as f:
+                       cache = json.load(f)
+       except (IOError, ValueError):
+               cache = {}
+
+       ts, data = cache.get(mirror_url, (0, None))
+       # refresh at least daily
+       if ts >= time.time() - 86400:
+               mirror_conf.deserialize(data)
+       else:
+               try:
+                       f = urlopen(mirror_url + '/distfiles/layout.conf')
+                       try:
+                               data = io.StringIO(f.read().decode('utf8'))
+                       finally:
+                               f.close()
+
+                       mirror_conf.read_from_file(data)
+               except IOError:
+                       pass
+
+               cache[mirror_url] = (time.time(), mirror_conf.serialize())
+               with open(cache_file, 'w') as f:
+                       json.dump(cache, f)
+
+       return (mirror_url + "/distfiles/" +
+                       mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
        allow_missing_digests=True):
@@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        for myfile, myuri in file_uri_tuples:
                if myfile not in filedict:
                        filedict[myfile]=[]
-                       for y in range(0,len(locations)):
-                               filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+                       for l in locations:
+                               filedict[myfile].append(get_mirror_url(l, myfile,
+                                               mysettings["EROOT"]))
                if myuri is None:
                        continue
                if myuri[:9]=="mirror://":
--
2.23.0