From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id A071D138334 for ; Fri, 4 Oct 2019 05:53:40 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id D7A17E088C; Fri, 4 Oct 2019 05:53:39 +0000 (UTC) Received: from smtp.gentoo.org (mail.gentoo.org [IPv6:2001:470:ea4a:1:5054:ff:fec7:86e4]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id B3219E088C for ; Fri, 4 Oct 2019 05:53:39 +0000 (UTC) Received: from pomiot (c134-66.icpnet.pl [85.221.134.66]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) (Authenticated sender: mgorny) by smtp.gentoo.org (Postfix) with ESMTPSA id 27A4534B8D5; Fri, 4 Oct 2019 05:53:37 +0000 (UTC) Message-ID: <7aad9f446dcfb3da376d57f65a4b2b6c9a5171aa.camel@gentoo.org> Subject: Re: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure From: =?UTF-8?Q?Micha=C5=82_G=C3=B3rny?= To: gentoo-portage-dev@lists.gentoo.org Date: Fri, 04 Oct 2019 07:53:34 +0200 In-Reply-To: References: <20191003163632.7231-1-mgorny@gentoo.org> Organization: Gentoo Content-Type: multipart/signed; micalg="pgp-sha512"; protocol="application/pgp-signature"; boundary="=-w2vs6nN64pL9EuJWn0z9" User-Agent: Evolution 3.32.4 Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Auto-Response-Suppress: DR, RN, NRN, OOF, AutoReply MIME-Version: 1.0 X-Archives-Salt: 5b4f78fb-658d-44e6-bfc7-deba7a85f593 X-Archives-Hash: 451ada3d2cbb8d77c62530e52d361b6a --=-w2vs6nN64pL9EuJWn0z9 Content-Type: text/plain; 
charset="UTF-8" Content-Transfer-Encoding: quoted-printable On Thu, 2019-10-03 at 22:01 -0700, Alec Warner wrote: > On Thu, Oct 3, 2019 at 9:37 AM Micha=C5=82 G=C3=B3rny = wrote: >=20 > > Add support for the subset of GLEP 75 needed by Gentoo Infra. This > > includes fetching and parsing layout.conf, and support for flat layout > > and filename-hash layout with cutoffs being multiples of 4. > >=20 > > Bug: https://bugs.gentoo.org/646898 > > Signed-off-by: Micha=C5=82 G=C3=B3rny > > --- > > lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++- > > 1 file changed, 135 insertions(+), 4 deletions(-) > >=20 > > Changes in v2: switched to a more classy layout to make the code > > reusable in emirrordist. > >=20 > > diff --git a/lib/portage/package/ebuild/fetch.py > > b/lib/portage/package/ebuild/fetch.py > > index 227bf45ae..18e3d390a 100644 > > --- a/lib/portage/package/ebuild/fetch.py > > +++ b/lib/portage/package/ebuild/fetch.py > > @@ -7,12 +7,15 @@ __all__ =3D ['fetch'] > >=20 > > import errno > > import io > > +import itertools > > +import json > > import logging > > import random > > import re > > import stat > > import sys > > import tempfile > > +import time > >=20 > > from collections import OrderedDict > >=20 > > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(), > > 'portage.package.ebuild.doebuild:doebuild_environment,' + \ > > '_doebuild_spawn', > > 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs', > > + > > 'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError= ', > > + 'portage.util._urlopen:urlopen', > > ) > >=20 > > from portage import os, selinux, shutil, _encodings, \ > > _movefile, _shell_quote, _unicode_encode > > from portage.checksum import (get_valid_checksum_keys, perform_md5, > > verify_all, > > - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter) > > + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter, > > + checksum_str) > > from portage.const import 
BASH_BINARY, CUSTOM_MIRRORS_FILE, \ > > - GLOBAL_CONFIG_PATH > > + GLOBAL_CONFIG_PATH, CACHE_PATH > > from portage.data import portage_gid, portage_uid, secpass, > > userpriv_groups > > from portage.exception import FileNotFound, OperationNotPermitted, \ > > PortageException, TryAgain > > @@ -253,6 +259,130 @@ _size_suffix_map =3D { > > 'Y' : 80, > > } > >=20 > > + > > +class FlatLayout(object): > > + def get_path(self, filename): > > + return filename > > + > > + > > +class FilenameHashLayout(object): > > + def __init__(self, algo, cutoffs): > > + self.algo =3D algo > > + self.cutoffs =3D [int(x) for x in cutoffs.split(':')] > > + > > + def get_path(self, filename): > > + fnhash =3D checksum_str(filename.encode('utf8'), self.a= lgo) > > + ret =3D '' > > + for c in self.cutoffs: > > + assert c % 4 =3D=3D 0 > >=20 >=20 > I'm not quite sure what this assert is doing. I'm not super in favor of > asserts (I'd rather see an exception like raise FooError("...")), but if y= ou > are going to use it please use something like: >=20 > assert c % 4 =3D=3D 0, "Some description of why we put this assert here so= if it > fires we can do something useful." It's already checked in validate_structure(). Maybe I could delegate that check to this class to make things clearer. >=20 > + c =3D c // 4 > > + ret +=3D fnhash[:c] + '/' > > + fnhash =3D fnhash[c:] > > + return ret + filename > > + > > + > > +class MirrorLayoutConfig(object): > > + """ > > + Class to read layout.conf from a mirror. 
> > + """ > > + > > + def __init__(self): > > + self.structure =3D () > > + > > + def read_from_file(self, f): > > + cp =3D SafeConfigParser() > > + read_configs(cp, [f]) > > + vals =3D [] > > + for i in itertools.count(): > > + try: > > + vals.append(tuple(cp.get('structure', '= %d' > > % i).split())) > > + except NoOptionError: > > + break > > + self.structure =3D tuple(vals) > > + > > + def serialize(self): > > + return self.structure > > + > > + def deserialize(self, data): > > + self.structure =3D data > > + > > + @staticmethod > > + def validate_structure(val): > > + if val =3D=3D ('flat',): > > + return True > > + if val[0] =3D=3D 'filename-hash' and len(val) =3D=3D 3: > > + if val[1] not in get_valid_checksum_keys(): > > + return False > > + # validate cutoffs > > + for c in val[2].split(':'): > > + try: > > + c =3D int(c) > > + except ValueError: > > + break > > + else: > > + if c % 4 !=3D 0: > > + break > > + else: > > + return True > > + return False > > + return False > > + > > + def get_best_supported_layout(self): > > + for val in self.structure: > > + if self.validate_structure(val): > > + if val[0] =3D=3D 'flat': > > + return FlatLayout() > > + elif val[0] =3D=3D 'filename-hash': > > + return FilenameHashLayout(val[1= ], > > val[2]) > > + else: > > + # fallback > > + return FlatLayout() > > + > > + > > +def get_mirror_url(mirror_url, filename, eroot): > > + """ > > + Get correct fetch URL for a given file, accounting for mirror > > + layout configuration. 
> > + > > + @param mirror_url: Base URL to the mirror (without '/distfiles'= ) > > + @param filename: Filename to fetch > > + @param eroot: EROOT to use for the cache file > > + @return: Full URL to fetch > > + """ > > + > > + mirror_conf =3D MirrorLayoutConfig() > > + > > + cache_file =3D os.path.join(eroot, CACHE_PATH, > > 'mirror-metadata.json') > > + try: > > + with open(cache_file, 'r') as f: > > + cache =3D json.load(f) > > + except (IOError, ValueError): > > + cache =3D {} > > + > > + ts, data =3D cache.get(mirror_url, (0, None)) > > + # refresh at least daily > > + if ts >=3D time.time() - 86400: > > + mirror_conf.deserialize(data) > > + else: > > + try: > > + f =3D urlopen(mirror_url + '/distfiles/layout.c= onf') > > + try: > > + data =3D io.StringIO(f.read().decode('u= tf8')) > > + finally: > > + f.close() > > + > > + mirror_conf.read_from_file(data) > > + except IOError: > > + pass > > + > > + cache[mirror_url] =3D (time.time(), mirror_conf.seriali= ze()) > > + with open(cache_file, 'w') as f: > > + json.dump(cache, f) > > + > > + return (mirror_url + "/distfiles/" + > > + > > mirror_conf.get_best_supported_layout().get_path(filename)) > > + > > + > > def fetch(myuris, mysettings, listonly=3D0, fetchonly=3D0, > > locks_in_subdir=3D".locks", use_locks=3D1, try_mirrors=3D1, dig= ests=3DNone, > > allow_missing_digests=3DTrue): > > @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=3D0, fetchon= ly=3D0, > > for myfile, myuri in file_uri_tuples: > > if myfile not in filedict: > > filedict[myfile]=3D[] > > - for y in range(0,len(locations)): > > - > > filedict[myfile].append(locations[y]+"/distfiles/"+myfile) > > + for l in locations: > > + filedict[myfile].append(get_mirror_url(= l, > > myfile, > > + mysettings["EROOT"])) > > if myuri is None: > > continue > > if myuri[:9]=3D=3D"mirror://": > > -- > > 2.23.0 > >=20 > >=20 > >=20 --=20 Best regards, Micha=C5=82 G=C3=B3rny --=-w2vs6nN64pL9EuJWn0z9 Content-Type: application/pgp-signature; 
name="signature.asc" Content-Description: This is a digitally signed message part Content-Transfer-Encoding: 7bit -----BEGIN PGP SIGNATURE----- iQGTBAABCgB9FiEEx2qEUJQJjSjMiybFY5ra4jKeJA4FAl2W3l5fFIAAAAAALgAo aXNzdWVyLWZwckBub3RhdGlvbnMub3BlbnBncC5maWZ0aGhvcnNlbWFuLm5ldEM3 NkE4NDUwOTQwOThEMjhDQzhCMjZDNTYzOUFEQUUyMzI5RTI0MEUACgkQY5ra4jKe JA4NHgf+N19ntFDbEwW+cWoMrjZdwQ7S3CTIrUtcwxD+SkdEvdrvsdYvn+n23yOZ 32Ef6ppJjkro2oDXEx4eMe0w8nDeD8S5QlAuEbMti3EjvdwZJM9DspjdzgfF1sI7 +Wws3jAakm4DgNI2+Cw2KeCPpwhY8CAcKwyX5/MPFB3OG/9TLG94HDHQMIMeT6sI C+sSnMN8IdXDTYhgox1YV8gipOwRp3g5eXkn2Qz5eX1I9zwQRiXjVExiPBaJI1tz AtIyH14U+oBkKkp90rEBM6x6xmsyofgJkXlRpvbgM+X5URLCVj8S0524EdmnBw/r Dy8oDavrR7/weaAMdqspXG9RzkMy+w== =HNCh -----END PGP SIGNATURE----- --=-w2vs6nN64pL9EuJWn0z9--