public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure
@ 2019-10-03 16:36 Michał Górny
  2019-10-04  5:01 ` Alec Warner
  0 siblings, 1 reply; 3+ messages in thread
From: Michał Górny @ 2019-10-03 16:36 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Michał Górny

Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for the flat layout
and the filename-hash layout with cutoffs being multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <mgorny@gentoo.org>
---
 lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

Changes in v2: switched to a class-based design to make the code
reusable in emirrordist.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..18e3d390a 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -7,12 +7,15 @@ __all__ = ['fetch']
 
 import errno
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time
 
 from collections import OrderedDict
 
@@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
 	'portage.package.ebuild.doebuild:doebuild_environment,' + \
 		'_doebuild_spawn',
 	'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+	'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
+	'portage.util._urlopen:urlopen',
 )
 
 from portage import os, selinux, shutil, _encodings, \
 	_movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
-	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+	checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
-	GLOBAL_CONFIG_PATH
+	GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
 	PortageException, TryAgain
@@ -253,6 +259,130 @@ _size_suffix_map = {
 	'Y' : 80,
 }
 
+
+class FlatLayout(object):
+	def get_path(self, filename):
+		return filename
+
+
+class FilenameHashLayout(object):
+	def __init__(self, algo, cutoffs):
+		self.algo = algo
+		self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+	def get_path(self, filename):
+		fnhash = checksum_str(filename.encode('utf8'), self.algo)
+		ret = ''
+		for c in self.cutoffs:
+			assert c % 4 == 0
+			c = c // 4
+			ret += fnhash[:c] + '/'
+			fnhash = fnhash[c:]
+		return ret + filename
+
+
+class MirrorLayoutConfig(object):
+	"""
+	Class to read layout.conf from a mirror.
+	"""
+
+	def __init__(self):
+		self.structure = ()
+
+	def read_from_file(self, f):
+		cp = SafeConfigParser()
+		read_configs(cp, [f])
+		vals = []
+		for i in itertools.count():
+			try:
+				vals.append(tuple(cp.get('structure', '%d' % i).split()))
+			except NoOptionError:
+				break
+		self.structure = tuple(vals)
+
+	def serialize(self):
+		return self.structure
+
+	def deserialize(self, data):
+		self.structure = data
+
+	@staticmethod
+	def validate_structure(val):
+		if val == ('flat',):
+			return True
+		if val[0] == 'filename-hash' and len(val) == 3:
+			if val[1] not in get_valid_checksum_keys():
+				return False
+			# validate cutoffs
+			for c in val[2].split(':'):
+				try:
+					c = int(c)
+				except ValueError:
+					break
+				else:
+					if c % 4 != 0:
+						break
+			else:
+				return True
+			return False
+		return False
+
+	def get_best_supported_layout(self):
+		for val in self.structure:
+			if self.validate_structure(val):
+				if val[0] == 'flat':
+					return FlatLayout()
+				elif val[0] == 'filename-hash':
+					return FilenameHashLayout(val[1], val[2])
+		else:
+			# fallback
+			return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+	"""
+	Get correct fetch URL for a given file, accounting for mirror
+	layout configuration.
+
+	@param mirror_url: Base URL to the mirror (without '/distfiles')
+	@param filename: Filename to fetch
+	@param eroot: EROOT to use for the cache file
+	@return: Full URL to fetch
+	"""
+
+	mirror_conf = MirrorLayoutConfig()
+
+	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+	try:
+		with open(cache_file, 'r') as f:
+			cache = json.load(f)
+	except (IOError, ValueError):
+		cache = {}
+
+	ts, data = cache.get(mirror_url, (0, None))
+	# refresh at least daily
+	if ts >= time.time() - 86400:
+		mirror_conf.deserialize(data)
+	else:
+		try:
+			f = urlopen(mirror_url + '/distfiles/layout.conf')
+			try:
+				data = io.StringIO(f.read().decode('utf8'))
+			finally:
+				f.close()
+
+			mirror_conf.read_from_file(data)
+		except IOError:
+			pass
+
+		cache[mirror_url] = (time.time(), mirror_conf.serialize())
+		with open(cache_file, 'w') as f:
+			json.dump(cache, f)
+
+	return (mirror_url + "/distfiles/" +
+			mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
 	locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
 	allow_missing_digests=True):
@@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
 	for myfile, myuri in file_uri_tuples:
 		if myfile not in filedict:
 			filedict[myfile]=[]
-			for y in range(0,len(locations)):
-				filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+			for l in locations:
+				filedict[myfile].append(get_mirror_url(l, myfile,
+						mysettings["EROOT"]))
 		if myuri is None:
 			continue
 		if myuri[:9]=="mirror://":
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure
  2019-10-03 16:36 [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure Michał Górny
@ 2019-10-04  5:01 ` Alec Warner
  2019-10-04  5:53   ` Michał Górny
  0 siblings, 1 reply; 3+ messages in thread
From: Alec Warner @ 2019-10-04  5:01 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Michał Górny

[-- Attachment #1: Type: text/plain, Size: 8224 bytes --]

On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <mgorny@gentoo.org> wrote:

> Add a support for the subset of GLEP 75 needed by Gentoo Infra.  This
> includes fetching and parsing layout.conf, and support for flat layout
> and filename-hash layout with cutoffs being multiplies of 4.
>
> Bug: https://bugs.gentoo.org/646898
> Signed-off-by: Michał Górny <mgorny@gentoo.org>
> ---
>  lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
>  1 file changed, 135 insertions(+), 4 deletions(-)
>
> Changes in v2: switched to a more classy layout to make the code
> reusable in emirrordist.
>
> diff --git a/lib/portage/package/ebuild/fetch.py
> b/lib/portage/package/ebuild/fetch.py
> index 227bf45ae..18e3d390a 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -7,12 +7,15 @@ __all__ = ['fetch']
>
>  import errno
>  import io
> +import itertools
> +import json
>  import logging
>  import random
>  import re
>  import stat
>  import sys
>  import tempfile
> +import time
>
>  from collections import OrderedDict
>
> @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
>         'portage.package.ebuild.doebuild:doebuild_environment,' + \
>                 '_doebuild_spawn',
>         'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> +
>  'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> +       'portage.util._urlopen:urlopen',
>  )
>
>  from portage import os, selinux, shutil, _encodings, \
>         _movefile, _shell_quote, _unicode_encode
>  from portage.checksum import (get_valid_checksum_keys, perform_md5,
> verify_all,
> -       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> +       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> +       checksum_str)
>  from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> -       GLOBAL_CONFIG_PATH
> +       GLOBAL_CONFIG_PATH, CACHE_PATH
>  from portage.data import portage_gid, portage_uid, secpass,
> userpriv_groups
>  from portage.exception import FileNotFound, OperationNotPermitted, \
>         PortageException, TryAgain
> @@ -253,6 +259,130 @@ _size_suffix_map = {
>         'Y' : 80,
>  }
>
> +
> +class FlatLayout(object):
> +       def get_path(self, filename):
> +               return filename
> +
> +
> +class FilenameHashLayout(object):
> +       def __init__(self, algo, cutoffs):
> +               self.algo = algo
> +               self.cutoffs = [int(x) for x in cutoffs.split(':')]
> +
> +       def get_path(self, filename):
> +               fnhash = checksum_str(filename.encode('utf8'), self.algo)
> +               ret = ''
> +               for c in self.cutoffs:
> +                       assert c % 4 == 0
>

I'm not quite sure what this assert is doing. I'm not super in favor of
asserts (I'd rather see an exception like raise FooError("...")), but if you
are going to use it please use something like:

assert c % 4 == 0, "Some description of why we put this assert here so if it
fires we can do something useful."

> +                       c = c // 4
> +                       ret += fnhash[:c] + '/'
> +                       fnhash = fnhash[c:]
> +               return ret + filename
> +
> +
> +class MirrorLayoutConfig(object):
> +       """
> +       Class to read layout.conf from a mirror.
> +       """
> +
> +       def __init__(self):
> +               self.structure = ()
> +
> +       def read_from_file(self, f):
> +               cp = SafeConfigParser()
> +               read_configs(cp, [f])
> +               vals = []
> +               for i in itertools.count():
> +                       try:
> +                               vals.append(tuple(cp.get('structure', '%d'
> % i).split()))
> +                       except NoOptionError:
> +                               break
> +               self.structure = tuple(vals)
> +
> +       def serialize(self):
> +               return self.structure
> +
> +       def deserialize(self, data):
> +               self.structure = data
> +
> +       @staticmethod
> +       def validate_structure(val):
> +               if val == ('flat',):
> +                       return True
> +               if val[0] == 'filename-hash' and len(val) == 3:
> +                       if val[1] not in get_valid_checksum_keys():
> +                               return False
> +                       # validate cutoffs
> +                       for c in val[2].split(':'):
> +                               try:
> +                                       c = int(c)
> +                               except ValueError:
> +                                       break
> +                               else:
> +                                       if c % 4 != 0:
> +                                               break
> +                       else:
> +                               return True
> +                       return False
> +               return False
> +
> +       def get_best_supported_layout(self):
> +               for val in self.structure:
> +                       if self.validate_structure(val):
> +                               if val[0] == 'flat':
> +                                       return FlatLayout()
> +                               elif val[0] == 'filename-hash':
> +                                       return FilenameHashLayout(val[1],
> val[2])
> +               else:
> +                       # fallback
> +                       return FlatLayout()
> +
> +
> +def get_mirror_url(mirror_url, filename, eroot):
> +       """
> +       Get correct fetch URL for a given file, accounting for mirror
> +       layout configuration.
> +
> +       @param mirror_url: Base URL to the mirror (without '/distfiles')
> +       @param filename: Filename to fetch
> +       @param eroot: EROOT to use for the cache file
> +       @return: Full URL to fetch
> +       """
> +
> +       mirror_conf = MirrorLayoutConfig()
> +
> +       cache_file = os.path.join(eroot, CACHE_PATH,
> 'mirror-metadata.json')
> +       try:
> +               with open(cache_file, 'r') as f:
> +                       cache = json.load(f)
> +       except (IOError, ValueError):
> +               cache = {}
> +
> +       ts, data = cache.get(mirror_url, (0, None))
> +       # refresh at least daily
> +       if ts >= time.time() - 86400:
> +               mirror_conf.deserialize(data)
> +       else:
> +               try:
> +                       f = urlopen(mirror_url + '/distfiles/layout.conf')
> +                       try:
> +                               data = io.StringIO(f.read().decode('utf8'))
> +                       finally:
> +                               f.close()
> +
> +                       mirror_conf.read_from_file(data)
> +               except IOError:
> +                       pass
> +
> +               cache[mirror_url] = (time.time(), mirror_conf.serialize())
> +               with open(cache_file, 'w') as f:
> +                       json.dump(cache, f)
> +
> +       return (mirror_url + "/distfiles/" +
> +
>  mirror_conf.get_best_supported_layout().get_path(filename))
> +
> +
>  def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>         locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
>         allow_missing_digests=True):
> @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>         for myfile, myuri in file_uri_tuples:
>                 if myfile not in filedict:
>                         filedict[myfile]=[]
> -                       for y in range(0,len(locations)):
> -
>  filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> +                       for l in locations:
> +                               filedict[myfile].append(get_mirror_url(l,
> myfile,
> +                                               mysettings["EROOT"]))
>                 if myuri is None:
>                         continue
>                 if myuri[:9]=="mirror://":
> --
> 2.23.0
>
>
>

[-- Attachment #2: Type: text/html, Size: 10750 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure
  2019-10-04  5:01 ` Alec Warner
@ 2019-10-04  5:53   ` Michał Górny
  0 siblings, 0 replies; 3+ messages in thread
From: Michał Górny @ 2019-10-04  5:53 UTC (permalink / raw
  To: gentoo-portage-dev

[-- Attachment #1: Type: text/plain, Size: 8892 bytes --]

On Thu, 2019-10-03 at 22:01 -0700, Alec Warner wrote:
> On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <mgorny@gentoo.org> wrote:
> 
> > Add a support for the subset of GLEP 75 needed by Gentoo Infra.  This
> > includes fetching and parsing layout.conf, and support for flat layout
> > and filename-hash layout with cutoffs being multiplies of 4.
> > 
> > Bug: https://bugs.gentoo.org/646898
> > Signed-off-by: Michał Górny <mgorny@gentoo.org>
> > ---
> >  lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
> >  1 file changed, 135 insertions(+), 4 deletions(-)
> > 
> > Changes in v2: switched to a more classy layout to make the code
> > reusable in emirrordist.
> > 
> > diff --git a/lib/portage/package/ebuild/fetch.py
> > b/lib/portage/package/ebuild/fetch.py
> > index 227bf45ae..18e3d390a 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -7,12 +7,15 @@ __all__ = ['fetch']
> > 
> >  import errno
> >  import io
> > +import itertools
> > +import json
> >  import logging
> >  import random
> >  import re
> >  import stat
> >  import sys
> >  import tempfile
> > +import time
> > 
> >  from collections import OrderedDict
> > 
> > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
> >         'portage.package.ebuild.doebuild:doebuild_environment,' + \
> >                 '_doebuild_spawn',
> >         'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> > +
> >  'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> > +       'portage.util._urlopen:urlopen',
> >  )
> > 
> >  from portage import os, selinux, shutil, _encodings, \
> >         _movefile, _shell_quote, _unicode_encode
> >  from portage.checksum import (get_valid_checksum_keys, perform_md5,
> > verify_all,
> > -       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> > +       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> > +       checksum_str)
> >  from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> > -       GLOBAL_CONFIG_PATH
> > +       GLOBAL_CONFIG_PATH, CACHE_PATH
> >  from portage.data import portage_gid, portage_uid, secpass,
> > userpriv_groups
> >  from portage.exception import FileNotFound, OperationNotPermitted, \
> >         PortageException, TryAgain
> > @@ -253,6 +259,130 @@ _size_suffix_map = {
> >         'Y' : 80,
> >  }
> > 
> > +
> > +class FlatLayout(object):
> > +       def get_path(self, filename):
> > +               return filename
> > +
> > +
> > +class FilenameHashLayout(object):
> > +       def __init__(self, algo, cutoffs):
> > +               self.algo = algo
> > +               self.cutoffs = [int(x) for x in cutoffs.split(':')]
> > +
> > +       def get_path(self, filename):
> > +               fnhash = checksum_str(filename.encode('utf8'), self.algo)
> > +               ret = ''
> > +               for c in self.cutoffs:
> > +                       assert c % 4 == 0
> > 
> 
> I'm not quite sure what this assert is doing. I'm not super in favor of
> asserts (I'd rather see an exception like raise FooError("..."), but if you
> are going to use it please use something like:
> 
> assert c %4 == 0, "Some description of why we put this assert here so if it
> fires we can do something useful."

That condition is already checked in validate_structure().  Maybe I could
delegate the check to this class to make things clearer.

> 
> +                       c = c // 4
> > +                       ret += fnhash[:c] + '/'
> > +                       fnhash = fnhash[c:]
> > +               return ret + filename
> > +
> > +
> > +class MirrorLayoutConfig(object):
> > +       """
> > +       Class to read layout.conf from a mirror.
> > +       """
> > +
> > +       def __init__(self):
> > +               self.structure = ()
> > +
> > +       def read_from_file(self, f):
> > +               cp = SafeConfigParser()
> > +               read_configs(cp, [f])
> > +               vals = []
> > +               for i in itertools.count():
> > +                       try:
> > +                               vals.append(tuple(cp.get('structure', '%d'
> > % i).split()))
> > +                       except NoOptionError:
> > +                               break
> > +               self.structure = tuple(vals)
> > +
> > +       def serialize(self):
> > +               return self.structure
> > +
> > +       def deserialize(self, data):
> > +               self.structure = data
> > +
> > +       @staticmethod
> > +       def validate_structure(val):
> > +               if val == ('flat',):
> > +                       return True
> > +               if val[0] == 'filename-hash' and len(val) == 3:
> > +                       if val[1] not in get_valid_checksum_keys():
> > +                               return False
> > +                       # validate cutoffs
> > +                       for c in val[2].split(':'):
> > +                               try:
> > +                                       c = int(c)
> > +                               except ValueError:
> > +                                       break
> > +                               else:
> > +                                       if c % 4 != 0:
> > +                                               break
> > +                       else:
> > +                               return True
> > +                       return False
> > +               return False
> > +
> > +       def get_best_supported_layout(self):
> > +               for val in self.structure:
> > +                       if self.validate_structure(val):
> > +                               if val[0] == 'flat':
> > +                                       return FlatLayout()
> > +                               elif val[0] == 'filename-hash':
> > +                                       return FilenameHashLayout(val[1],
> > val[2])
> > +               else:
> > +                       # fallback
> > +                       return FlatLayout()
> > +
> > +
> > +def get_mirror_url(mirror_url, filename, eroot):
> > +       """
> > +       Get correct fetch URL for a given file, accounting for mirror
> > +       layout configuration.
> > +
> > +       @param mirror_url: Base URL to the mirror (without '/distfiles')
> > +       @param filename: Filename to fetch
> > +       @param eroot: EROOT to use for the cache file
> > +       @return: Full URL to fetch
> > +       """
> > +
> > +       mirror_conf = MirrorLayoutConfig()
> > +
> > +       cache_file = os.path.join(eroot, CACHE_PATH,
> > 'mirror-metadata.json')
> > +       try:
> > +               with open(cache_file, 'r') as f:
> > +                       cache = json.load(f)
> > +       except (IOError, ValueError):
> > +               cache = {}
> > +
> > +       ts, data = cache.get(mirror_url, (0, None))
> > +       # refresh at least daily
> > +       if ts >= time.time() - 86400:
> > +               mirror_conf.deserialize(data)
> > +       else:
> > +               try:
> > +                       f = urlopen(mirror_url + '/distfiles/layout.conf')
> > +                       try:
> > +                               data = io.StringIO(f.read().decode('utf8'))
> > +                       finally:
> > +                               f.close()
> > +
> > +                       mirror_conf.read_from_file(data)
> > +               except IOError:
> > +                       pass
> > +
> > +               cache[mirror_url] = (time.time(), mirror_conf.serialize())
> > +               with open(cache_file, 'w') as f:
> > +                       json.dump(cache, f)
> > +
> > +       return (mirror_url + "/distfiles/" +
> > +
> >  mirror_conf.get_best_supported_layout().get_path(filename))
> > +
> > +
> >  def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >         locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
> >         allow_missing_digests=True):
> > @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >         for myfile, myuri in file_uri_tuples:
> >                 if myfile not in filedict:
> >                         filedict[myfile]=[]
> > -                       for y in range(0,len(locations)):
> > -
> >  filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> > +                       for l in locations:
> > +                               filedict[myfile].append(get_mirror_url(l,
> > myfile,
> > +                                               mysettings["EROOT"]))
> >                 if myuri is None:
> >                         continue
> >                 if myuri[:9]=="mirror://":
> > --
> > 2.23.0
> > 
> > 
> > 

-- 
Best regards,
Michał Górny


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 618 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-10-04  5:53 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-10-03 16:36 [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure Michał Górny
2019-10-04  5:01 ` Alec Warner
2019-10-04  5:53   ` Michał Górny

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox