public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Michał Górny" <mgorny@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Cc: "Michał Górny" <mgorny@gentoo.org>
Subject: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure
Date: Thu,  3 Oct 2019 18:36:32 +0200	[thread overview]
Message-ID: <20191003163632.7231-1-mgorny@gentoo.org> (raw)

Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for the flat layout
and the filename-hash layout with cutoffs that are multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <mgorny@gentoo.org>
---
 lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

Changes in v2: switched to a class-based design to make the code
reusable in emirrordist.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..18e3d390a 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -7,12 +7,15 @@ __all__ = ['fetch']
 
 import errno
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time
 
 from collections import OrderedDict
 
@@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
 	'portage.package.ebuild.doebuild:doebuild_environment,' + \
 		'_doebuild_spawn',
 	'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+	'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
+	'portage.util._urlopen:urlopen',
 )
 
 from portage import os, selinux, shutil, _encodings, \
 	_movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
-	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+	checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
-	GLOBAL_CONFIG_PATH
+	GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
 	PortageException, TryAgain
@@ -253,6 +259,130 @@ _size_suffix_map = {
 	'Y' : 80,
 }
 
+
+class FlatLayout(object):
+	def get_path(self, filename):
+		return filename
+
+
+class FilenameHashLayout(object):
+	def __init__(self, algo, cutoffs):
+		self.algo = algo
+		self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+	def get_path(self, filename):
+		fnhash = checksum_str(filename.encode('utf8'), self.algo)
+		ret = ''
+		for c in self.cutoffs:
+			assert c % 4 == 0
+			c = c // 4
+			ret += fnhash[:c] + '/'
+			fnhash = fnhash[c:]
+		return ret + filename
+
+
+class MirrorLayoutConfig(object):
+	"""
+	Class to read layout.conf from a mirror.
+	"""
+
+	def __init__(self):
+		self.structure = ()
+
+	def read_from_file(self, f):
+		cp = SafeConfigParser()
+		read_configs(cp, [f])
+		vals = []
+		for i in itertools.count():
+			try:
+				vals.append(tuple(cp.get('structure', '%d' % i).split()))
+			except NoOptionError:
+				break
+		self.structure = tuple(vals)
+
+	def serialize(self):
+		return self.structure
+
+	def deserialize(self, data):
+		self.structure = data
+
+	@staticmethod
+	def validate_structure(val):
+		if val == ('flat',):
+			return True
+		if val[0] == 'filename-hash' and len(val) == 3:
+			if val[1] not in get_valid_checksum_keys():
+				return False
+			# validate cutoffs
+			for c in val[2].split(':'):
+				try:
+					c = int(c)
+				except ValueError:
+					break
+				else:
+					if c % 4 != 0:
+						break
+			else:
+				return True
+			return False
+		return False
+
+	def get_best_supported_layout(self):
+		for val in self.structure:
+			if self.validate_structure(val):
+				if val[0] == 'flat':
+					return FlatLayout()
+				elif val[0] == 'filename-hash':
+					return FilenameHashLayout(val[1], val[2])
+		else:
+			# fallback
+			return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+	"""
+	Get correct fetch URL for a given file, accounting for mirror
+	layout configuration.
+
+	@param mirror_url: Base URL to the mirror (without '/distfiles')
+	@param filename: Filename to fetch
+	@param eroot: EROOT to use for the cache file
+	@return: Full URL to fetch
+	"""
+
+	mirror_conf = MirrorLayoutConfig()
+
+	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+	try:
+		with open(cache_file, 'r') as f:
+			cache = json.load(f)
+	except (IOError, ValueError):
+		cache = {}
+
+	ts, data = cache.get(mirror_url, (0, None))
+	# refresh at least daily
+	if ts >= time.time() - 86400:
+		mirror_conf.deserialize(data)
+	else:
+		try:
+			f = urlopen(mirror_url + '/distfiles/layout.conf')
+			try:
+				data = io.StringIO(f.read().decode('utf8'))
+			finally:
+				f.close()
+
+			mirror_conf.read_from_file(data)
+		except IOError:
+			pass
+
+		cache[mirror_url] = (time.time(), mirror_conf.serialize())
+		with open(cache_file, 'w') as f:
+			json.dump(cache, f)
+
+	return (mirror_url + "/distfiles/" +
+			mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
 	locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
 	allow_missing_digests=True):
@@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
 	for myfile, myuri in file_uri_tuples:
 		if myfile not in filedict:
 			filedict[myfile]=[]
-			for y in range(0,len(locations)):
-				filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+			for l in locations:
+				filedict[myfile].append(get_mirror_url(l, myfile,
+						mysettings["EROOT"]))
 		if myuri is None:
 			continue
 		if myuri[:9]=="mirror://":
-- 
2.23.0



             reply	other threads:[~2019-10-03 16:37 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-03 16:36 Michał Górny [this message]
2019-10-04  5:01 ` [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure Alec Warner
2019-10-04  5:53   ` Michał Górny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191003163632.7231-1-mgorny@gentoo.org \
    --to=mgorny@gentoo.org \
    --cc=gentoo-portage-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox