public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-portage-dev] [PATCH v3] fetch: Support GLEP 75 mirror structure
@ 2019-10-04  9:18 Michał Górny
  0 siblings, 0 replies; only message in thread
From: Michał Górny @ 2019-10-04  9:18 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Michał Górny

Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for flat layout
and filename-hash layout with cutoffs being multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <mgorny@gentoo.org>
---
 lib/portage/package/ebuild/fetch.py    | 158 ++++++++++++++++++++++++-
 lib/portage/tests/ebuild/test_fetch.py |  94 ++++++++++++++-
 2 files changed, 247 insertions(+), 5 deletions(-)

Changes in v3:
- mirrors are evaluated lazily (i.e. Portage doesn't fetch layouts
  for all mirrors prematurely),
- garbage layout.conf is handled gracefully,
- cache updates are done atomically,
- layout.conf argument verification has been moved to individual classes,
- a few unit and integration tests have been added.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..fa250535f 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -6,13 +6,17 @@ from __future__ import print_function
 __all__ = ['fetch']
 
 import errno
+import functools
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time
 
 from collections import OrderedDict
 
@@ -27,14 +31,19 @@ portage.proxy.lazyimport.lazyimport(globals(),
 	'portage.package.ebuild.doebuild:doebuild_environment,' + \
 		'_doebuild_spawn',
 	'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+	'portage.util:atomic_ofstream',
+	'portage.util.configparser:SafeConfigParser,read_configs,' +
+		'NoOptionError,ConfigParserError',
+	'portage.util._urlopen:urlopen',
 )
 
 from portage import os, selinux, shutil, _encodings, \
 	_movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
-	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+	checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
-	GLOBAL_CONFIG_PATH
+	GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
 	PortageException, TryAgain
@@ -253,6 +262,144 @@ _size_suffix_map = {
 	'Y' : 80,
 }
 
+
+class FlatLayout(object):
+	def get_path(self, filename):
+		return filename
+
+	@staticmethod
+	def verify_args(args):
+		return len(args) == 1
+
+
+class FilenameHashLayout(object):
+	def __init__(self, algo, cutoffs):
+		self.algo = algo
+		self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+	def get_path(self, filename):
+		fnhash = checksum_str(filename.encode('utf8'), self.algo)
+		ret = ''
+		for c in self.cutoffs:
+			assert c % 4 == 0
+			c = c // 4
+			ret += fnhash[:c] + '/'
+			fnhash = fnhash[c:]
+		return ret + filename
+
+	@staticmethod
+	def verify_args(args):
+		if len(args) != 3:
+			return False
+		if args[1] not in get_valid_checksum_keys():
+			return False
+		# validate cutoffs
+		for c in args[2].split(':'):
+			try:
+				c = int(c)
+			except ValueError:
+				break
+			else:
+				if c % 4 != 0:
+					break
+		else:
+			return True
+		return False
+
+
+class MirrorLayoutConfig(object):
+	"""
+	Class to read layout.conf from a mirror.
+	"""
+
+	def __init__(self):
+		self.structure = ()
+
+	def read_from_file(self, f):
+		cp = SafeConfigParser()
+		read_configs(cp, [f])
+		vals = []
+		for i in itertools.count():
+			try:
+				vals.append(tuple(cp.get('structure', '%d' % i).split()))
+			except NoOptionError:
+				break
+		self.structure = tuple(vals)
+
+	def serialize(self):
+		return self.structure
+
+	def deserialize(self, data):
+		self.structure = data
+
+	@staticmethod
+	def validate_structure(val):
+		if val[0] == 'flat':
+			return FlatLayout.verify_args(val)
+		if val[0] == 'filename-hash':
+			return FilenameHashLayout.verify_args(val)
+		return False
+
+	def get_best_supported_layout(self):
+		for val in self.structure:
+			if self.validate_structure(val):
+				if val[0] == 'flat':
+					return FlatLayout(*val[1:])
+				elif val[0] == 'filename-hash':
+					return FilenameHashLayout(*val[1:])
+		else:
+			# fallback
+			return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+	"""
+	Get correct fetch URL for a given file, accounting for mirror
+	layout configuration.
+
+	@param mirror_url: Base URL to the mirror (without '/distfiles')
+	@param filename: Filename to fetch
+	@param eroot: EROOT to use for the cache file
+	@return: Full URL to fetch
+	"""
+
+	mirror_conf = MirrorLayoutConfig()
+
+	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+	try:
+		with open(cache_file, 'r') as f:
+			cache = json.load(f)
+	except (IOError, ValueError):
+		cache = {}
+
+	ts, data = cache.get(mirror_url, (0, None))
+	# refresh at least daily
+	if ts >= time.time() - 86400:
+		mirror_conf.deserialize(data)
+	else:
+		try:
+			f = urlopen(mirror_url + '/distfiles/layout.conf')
+			try:
+				data = io.StringIO(f.read().decode('utf8'))
+			finally:
+				f.close()
+
+			try:
+				mirror_conf.read_from_file(data)
+			except ConfigParserError:
+				pass
+		except IOError:
+			pass
+
+		cache[mirror_url] = (time.time(), mirror_conf.serialize())
+		f = atomic_ofstream(cache_file, 'w')
+		json.dump(cache, f)
+		f.close()
+
+	return (mirror_url + "/distfiles/" +
+			mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
 	locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
 	allow_missing_digests=True):
@@ -434,8 +581,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
 	for myfile, myuri in file_uri_tuples:
 		if myfile not in filedict:
 			filedict[myfile]=[]
-			for y in range(0,len(locations)):
-				filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+			for l in locations:
+				filedict[myfile].append(functools.partial(
+					get_mirror_url, l, myfile, mysettings["EROOT"]))
 		if myuri is None:
 			continue
 		if myuri[:9]=="mirror://":
@@ -895,6 +1043,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
 			tried_locations = set()
 			while uri_list:
 				loc = uri_list.pop()
+				if isinstance(loc, functools.partial):
+					loc = loc()
 				# Eliminate duplicates here in case we've switched to
 				# "primaryuri" mode on the fly due to a checksum failure.
 				if loc in tried_locations:
diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py
index 83321fed7..f2254c468 100644
--- a/lib/portage/tests/ebuild/test_fetch.py
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -4,6 +4,7 @@
 from __future__ import unicode_literals
 
 import functools
+import io
 import tempfile
 
 import portage
@@ -11,12 +12,14 @@ from portage import shutil, os
 from portage.tests import TestCase
 from portage.tests.resolver.ResolverPlayground import ResolverPlayground
 from portage.tests.util.test_socks5 import AsyncHTTPServer
+from portage.util.configparser import ConfigParserError
 from portage.util.futures.executor.fork import ForkExecutor
 from portage.util._async.SchedulerInterface import SchedulerInterface
 from portage.util._eventloop.global_event_loop import global_event_loop
 from portage.package.ebuild.config import config
 from portage.package.ebuild.digestgen import digestgen
-from portage.package.ebuild.fetch import _download_suffix
+from portage.package.ebuild.fetch import (_download_suffix, FlatLayout,
+		FilenameHashLayout, MirrorLayoutConfig)
 from _emerge.EbuildFetcher import EbuildFetcher
 from _emerge.Package import Package
 
@@ -228,3 +231,92 @@ class EbuildFetchTestCase(TestCase):
 			finally:
 				shutil.rmtree(ro_distdir)
 				playground.cleanup()
+
+	def test_flat_layout(self):
+		self.assertTrue(FlatLayout.verify_args(('flat',)))
+		self.assertFalse(FlatLayout.verify_args(('flat', 'extraneous-arg')))
+		self.assertEqual(FlatLayout().get_path('foo-1.tar.gz'), 'foo-1.tar.gz')
+
+	def test_filename_hash_layout(self):
+		self.assertFalse(FilenameHashLayout.verify_args(('filename-hash',)))
+		self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '8')))
+		self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'INVALID-HASH', '8')))
+		self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:12')))
+		self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '3')))
+		self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', 'junk')))
+		self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:junk')))
+
+		self.assertEqual(FilenameHashLayout('SHA1', '4').get_path('foo-1.tar.gz'),
+				'1/foo-1.tar.gz')
+		self.assertEqual(FilenameHashLayout('SHA1', '8').get_path('foo-1.tar.gz'),
+				'19/foo-1.tar.gz')
+		self.assertEqual(FilenameHashLayout('SHA1', '8:16').get_path('foo-1.tar.gz'),
+				'19/c3b6/foo-1.tar.gz')
+		self.assertEqual(FilenameHashLayout('SHA1', '8:16:24').get_path('foo-1.tar.gz'),
+				'19/c3b6/37a94b/foo-1.tar.gz')
+
+	def test_mirror_layout_config(self):
+		mlc = MirrorLayoutConfig()
+		self.assertEqual(mlc.serialize(), ())
+		self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
+
+		conf = '''
+[structure]
+0=flat
+'''
+		mlc.read_from_file(io.StringIO(conf))
+		self.assertEqual(mlc.serialize(), (('flat',),))
+		self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
+		self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+				'foo-1.tar.gz')
+
+		conf = '''
+[structure]
+0=filename-hash SHA1 8:16
+1=flat
+'''
+		mlc.read_from_file(io.StringIO(conf))
+		self.assertEqual(mlc.serialize(), (
+			('filename-hash', 'SHA1', '8:16'),
+			('flat',)
+		))
+		self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+		self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+				'19/c3b6/foo-1.tar.gz')
+		serialized = mlc.serialize()
+
+		# test fallback
+		conf = '''
+[structure]
+0=filename-hash INVALID-HASH 8:16
+1=filename-hash SHA1 32
+2=flat
+'''
+		mlc.read_from_file(io.StringIO(conf))
+		self.assertEqual(mlc.serialize(), (
+			('filename-hash', 'INVALID-HASH', '8:16'),
+			('filename-hash', 'SHA1', '32'),
+			('flat',)
+		))
+		self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+		self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+				'19c3b637/foo-1.tar.gz')
+
+		# test deserialization
+		mlc.deserialize(serialized)
+		self.assertEqual(mlc.serialize(), (
+			('filename-hash', 'SHA1', '8:16'),
+			('flat',)
+		))
+		self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+		self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+				'19/c3b6/foo-1.tar.gz')
+
+		# test erroneous input
+		conf = '''
+[#(*DA*&*F
+[structure]
+0=filename-hash SHA1 32
+'''
+		self.assertRaises(ConfigParserError, mlc.read_from_file,
+				io.StringIO(conf))
-- 
2.23.0



^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2019-10-04  9:19 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-10-04  9:18 [gentoo-portage-dev] [PATCH v3] fetch: Support GLEP 75 mirror structure Michał Górny

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox