* [gentoo-portage-dev] [PATCH v3] fetch: Support GLEP 75 mirror structure
@ 2019-10-04 9:18 Michał Górny
0 siblings, 0 replies; only message in thread
From: Michał Górny @ 2019-10-04 9:18 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Michał Górny
Add support for the subset of GLEP 75 needed by Gentoo Infra. This
includes fetching and parsing layout.conf, and support for the flat layout
and the filename-hash layout with cutoffs being multiples of 4.
Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <mgorny@gentoo.org>
---
lib/portage/package/ebuild/fetch.py | 158 ++++++++++++++++++++++++-
lib/portage/tests/ebuild/test_fetch.py | 94 ++++++++++++++-
2 files changed, 247 insertions(+), 5 deletions(-)
Changes in v3:
- mirrors are evaluated lazily (i.e. Portage doesn't fetch layouts
for all mirrors prematurely),
- garbage layout.conf is handled gracefully,
- cache updates are done atomically,
- layout.conf argument verification has been moved to individual classes,
- a few unit and integration tests have been added.
diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..fa250535f 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -6,13 +6,17 @@ from __future__ import print_function
__all__ = ['fetch']
import errno
+import functools
import io
+import itertools
+import json
import logging
import random
import re
import stat
import sys
import tempfile
+import time
from collections import OrderedDict
@@ -27,14 +31,19 @@ portage.proxy.lazyimport.lazyimport(globals(),
'portage.package.ebuild.doebuild:doebuild_environment,' + \
'_doebuild_spawn',
'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+ 'portage.util:atomic_ofstream',
+ 'portage.util.configparser:SafeConfigParser,read_configs,' +
+ 'NoOptionError,ConfigParserError',
+ 'portage.util._urlopen:urlopen',
)
from portage import os, selinux, shutil, _encodings, \
_movefile, _shell_quote, _unicode_encode
from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
- _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+ _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+ checksum_str)
from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
- GLOBAL_CONFIG_PATH
+ GLOBAL_CONFIG_PATH, CACHE_PATH
from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
from portage.exception import FileNotFound, OperationNotPermitted, \
PortageException, TryAgain
@@ -253,6 +262,144 @@ _size_suffix_map = {
'Y' : 80,
}
+
+class FlatLayout(object):
+	"""
+	Mirror layout in which the path of a distfile is just its filename.
+	"""
+
+	@staticmethod
+	def verify_args(args):
+		"""
+		Check a whitespace-split layout.conf entry for this layout.
+
+		@param args: Sequence of entry words, layout name first
+		@return: True if the entry consists of exactly one word
+		"""
+		word_count = len(args)
+		return word_count == 1
+
+	def get_path(self, filename):
+		"""
+		Get the path of a distfile relative to the distfiles directory.
+
+		@param filename: Filename to fetch
+		@return: The filename, unchanged
+		"""
+		return filename
+
+
+class FilenameHashLayout(object):
+	"""
+	Mirror layout in which a distfile is stored below directories named
+	after successive groups of leading hex digits of the hash of its
+	filename.
+	"""
+
+	def __init__(self, algo, cutoffs):
+		"""
+		@param algo: Hash algorithm name, passed on to checksum_str()
+		@param cutoffs: Colon-separated widths, in bits, of the
+			consecutive directory levels (e.g. '8:16')
+		@raise ValueError: if a cutoff is not an integer
+		"""
+		self.algo = algo
+		self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+	def get_path(self, filename):
+		"""
+		Get the path of a distfile relative to the distfiles directory.
+
+		@param filename: Filename to fetch
+		@return: One directory component per cutoff, followed by
+			the filename, joined with '/'
+		@raise ValueError: if a cutoff is not a positive multiple of 4
+		"""
+		fnhash = checksum_str(filename.encode('utf8'), self.algo)
+		parts = []
+		pos = 0
+		for c in self.cutoffs:
+			# Raise explicitly rather than assert: assertions are
+			# stripped when Python runs with -O.
+			if c <= 0 or c % 4 != 0:
+				raise ValueError(
+					'cutoff %d is not a positive multiple of 4' % c)
+			# cutoffs are given in bits, one hex digit covers 4 bits
+			width = c // 4
+			parts.append(fnhash[pos:pos + width])
+			pos += width
+		parts.append(filename)
+		return '/'.join(parts)
+
+	@staticmethod
+	def verify_args(args):
+		"""
+		Check a whitespace-split layout.conf entry for this layout.
+
+		@param args: Sequence of entry words: layout name, hash
+			algorithm, cutoffs
+		@return: True if there are exactly three words, the algorithm
+			is a valid checksum key and every cutoff is a positive
+			multiple of 4
+		"""
+		if len(args) != 3:
+			return False
+		if args[1] not in get_valid_checksum_keys():
+			return False
+		# validate cutoffs
+		for c in args[2].split(':'):
+			try:
+				c = int(c)
+			except ValueError:
+				return False
+			# Zero or negative widths would produce empty or
+			# nonsensical directory components in get_path().
+			if c <= 0 or c % 4 != 0:
+				return False
+		return True
+
+
+class MirrorLayoutConfig(object):
+	"""
+	Class to read layout.conf from a mirror.
+
+	The parsed configuration is kept in self.structure as a sequence
+	of whitespace-split entries, in the order they are numbered in
+	the [structure] section.
+	"""
+
+	def __init__(self):
+		self.structure = ()
+
+	def read_from_file(self, f):
+		"""
+		Parse layout.conf and replace self.structure with its entries.
+
+		Options '0', '1', ... of the [structure] section are read
+		until the first missing number.
+
+		@param f: File-like object to read the configuration from
+		"""
+		cp = SafeConfigParser()
+		read_configs(cp, [f])
+		vals = []
+		for i in itertools.count():
+			try:
+				vals.append(tuple(cp.get('structure', '%d' % i).split()))
+			except NoOptionError:
+				break
+		self.structure = tuple(vals)
+
+	def serialize(self):
+		"""
+		@return: The structure, in the form accepted by deserialize()
+		"""
+		return self.structure
+
+	def deserialize(self, data):
+		"""
+		@param data: Structure previously returned by serialize()
+		"""
+		self.structure = data
+
+	@staticmethod
+	def validate_structure(val):
+		"""
+		Check whether a single structure entry is supported.
+
+		@param val: Sequence of entry words, layout name first
+		@return: True if the layout is known and its arguments pass
+			the verify_args() of the matching layout class
+		"""
+		# An option with an empty value splits into zero words;
+		# reject it rather than fail on val[0].
+		if not val:
+			return False
+		if val[0] == 'flat':
+			return FlatLayout.verify_args(val)
+		if val[0] == 'filename-hash':
+			return FilenameHashLayout.verify_args(val)
+		return False
+
+	def get_best_supported_layout(self):
+		"""
+		Get a layout object for the first supported structure entry.
+
+		@return: FlatLayout or FilenameHashLayout instance; a
+			FlatLayout if no entry is supported
+		"""
+		for val in self.structure:
+			if not self.validate_structure(val):
+				continue
+			if val[0] == 'flat':
+				return FlatLayout(*val[1:])
+			if val[0] == 'filename-hash':
+				return FilenameHashLayout(*val[1:])
+		# fallback
+		return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+	"""
+	Get correct fetch URL for a given file, accounting for mirror
+	layout configuration.
+
+	The layout of each mirror is cached in mirror-metadata.json below
+	CACHE_PATH. If the cached entry is missing, malformed or older
+	than a day, layout.conf is fetched from the mirror again and
+	the cache file is rewritten.
+
+	@param mirror_url: Base URL to the mirror (without '/distfiles')
+	@param filename: Filename to fetch
+	@param eroot: EROOT to use for the cache file
+	@return: Full URL to fetch
+	"""
+
+	# refresh at least daily
+	max_age = 86400
+
+	mirror_conf = MirrorLayoutConfig()
+
+	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+	try:
+		with open(cache_file, 'r') as f:
+			cache = json.load(f)
+	except (IOError, ValueError):
+		cache = {}
+	# The file may hold valid JSON of the wrong shape; start over
+	# instead of failing on it below.
+	if not isinstance(cache, dict):
+		cache = {}
+
+	try:
+		ts, data = cache.get(mirror_url, (0, None))
+		fresh = (ts >= time.time() - max_age and
+			isinstance(data, (list, tuple)))
+	except (TypeError, ValueError):
+		# malformed entry, treat it as expired
+		fresh = False
+
+	if fresh:
+		mirror_conf.deserialize(data)
+	else:
+		try:
+			f = urlopen(mirror_url + '/distfiles/layout.conf')
+			try:
+				data = io.StringIO(f.read().decode('utf8'))
+			finally:
+				f.close()
+			mirror_conf.read_from_file(data)
+		except (IOError, UnicodeDecodeError, ConfigParserError):
+			# Missing, undecodable or unparseable layout.conf:
+			# keep the empty structure, which selects the flat
+			# layout.
+			pass
+
+		# The result is cached even if the fetch failed, so the
+		# mirror is not queried again until the entry expires.
+		cache[mirror_url] = (time.time(), mirror_conf.serialize())
+		f = atomic_ofstream(cache_file, 'w')
+		json.dump(cache, f)
+		f.close()
+
+	return (mirror_url + "/distfiles/" +
+		mirror_conf.get_best_supported_layout().get_path(filename))
+
+
def fetch(myuris, mysettings, listonly=0, fetchonly=0,
locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
allow_missing_digests=True):
@@ -434,8 +581,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
for myfile, myuri in file_uri_tuples:
if myfile not in filedict:
filedict[myfile]=[]
- for y in range(0,len(locations)):
- filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+ for l in locations:
+ filedict[myfile].append(functools.partial(
+ get_mirror_url, l, myfile, mysettings["EROOT"]))
if myuri is None:
continue
if myuri[:9]=="mirror://":
@@ -895,6 +1043,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
tried_locations = set()
while uri_list:
loc = uri_list.pop()
+ if isinstance(loc, functools.partial):
+ loc = loc()
# Eliminate duplicates here in case we've switched to
# "primaryuri" mode on the fly due to a checksum failure.
if loc in tried_locations:
diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py
index 83321fed7..f2254c468 100644
--- a/lib/portage/tests/ebuild/test_fetch.py
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -4,6 +4,7 @@
from __future__ import unicode_literals
import functools
+import io
import tempfile
import portage
@@ -11,12 +12,14 @@ from portage import shutil, os
from portage.tests import TestCase
from portage.tests.resolver.ResolverPlayground import ResolverPlayground
from portage.tests.util.test_socks5 import AsyncHTTPServer
+from portage.util.configparser import ConfigParserError
from portage.util.futures.executor.fork import ForkExecutor
from portage.util._async.SchedulerInterface import SchedulerInterface
from portage.util._eventloop.global_event_loop import global_event_loop
from portage.package.ebuild.config import config
from portage.package.ebuild.digestgen import digestgen
-from portage.package.ebuild.fetch import _download_suffix
+from portage.package.ebuild.fetch import (_download_suffix, FlatLayout,
+ FilenameHashLayout, MirrorLayoutConfig)
from _emerge.EbuildFetcher import EbuildFetcher
from _emerge.Package import Package
@@ -228,3 +231,92 @@ class EbuildFetchTestCase(TestCase):
finally:
shutil.rmtree(ro_distdir)
playground.cleanup()
+
+	def test_flat_layout(self):
+		"""
+		FlatLayout accepts only a bare layout name and maps
+		a filename to itself.
+		"""
+		distfile = 'foo-1.tar.gz'
+		self.assertEqual(FlatLayout().get_path(distfile), distfile)
+
+		bare_entry = ('flat',)
+		self.assertTrue(FlatLayout.verify_args(bare_entry))
+		overlong_entry = ('flat', 'extraneous-arg')
+		self.assertFalse(FlatLayout.verify_args(overlong_entry))
+
+	def test_filename_hash_layout(self):
+		"""
+		Check argument verification and path generation
+		of FilenameHashLayout.
+		"""
+		accepted = (
+			('filename-hash', 'SHA1', '8'),
+			('filename-hash', 'SHA1', '4:8:12'),
+		)
+		rejected = (
+			('filename-hash',),
+			('filename-hash', 'INVALID-HASH', '8'),
+			('filename-hash', 'SHA1', '3'),
+			('filename-hash', 'SHA1', 'junk'),
+			('filename-hash', 'SHA1', '4:8:junk'),
+		)
+		for entry in accepted:
+			self.assertTrue(FilenameHashLayout.verify_args(entry))
+		for entry in rejected:
+			self.assertFalse(FilenameHashLayout.verify_args(entry))
+
+		# cutoffs -> expected path of foo-1.tar.gz
+		expected_paths = (
+			('4', '1/foo-1.tar.gz'),
+			('8', '19/foo-1.tar.gz'),
+			('8:16', '19/c3b6/foo-1.tar.gz'),
+			('8:16:24', '19/c3b6/37a94b/foo-1.tar.gz'),
+		)
+		for cutoffs, path in expected_paths:
+			layout = FilenameHashLayout('SHA1', cutoffs)
+			self.assertEqual(layout.get_path('foo-1.tar.gz'), path)
+
+	def test_mirror_layout_config(self):
+		"""
+		Check parsing, serialization and layout selection
+		of MirrorLayoutConfig.
+		"""
+		distfile = 'foo-1.tar.gz'
+		mlc = MirrorLayoutConfig()
+
+		def assert_state(structure, layout_cls, path):
+			# Compare the current state of mlc with the expected
+			# structure, selected layout class and distfile path.
+			self.assertEqual(mlc.serialize(), structure)
+			layout = mlc.get_best_supported_layout()
+			self.assertIsInstance(layout, layout_cls)
+			self.assertEqual(layout.get_path(distfile), path)
+
+		# a fresh object has no structure and falls back to flat
+		self.assertEqual(mlc.serialize(), ())
+		self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
+
+		mlc.read_from_file(io.StringIO('''
+[structure]
+0=flat
+'''))
+		assert_state((('flat',),), FlatLayout, 'foo-1.tar.gz')
+
+		hash_then_flat = (
+			('filename-hash', 'SHA1', '8:16'),
+			('flat',)
+		)
+		mlc.read_from_file(io.StringIO('''
+[structure]
+0=filename-hash SHA1 8:16
+1=flat
+'''))
+		assert_state(hash_then_flat, FilenameHashLayout,
+			'19/c3b6/foo-1.tar.gz')
+		serialized = mlc.serialize()
+
+		# test fallback
+		mlc.read_from_file(io.StringIO('''
+[structure]
+0=filename-hash INVALID-HASH 8:16
+1=filename-hash SHA1 32
+2=flat
+'''))
+		assert_state((
+			('filename-hash', 'INVALID-HASH', '8:16'),
+			('filename-hash', 'SHA1', '32'),
+			('flat',)
+		), FilenameHashLayout, '19c3b637/foo-1.tar.gz')
+
+		# test deserialization
+		mlc.deserialize(serialized)
+		assert_state(hash_then_flat, FilenameHashLayout,
+			'19/c3b6/foo-1.tar.gz')
+
+		# test erroneous input
+		garbage = io.StringIO('''
+[#(*DA*&*F
+[structure]
+0=filename-hash SHA1 32
+''')
+		self.assertRaises(ConfigParserError, mlc.read_from_file, garbage)
--
2.23.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2019-10-04 9:19 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-10-04 9:18 [gentoo-portage-dev] [PATCH v3] fetch: Support GLEP 75 mirror structure Michał Górny
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox