* [gentoo-portage-dev] [PATCH] fetch: atomic downloads (bug 175612)
@ 2019-04-28 9:14 Zac Medico
0 siblings, 0 replies; only message in thread
From: Zac Medico @ 2019-04-28 9:14 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
Direct FETCHCOMMAND/RESUMECOMMAND output to a temporary file with
a constant .__download__ suffix, and atomically rename the file
to remove the suffix only after the download has completed
successfully (includes digest verification when applicable).
Also add unit tests to cover most fetch cases.
Bug: https://bugs.gentoo.org/175612
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
lib/_emerge/BinpkgVerifier.py | 2 +-
lib/portage/package/ebuild/fetch.py | 97 ++++++++-----
lib/portage/tests/ebuild/test_fetch.py | 186 +++++++++++++++++++++++++
3 files changed, 251 insertions(+), 34 deletions(-)
create mode 100644 lib/portage/tests/ebuild/test_fetch.py
diff --git a/lib/_emerge/BinpkgVerifier.py b/lib/_emerge/BinpkgVerifier.py
index 7a6d15e80..bde1328ea 100644
--- a/lib/_emerge/BinpkgVerifier.py
+++ b/lib/_emerge/BinpkgVerifier.py
@@ -108,7 +108,7 @@ class BinpkgVerifier(CompositeTask):
def _digest_exception(self, name, value, expected):
head, tail = os.path.split(self._pkg_path)
- temp_filename = _checksum_failure_temp_file(head, tail)
+ temp_filename = _checksum_failure_temp_file(self.pkg.root_config.settings, head, tail)
self.scheduler.output((
"\n!!! Digest verification failed:\n"
diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index bfd97601c..cd4a5955c 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -30,7 +30,7 @@ portage.proxy.lazyimport.lazyimport(globals(),
)
from portage import os, selinux, shutil, _encodings, \
- _shell_quote, _unicode_encode
+ _movefile, _shell_quote, _unicode_encode
from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
@@ -46,6 +46,8 @@ from portage.util import apply_recursive_permissions, \
varexpand, writemsg, writemsg_level, writemsg_stdout
from portage.process import spawn
+_download_suffix = '.__download__'
+
_userpriv_spawn_kwargs = (
("uid", portage_uid),
("gid", portage_gid),
@@ -139,7 +141,7 @@ def _userpriv_test_write_file(settings, file_path):
_userpriv_test_write_file_cache[file_path] = rval
return rval
-def _checksum_failure_temp_file(distdir, basename):
+def _checksum_failure_temp_file(settings, distdir, basename):
"""
First try to find a duplicate temp file with the same checksum and return
that filename if available. Otherwise, use mkstemp to create a new unique
@@ -149,9 +151,13 @@ def _checksum_failure_temp_file(distdir, basename):
"""
filename = os.path.join(distdir, basename)
+ if basename.endswith(_download_suffix):
+ normal_basename = basename[:-len(_download_suffix)]
+ else:
+ normal_basename = basename
size = os.stat(filename).st_size
checksum = None
- tempfile_re = re.compile(re.escape(basename) + r'\._checksum_failure_\..*')
+ tempfile_re = re.compile(re.escape(normal_basename) + r'\._checksum_failure_\..*')
for temp_filename in os.listdir(distdir):
if not tempfile_re.match(temp_filename):
continue
@@ -173,9 +179,9 @@ def _checksum_failure_temp_file(distdir, basename):
return temp_filename
fd, temp_filename = \
- tempfile.mkstemp("", basename + "._checksum_failure_.", distdir)
+ tempfile.mkstemp("", normal_basename + "._checksum_failure_.", distdir)
os.close(fd)
- os.rename(filename, temp_filename)
+ _movefile(filename, temp_filename, mysettings=settings)
return temp_filename
def _check_digests(filename, digests, show_errors=1):
@@ -602,6 +608,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
pruned_digests["size"] = size
myfile_path = os.path.join(mysettings["DISTDIR"], myfile)
+ download_path = myfile_path + _download_suffix
has_space = True
has_space_superuser = True
file_lock = None
@@ -679,12 +686,15 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
del e
continue
- if distdir_writable and mystat is None:
- # Remove broken symlinks if necessary.
+ # Remove broken symlinks or symlinks to files which
+ # _check_distfile did not match above.
+ if distdir_writable and mystat is None or os.path.islink(myfile_path):
try:
os.unlink(myfile_path)
- except OSError:
- pass
+ except OSError as e:
+ if e.errno not in (errno.ENOENT, errno.ESTALE):
+ raise
+ mystat = None
if mystat is not None:
if stat.S_ISDIR(mystat.st_mode):
@@ -695,10 +705,28 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
level=logging.ERROR, noiselevel=-1)
return 0
+ # Since _check_distfile did not match above, the file
+ # is either corrupt or its identity has changed since
+ # the last time it was fetched, so rename it.
+ temp_filename = \
+ _checksum_failure_temp_file(
+ mysettings, mysettings["DISTDIR"], myfile)
+ writemsg_stdout(_("Refetching... "
+ "File renamed to '%s'\n\n") % \
+ temp_filename, noiselevel=-1)
+
+ # Stat the temporary download file for comparison with
+ # fetch_resume_size.
+ try:
+ mystat = os.stat(download_path)
+ except OSError:
+ mystat = None
+
+ if mystat is not None:
if mystat.st_size == 0:
if distdir_writable:
try:
- os.unlink(myfile_path)
+ os.unlink(download_path)
except OSError:
pass
elif distdir_writable and size is not None:
@@ -717,14 +745,14 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
"ME_MIN_SIZE)\n") % mystat.st_size)
temp_filename = \
_checksum_failure_temp_file(
- mysettings["DISTDIR"], myfile)
+ mysettings, mysettings["DISTDIR"], os.path.basename(download_path))
writemsg_stdout(_("Refetching... "
"File renamed to '%s'\n\n") % \
temp_filename, noiselevel=-1)
elif mystat.st_size >= size:
temp_filename = \
_checksum_failure_temp_file(
- mysettings["DISTDIR"], myfile)
+ mysettings, mysettings["DISTDIR"], os.path.basename(download_path))
writemsg_stdout(_("Refetching... "
"File renamed to '%s'\n\n") % \
temp_filename, noiselevel=-1)
@@ -766,7 +794,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
for mydir in fsmirrors:
mirror_file = os.path.join(mydir, myfile)
try:
- shutil.copyfile(mirror_file, myfile_path)
+ shutil.copyfile(mirror_file, download_path)
writemsg(_("Local mirror has file: %s\n") % myfile)
break
except (IOError, OSError) as e:
@@ -775,7 +803,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
del e
try:
- mystat = os.stat(myfile_path)
+ mystat = os.stat(download_path)
except OSError as e:
if e.errno not in (errno.ENOENT, errno.ESTALE):
raise
@@ -784,13 +812,13 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
# Skip permission adjustment for symlinks, since we don't
# want to modify anything outside of the primary DISTDIR,
# and symlinks typically point to PORTAGE_RO_DISTDIRS.
- if not os.path.islink(myfile_path):
+ if not os.path.islink(download_path):
try:
- apply_secpass_permissions(myfile_path,
+ apply_secpass_permissions(download_path,
gid=portage_gid, mode=0o664, mask=0o2,
stat_cached=mystat)
except PortageException as e:
- if not os.access(myfile_path, os.R_OK):
+ if not os.access(download_path, os.R_OK):
writemsg(_("!!! Failed to adjust permissions:"
" %s\n") % (e,), noiselevel=-1)
@@ -799,7 +827,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
if mystat.st_size == 0:
if distdir_writable:
try:
- os.unlink(myfile_path)
+ os.unlink(download_path)
except EnvironmentError:
pass
elif myfile not in mydigests:
@@ -824,7 +852,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
digests = _filter_unaccelarated_hashes(mydigests[myfile])
if hash_filter is not None:
digests = _apply_hash_filter(digests, hash_filter)
- verified_ok, reason = verify_all(myfile_path, digests)
+ verified_ok, reason = verify_all(download_path, digests)
if not verified_ok:
writemsg(_("!!! Previously fetched"
" file: '%s'\n") % myfile, noiselevel=-1)
@@ -838,11 +866,12 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
if distdir_writable:
temp_filename = \
_checksum_failure_temp_file(
- mysettings["DISTDIR"], myfile)
+ mysettings, mysettings["DISTDIR"], os.path.basename(download_path))
writemsg_stdout(_("Refetching... "
"File renamed to '%s'\n\n") % \
temp_filename, noiselevel=-1)
else:
+ _movefile(download_path, myfile_path, mysettings=mysettings)
eout = EOutput()
eout.quiet = \
mysettings.get("PORTAGE_QUIET", None) == "1"
@@ -928,7 +957,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
if not can_fetch:
if fetched != 2:
try:
- mysize = os.stat(myfile_path).st_size
+ mysize = os.stat(download_path).st_size
except OSError as e:
if e.errno not in (errno.ENOENT, errno.ESTALE):
raise
@@ -952,7 +981,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
#we either need to resume or start the download
if fetched == 1:
try:
- mystat = os.stat(myfile_path)
+ mystat = os.stat(download_path)
except OSError as e:
if e.errno not in (errno.ENOENT, errno.ESTALE):
raise
@@ -964,7 +993,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
"%d (smaller than " "PORTAGE_FETCH_RESU"
"ME_MIN_SIZE)\n") % mystat.st_size)
try:
- os.unlink(myfile_path)
+ os.unlink(download_path)
except OSError as e:
if e.errno not in \
(errno.ENOENT, errno.ESTALE):
@@ -984,7 +1013,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
_hide_url_passwd(loc))
variables = {
"URI": loc,
- "FILE": myfile
+ "FILE": os.path.basename(download_path)
}
for k in ("DISTDIR", "PORTAGE_SSH_OPTS"):
@@ -1001,12 +1030,12 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
finally:
try:
- apply_secpass_permissions(myfile_path,
+ apply_secpass_permissions(download_path,
gid=portage_gid, mode=0o664, mask=0o2)
except FileNotFound:
pass
except PortageException as e:
- if not os.access(myfile_path, os.R_OK):
+ if not os.access(download_path, os.R_OK):
writemsg(_("!!! Failed to adjust permissions:"
" %s\n") % str(e), noiselevel=-1)
del e
@@ -1015,8 +1044,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
# trust the return value from the fetcher. Remove the
# empty file and try to download again.
try:
- if os.stat(myfile_path).st_size == 0:
- os.unlink(myfile_path)
+ if os.stat(download_path).st_size == 0:
+ os.unlink(download_path)
fetched = 0
continue
except EnvironmentError:
@@ -1024,7 +1053,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
if mydigests is not None and myfile in mydigests:
try:
- mystat = os.stat(myfile_path)
+ mystat = os.stat(download_path)
except OSError as e:
if e.errno not in (errno.ENOENT, errno.ESTALE):
raise
@@ -1065,13 +1094,13 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
if (mystat[stat.ST_SIZE]<100000) and (len(myfile)>4) and not ((myfile[-5:]==".html") or (myfile[-4:]==".htm")):
html404=re.compile("<title>.*(not found|404).*</title>",re.I|re.M)
with io.open(
- _unicode_encode(myfile_path,
+ _unicode_encode(download_path,
encoding=_encodings['fs'], errors='strict'),
mode='r', encoding=_encodings['content'], errors='replace'
) as f:
if html404.search(f.read()):
try:
- os.unlink(mysettings["DISTDIR"]+"/"+myfile)
+ os.unlink(download_path)
writemsg(_(">>> Deleting invalid distfile. (Improper 404 redirect from server.)\n"))
fetched = 0
continue
@@ -1087,7 +1116,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
digests = _filter_unaccelarated_hashes(mydigests[myfile])
if hash_filter is not None:
digests = _apply_hash_filter(digests, hash_filter)
- verified_ok, reason = verify_all(myfile_path, digests)
+ verified_ok, reason = verify_all(download_path, digests)
if not verified_ok:
writemsg(_("!!! Fetched file: %s VERIFY FAILED!\n") % myfile,
noiselevel=-1)
@@ -1099,7 +1128,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
return 0
temp_filename = \
_checksum_failure_temp_file(
- mysettings["DISTDIR"], myfile)
+ mysettings, mysettings["DISTDIR"], os.path.basename(download_path))
writemsg_stdout(_("Refetching... "
"File renamed to '%s'\n\n") % \
temp_filename, noiselevel=-1)
@@ -1119,6 +1148,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
checksum_failure_max_tries:
break
else:
+ _movefile(download_path, myfile_path, mysettings=mysettings)
eout = EOutput()
eout.quiet = mysettings.get("PORTAGE_QUIET", None) == "1"
if digests:
@@ -1130,6 +1160,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
else:
if not myret:
fetched=2
+ _movefile(download_path, myfile_path, mysettings=mysettings)
break
elif mydigests!=None:
writemsg(_("No digest file available and download failed.\n\n"),
diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py
new file mode 100644
index 000000000..d345f7703
--- /dev/null
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -0,0 +1,186 @@
+# Copyright 2019 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+from __future__ import unicode_literals
+
+import tempfile
+
+import portage
+from portage import shutil, os
+from portage.tests import TestCase
+from portage.tests.resolver.ResolverPlayground import ResolverPlayground
+from portage.tests.util.test_socks5 import AsyncHTTPServer
+from portage.util._async.SchedulerInterface import SchedulerInterface
+from portage.util._eventloop.global_event_loop import global_event_loop
+from portage.package.ebuild.config import config
+from _emerge.EbuildFetcher import EbuildFetcher
+from _emerge.Package import Package
+
+
+class EbuildFetchTestCase(TestCase):
+
+ def testEbuildFetch(self):
+
+ distfiles = {
+ 'bar': b'bar\n',
+ 'foo': b'foo\n',
+ }
+
+ ebuilds = {
+ 'dev-libs/A-1': {
+ 'EAPI': '7',
+ 'RESTRICT': 'primaryuri',
+ 'SRC_URI': '''{scheme}://{host}:{port}/distfiles/bar.txt -> bar
+ {scheme}://{host}:{port}/distfiles/foo.txt -> foo''',
+ },
+ }
+
+ loop = SchedulerInterface(global_event_loop())
+ scheme = 'http'
+ host = '127.0.0.1'
+ content = {}
+ for k, v in distfiles.items():
+ content['/distfiles/{}.txt'.format(k)] = v
+
+ with AsyncHTTPServer(host, content, loop) as server:
+ ebuilds_subst = {}
+ for cpv, metadata in ebuilds.items():
+ metadata = metadata.copy()
+ metadata['SRC_URI'] = metadata['SRC_URI'].format(
+ scheme=scheme, host=host, port=server.server_port)
+ ebuilds_subst[cpv] = metadata
+
+ playground = ResolverPlayground(ebuilds=ebuilds_subst, distfiles=distfiles)
+ ro_distdir = tempfile.mkdtemp()
+ try:
+ fetchcommand = portage.util.shlex_split(playground.settings['FETCHCOMMAND'])
+ fetch_bin = portage.process.find_binary(fetchcommand[0])
+ if fetch_bin is None:
+ self.skipTest('FETCHCOMMAND not found: {}'.format(playground.settings['FETCHCOMMAND']))
+ root_config = playground.trees[playground.eroot]['root_config']
+ portdb = root_config.trees["porttree"].dbapi
+ settings = config(clone=playground.settings)
+
+ # Tests only work with one ebuild at a time, so the config
+ # pool only needs a single config instance.
+ class config_pool:
+ @staticmethod
+ def allocate():
+ return settings
+ @staticmethod
+ def deallocate(settings):
+ pass
+
+ def async_fetch(pkg, ebuild_path):
+ fetcher = EbuildFetcher(config_pool=config_pool, ebuild_path=ebuild_path,
+ fetchonly=False, fetchall=True, pkg=pkg, scheduler=loop)
+ fetcher.start()
+ return fetcher.async_wait()
+
+ for cpv in ebuilds:
+ metadata = dict(zip(Package.metadata_keys,
+ portdb.aux_get(cpv, Package.metadata_keys)))
+
+ pkg = Package(built=False, cpv=cpv, installed=False,
+ metadata=metadata, root_config=root_config,
+ type_name='ebuild')
+
+ settings.setcpv(pkg)
+ ebuild_path = portdb.findname(pkg.cpv)
+ portage.doebuild_environment(ebuild_path, 'fetch', settings=settings, db=portdb)
+
+ # Test good files in DISTDIR
+ for k in settings['AA'].split():
+ os.stat(os.path.join(settings['DISTDIR'], k))
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+
+ # Test missing files in DISTDIR
+ for k in settings['AA'].split():
+ os.unlink(os.path.join(settings['DISTDIR'], k))
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+
+ # Test empty files in DISTDIR
+ for k in settings['AA'].split():
+ file_path = os.path.join(settings['DISTDIR'], k)
+ with open(file_path, 'wb') as f:
+ pass
+ self.assertEqual(os.stat(file_path).st_size, 0)
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+
+ # Test non-empty files containing null bytes in DISTDIR
+ for k in settings['AA'].split():
+ file_path = os.path.join(settings['DISTDIR'], k)
+ with open(file_path, 'wb') as f:
+ for i in range(len(distfiles[k])):
+ f.write(b'\0')
+ self.assertEqual(os.stat(file_path).st_size, len(distfiles[k]))
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+
+ # Test PORTAGE_RO_DISTDIRS
+ settings['PORTAGE_RO_DISTDIRS'] = '"{}"'.format(ro_distdir)
+ try:
+ for k in settings['AA'].split():
+ file_path = os.path.join(settings['DISTDIR'], k)
+ os.rename(file_path, os.path.join(ro_distdir, k))
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ file_path = os.path.join(settings['DISTDIR'], k)
+ self.assertTrue(os.path.islink(file_path))
+ with open(file_path, 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+ os.unlink(file_path)
+ finally:
+ settings.pop('PORTAGE_RO_DISTDIRS')
+
+ # Test local filesystem in GENTOO_MIRRORS
+ orig_mirrors = settings['GENTOO_MIRRORS']
+ try:
+ settings['GENTOO_MIRRORS'] = ro_distdir
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+ finally:
+ settings['GENTOO_MIRRORS'] = orig_mirrors
+
+ # Test readonly DISTDIR
+ orig_distdir_mode = os.stat(settings['DISTDIR']).st_mode
+ try:
+ os.chmod(settings['DISTDIR'], 0o555)
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+ finally:
+ os.chmod(settings['DISTDIR'], orig_distdir_mode)
+
+ # Test parallel-fetch mode
+ settings['PORTAGE_PARALLEL_FETCHONLY'] = '1'
+ try:
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+ for k in settings['AA'].split():
+ os.unlink(os.path.join(settings['DISTDIR'], k))
+ self.assertEqual(loop.run_until_complete(async_fetch(pkg, ebuild_path)), 0)
+ for k in settings['AA'].split():
+ with open(os.path.join(settings['DISTDIR'], k), 'rb') as f:
+ self.assertEqual(f.read(), distfiles[k])
+ finally:
+ settings.pop('PORTAGE_PARALLEL_FETCHONLY')
+ finally:
+ shutil.rmtree(ro_distdir)
+ playground.cleanup()
--
2.21.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2019-04-28 9:18 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-04-28 9:14 [gentoo-portage-dev] [PATCH] fetch: atomic downloads (bug 175612) Zac Medico
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox