From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id E25CF138306 for ; Wed, 13 Jul 2016 11:32:16 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 7A97521C170; Wed, 13 Jul 2016 11:32:14 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 1040221C170 for ; Wed, 13 Jul 2016 11:32:13 +0000 (UTC) Received: from oystercatcher.gentoo.org (unknown [IPv6:2a01:4f8:202:4333:225:90ff:fed9:fc84]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id 9F028340D54 for ; Wed, 13 Jul 2016 11:32:12 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by oystercatcher.gentoo.org (Postfix) with ESMTP id A14682436 for ; Wed, 13 Jul 2016 11:32:07 +0000 (UTC) From: "Zac Medico" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Zac Medico" Message-ID: <1468409374.9abbda7d054761ae6c333d3e6d420632b9658b6d.zmedico@gentoo> Subject: [gentoo-commits] proj/portage:master commit in: pym/portage/cache/ X-VCS-Repository: proj/portage X-VCS-Files: pym/portage/cache/anydbm.py pym/portage/cache/flat_hash.py pym/portage/cache/sqlite.py pym/portage/cache/template.py X-VCS-Directories: pym/portage/cache/ X-VCS-Committer: zmedico X-VCS-Committer-Name: Zac Medico X-VCS-Revision: 9abbda7d054761ae6c333d3e6d420632b9658b6d X-VCS-Branch: master Date: Wed, 13 Jul 2016 11:32:07 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: 3726f14c-fc10-4b45-abaa-4854cc0e92b0 X-Archives-Hash: 5181062d7ab228ce34fbe0bdf0694aa0 commit: 9abbda7d054761ae6c333d3e6d420632b9658b6d Author: Zac Medico gentoo org> AuthorDate: Sun Jul 10 06:11:41 2016 +0000 Commit: Zac Medico gentoo org> CommitDate: Wed Jul 13 11:29:34 2016 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=9abbda7d portage.cache: write md5 instead of mtime (bug 568934) Change cache modules to write md5 in cache entries, instead of mtime. Since portage-2.2.27, the relevant cache modules have had the ability to read cache entries containing either md5 or mtime, therefore this change is backward-compatible with portage-2.2.27 and later. Also fix the reconstruct_eclasses function to raise CacheCorruption when the specified chf_type is md5 and the cache entry contains mtime data, and optimize __getitem__ to skip reconstruct_eclasses calls when the entry appears to have a different chf_type. X-Gentoo-Bug: 568934 X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934 Acked-by: Alexander Berntsen gentoo.org> pym/portage/cache/anydbm.py | 4 ++-- pym/portage/cache/flat_hash.py | 4 ++-- pym/portage/cache/sqlite.py | 4 ++-- pym/portage/cache/template.py | 36 ++++++++++++++++++++++++++++++++---- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py index 80d24e5..88d85b0 100644 --- a/pym/portage/cache/anydbm.py +++ b/pym/portage/cache/anydbm.py @@ -36,8 +36,8 @@ from portage.cache import cache_errors class database(fs_template.FsBased): - validation_chf = 'mtime' - chf_types = ('mtime', 'md5') + validation_chf = 'md5' + chf_types = ('md5', 'mtime') autocommits = True cleanse_keys = True diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py index cca0f10..3a899c0 100644 --- a/pym/portage/cache/flat_hash.py +++ b/pym/portage/cache/flat_hash.py @@ -163,5 +163,5 @@ class md5_database(database): class mtime_md5_database(database): - validation_chf = 'mtime' - chf_types = ('mtime', 'md5') + validation_chf = 'md5' + chf_types = ('md5', 'mtime') diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py index 32e4076..69150f6 100644 --- a/pym/portage/cache/sqlite.py +++ b/pym/portage/cache/sqlite.py @@ -18,8 +18,8 @@ if sys.hexversion >= 0x3000000: class database(fs_template.FsBased): - validation_chf = 'mtime' - chf_types = ('mtime', 'md5') + validation_chf = 'md5' + chf_types = ('md5', 'mtime') autocommits = False synchronous = False diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py index a7c6de0..8662d85 100644 --- a/pym/portage/cache/template.py +++ b/pym/portage/cache/template.py @@ -54,6 +54,15 @@ class database(object): if self.serialize_eclasses and "_eclasses_" in d: for chf_type in chf_types: + if '_%s_' % chf_type not in d: + # Skip the reconstruct_eclasses call, since it's + # a waste of time if it contains a different chf_type + # than the current one. In the past, it was possible + # for reconstruct_eclasses called with chf_type='md5' + # to "successfully" return invalid data here, because + # it was unable to distinguish between md5 data and + # mtime data. + continue try: d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"], chf_type, paths=self.store_eclass_paths) @@ -62,6 +71,9 @@ class database(object): raise else: break + else: + raise cache_errors.CacheCorruption(cpv, + 'entry does not contain a recognized chf_type') elif "_eclasses_" not in d: d["_eclasses_"] = {} @@ -310,6 +322,23 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True): for k, v in sorted(eclass_dict.items(), key=_keysorter)) +def _md5_deserializer(md5): + """ + Without this validation, it's possible for reconstruct_eclasses to + mistakenly interpret mtime data as md5 data, and return an invalid + data structure containing strings where ints are expected. + """ + if len(md5) != 32: + raise ValueError('expected 32 hex digits') + return md5 + + +_chf_deserializers = { + 'md5': _md5_deserializer, + 'mtime': long, +} + + def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): """returns a dict when handed a string generated by serialize_eclasses""" eclasses = eclass_string.rstrip().lstrip().split("\t") @@ -317,9 +346,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): # occasionally this occurs in the fs backends. they suck. return {} - converter = _unicode - if chf_type == 'mtime': - converter = long + converter = _chf_deserializers.get(chf_type, lambda x: x) if paths: if len(eclasses) % 3 != 0: @@ -340,6 +367,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses)) except ValueError: - raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed") + raise cache_errors.CacheCorruption(cpv, + "_eclasses_ not valid for chf_type {}".format(chf_type)) del eclasses return d