From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id 303ED1382FE for ; Sun, 10 Jul 2016 19:45:27 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id B554C21C074; Sun, 10 Jul 2016 19:45:21 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 34A4621C06B for ; Sun, 10 Jul 2016 19:45:21 +0000 (UTC) Received: from localhost.localdomain (ip68-5-185-102.oc.oc.cox.net [68.5.185.102]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-SHA256 (128/128 bits)) (No client certificate requested) (Authenticated sender: zmedico) by smtp.gentoo.org (Postfix) with ESMTPSA id 73994340D92; Sun, 10 Jul 2016 19:45:19 +0000 (UTC) From: Zac Medico To: gentoo-portage-dev@lists.gentoo.org Cc: Zac Medico Subject: [gentoo-portage-dev] [PATCH v2] portage.cache: write md5 instead of mtime (bug 568934) Date: Sun, 10 Jul 2016 12:44:52 -0700 Message-Id: <1468179892-23288-1-git-send-email-zmedico@gentoo.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1468133471-18377-1-git-send-email-zmedico@gentoo.org> References: <1468133471-18377-1-git-send-email-zmedico@gentoo.org> Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Archives-Salt: b0344295-9c06-43d3-84d7-454a4276d929 X-Archives-Hash: 09b23659ed36fdf82ce0abbcfc992693 Change cache modules to write md5 in cache entries, instead of mtime. Since portage-2.2.27, the relevant cache modules have had the ability to read cache entries containing either md5 or mtime, therefore this change is backward-compatible with portage-2.2.27 and later. Also fix the reconstruct_eclasses function to raise CacheCorruption when the specified chf_type is md5 and the cache entry contains mtime data, and optimize __getitem__ to skip reconstruct_eclasses calls when the entry appears to have a different chf_type. X-Gentoo-Bug: 568934 X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934 --- [PATCH v2] adds a __getitem__ optimization to skip reconstruct_eclasses calls when the entry appears to have a different chf_type pym/portage/cache/anydbm.py | 4 ++-- pym/portage/cache/flat_hash.py | 4 ++-- pym/portage/cache/sqlite.py | 4 ++-- pym/portage/cache/template.py | 23 +++++++++++++++++++---- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py index 80d24e5..88d85b0 100644 --- a/pym/portage/cache/anydbm.py +++ b/pym/portage/cache/anydbm.py @@ -36,8 +36,8 @@ from portage.cache import cache_errors class database(fs_template.FsBased): - validation_chf = 'mtime' - chf_types = ('mtime', 'md5') + validation_chf = 'md5' + chf_types = ('md5', 'mtime') autocommits = True cleanse_keys = True diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py index cca0f10..3a899c0 100644 --- a/pym/portage/cache/flat_hash.py +++ b/pym/portage/cache/flat_hash.py @@ -163,5 +163,5 @@ class md5_database(database): class mtime_md5_database(database): - validation_chf = 'mtime' - chf_types = ('mtime', 'md5') + validation_chf = 'md5' + chf_types = ('md5', 'mtime') diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py index 32e4076..69150f6 100644 --- a/pym/portage/cache/sqlite.py +++ b/pym/portage/cache/sqlite.py @@ -18,8 +18,8 @@ if sys.hexversion >= 0x3000000: class database(fs_template.FsBased): - validation_chf = 'mtime' - chf_types = ('mtime', 'md5') + validation_chf = 'md5' + chf_types = ('md5', 'mtime') autocommits = False synchronous = False diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py index a7c6de0..24d8f8f 100644 --- a/pym/portage/cache/template.py +++ b/pym/portage/cache/template.py @@ -54,6 +54,10 @@ class database(object): if self.serialize_eclasses and "_eclasses_" in d: for chf_type in chf_types: + if '_%s_' % chf_type not in d: + # Skip the reconstruct_eclasses call, since this + # entry appears to have a different chf_type. + continue try: d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"], chf_type, paths=self.store_eclass_paths) @@ -310,6 +314,18 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True): for k, v in sorted(eclass_dict.items(), key=_keysorter)) +def _md5_deserializer(md5): + if len(md5) != 32: + raise ValueError('expected 32 hex digits') + return md5 + + +_chf_deserializers = { + 'md5': _md5_deserializer, + 'mtime': long, +} + + def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): """returns a dict when handed a string generated by serialize_eclasses""" eclasses = eclass_string.rstrip().lstrip().split("\t") @@ -317,9 +333,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): # occasionally this occurs in the fs backends. they suck. return {} - converter = _unicode - if chf_type == 'mtime': - converter = long + converter = _chf_deserializers.get(chf_type, lambda x: x) if paths: if len(eclasses) % 3 != 0: @@ -340,6 +354,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses)) except ValueError: - raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed") + raise cache_errors.CacheCorruption(cpv, + "_eclasses_ not valid for chf_type {}".format(chf_type)) del eclasses return d -- 2.7.4