From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id 2A0A9139694 for ; Fri, 10 Mar 2017 17:31:39 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 1A8EFE0C22; Fri, 10 Mar 2017 17:31:37 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id D7F08E0C22 for ; Fri, 10 Mar 2017 17:31:36 +0000 (UTC) Received: from oystercatcher.gentoo.org (unknown [IPv6:2a01:4f8:202:4333:225:90ff:fed9:fc84]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id 6E82233E142 for ; Fri, 10 Mar 2017 17:31:35 +0000 (UTC) Received: from localhost.localdomain (localhost [IPv6:::1]) by oystercatcher.gentoo.org (Postfix) with ESMTP id 951186390 for ; Fri, 10 Mar 2017 17:31:33 +0000 (UTC) From: "Zac Medico" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Zac Medico" Message-ID: <1489167053.9ffefd66e618155ffb479cd1dbce9c3afe9a9ea4.zmedico@gentoo> Subject: [gentoo-commits] proj/gentoolkit:master commit in: pym/gentoolkit/revdep_rebuild/ X-VCS-Repository: proj/gentoolkit X-VCS-Files: pym/gentoolkit/revdep_rebuild/assign.py X-VCS-Directories: pym/gentoolkit/revdep_rebuild/ X-VCS-Committer: zmedico X-VCS-Committer-Name: Zac Medico X-VCS-Revision: 9ffefd66e618155ffb479cd1dbce9c3afe9a9ea4 X-VCS-Branch: master Date: Fri, 10 Mar 2017 17:31:33 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: da966008-3e7d-4192-847b-0e2a24f008d3 X-Archives-Hash: 445a00a0ad0ad07835cc5d1931556e72 commit: 9ffefd66e618155ffb479cd1dbce9c3afe9a9ea4 Author: Zac Medico gentoo org> AuthorDate: Fri Mar 10 09:38:04 2017 +0000 Commit: Zac Medico gentoo org> CommitDate: Fri Mar 10 17:30:53 2017 +0000 URL: https://gitweb.gentoo.org/proj/gentoolkit.git/commit/?id=9ffefd66 revdep_rebuild/assign.py: handle directory symlinks (bug 611808) Use a _file_matcher class to make file comparisons work regardless of directory symlinks. X-Gentoo-bug: 611808 X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=611808 Acked-by: Brian Dolbec gentoo.org> pym/gentoolkit/revdep_rebuild/assign.py | 67 +++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/pym/gentoolkit/revdep_rebuild/assign.py b/pym/gentoolkit/revdep_rebuild/assign.py index 00dda6e..84bd59f 100644 --- a/pym/gentoolkit/revdep_rebuild/assign.py +++ b/pym/gentoolkit/revdep_rebuild/assign.py @@ -6,6 +6,7 @@ Functions used for determining the package the broken lib belongs to. from __future__ import print_function +import errno import os import io import re @@ -22,11 +23,61 @@ try: except NameError: pass + +class _file_matcher(object): + """ + Compares files by basename and parent directory (device, inode), + so comparisons work regardless of directory symlinks. If a + parent directory does not exist, the realpath of the parent + directory is used instead of the (device, inode). When multiple + files share the same parent directory, stat is only called + once per directory, and the result is cached internally. + """ + def __init__(self): + self._file_ids = {} + self._added = {} + + def _file_id(self, filename): + try: + return self._file_ids[filename] + except KeyError: + try: + st = os.stat(filename) + except OSError as e: + if e.errno != errno.ENOENT: + raise + file_id = (os.path.realpath(filename),) + else: + file_id = (st.st_dev, st.st_ino) + + self._file_ids[filename] = file_id + return file_id + + def _file_key(self, filename): + head, tail = os.path.split(filename) + key = self._file_id(head) + (tail,) + return key + + def add(self, filename): + self._added[self._file_key(filename)] = filename + + def intersection(self, other): + for file_key in self._added: + match = other._added.get(file_key) + if match is not None: + yield match + + def assign_packages(broken, logger, settings): ''' Finds and returns packages that owns files placed in broken. Broken is list of files ''' stime = current_milli_time() + + broken_matcher = _file_matcher() + for filename in broken: + broken_matcher.add(filename) + assigned_pkgs = set() assigned_filenames = set() for group in os.listdir(settings['PKG_DIR']): @@ -39,21 +90,23 @@ def assign_packages(broken, logger, settings): continue f = pkgpath + '/CONTENTS' if os.path.exists(f): + contents_matcher = _file_matcher() try: with io.open(f, 'r', encoding='utf_8') as cnt: for line in cnt.readlines(): m = re.match('^obj (/[^ ]+)', line) if m is not None: - m = m.group(1) - if m in broken: - found = group+'/'+pkg - assigned_pkgs.add(found) - assigned_filenames.add(m) - logger.info('\t' + green('* ') + m + - ' -> ' + bold(found)) + contents_matcher.add(m.group(1)) except Exception as e: logger.warning(red(' !! Failed to read ' + f)) logger.warning(red(' !! Error was:' + str(e))) + else: + for m in contents_matcher.intersection(broken_matcher): + found = group+'/'+pkg + assigned_pkgs.add(found) + assigned_filenames.add(m) + logger.info('\t' + green('* ') + m + + ' -> ' + bold(found)) broken_filenames = set(broken) orphaned = broken_filenames.difference(assigned_filenames)