From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id 2C361138BD3 for ; Tue, 4 Nov 2014 05:07:36 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 72927E079B; Tue, 4 Nov 2014 05:07:34 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id D81ADE078A for ; Tue, 4 Nov 2014 05:07:33 +0000 (UTC) Received: from localhost.localdomain (ip70-181-96-121.oc.oc.cox.net [70.181.96.121]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) (Authenticated sender: zmedico) by smtp.gentoo.org (Postfix) with ESMTPSA id 56F34340340; Tue, 4 Nov 2014 05:07:32 +0000 (UTC) From: Zac Medico To: gentoo-portage-dev@lists.gentoo.org Cc: Zac Medico Subject: [gentoo-portage-dev] [PATCH 3/5 v2] Add IndexedPortdb class. Date: Mon, 3 Nov 2014 21:07:27 -0800 Message-Id: <1415077647-13708-1-git-send-email-zmedico@gentoo.org> X-Mailer: git-send-email 2.0.4 In-Reply-To: <1414881983-19877-4-git-send-email-zmedico@gentoo.org> References: <1414881983-19877-4-git-send-email-zmedico@gentoo.org> Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Archives-Salt: 65d1bef8-7315-4fda-9a6e-ab88d97d5939 X-Archives-Hash: 06b98f553b96335b92bbc92c6f62ce37 The IndexedPortdb class uses pkg_desc_index to optimize searches for package names and descriptions. If the package description index is missing from a particular repository, then all metadata for that repository is obtained using the normal portdbapi.aux_get method. 
class IndexedPortdb(object):
    """
    A portdbapi interface that uses a package description index to
    improve performance. If the description index is missing for a
    particular repository, then all metadata for that repository is
    obtained using the normal portdbapi.aux_get method.

    This class only implements a subset of portdbapi functionality
    that is useful for searching pkg_desc_index incrementally. For
    this reason, the cp_all method returns an ordered iterator instead
    of a list, so that search results can be displayed incrementally.

    For performance reasons, the match method only supports package
    name and version constraints. For the same reason, the xmatch
    method is not implemented.
    """

    # Attributes of the wrapped portdb that are re-exported unchanged
    # on every instance (see __init__).
    _copy_attrs = ('cpv_exists', 'findname', 'getFetchMap',
        '_aux_cache_keys', '_cpv_sort_ascending',
        '_have_root_eclass_dir')

    def __init__(self, portdb):
        """
        @param portdb: the portdbapi instance to wrap; every attribute
            named in _copy_attrs is copied from it onto this object.
        """
        self._portdb = portdb

        for k in self._copy_attrs:
            setattr(self, k, getattr(portdb, k))

        # Both stay None until the generator returned by _init_index
        # starts running.
        self._desc_cache = None
        self._cp_map = None

    def _init_index(self):
        """
        Generator that builds the cp -> cpv list map and the
        cpv -> description cache, yielding each cp the first time it
        is encountered.

        For every repository in portdb.porttrees, a pkg_desc_index
        file is looked up first in <repo>/metadata and then under
        portdb.depcachedir. Repositories without a readable index are
        served by a fallback stream built from portdb.cp_all and
        portdb.cp_list, whose nodes carry no description.
        """
        portdb = self._portdb
        cp_map = {}
        desc_cache = {}
        # Publish the containers before the first yield, so that match
        # and aux_get can use whatever has been collected so far while
        # the caller is still iterating.
        self._desc_cache = desc_cache
        self._cp_map = cp_map
        index_missing = []

        streams = []
        for repo_path in portdb.porttrees:
            outside_repo = os.path.join(portdb.depcachedir,
                repo_path.lstrip(os.sep))
            filenames = [
                os.path.join(parent_dir, "metadata", "pkg_desc_index")
                for parent_dir in (repo_path, outside_repo)]

            repo_name = portdb.getRepositoryName(repo_path)

            try:
                f = None
                for filename in filenames:
                    try:
                        f = io.open(filename,
                            encoding=_encodings["repo.content"])
                    except IOError as e:
                        # A missing or stale file just means "try the
                        # next candidate"; anything else is an error.
                        if e.errno not in (errno.ENOENT, errno.ESTALE):
                            raise
                    else:
                        break

                if f is None:
                    raise FileNotFound(filename)

                streams.append(iter(IndexStreamIterator(f,
                    functools.partial(pkg_desc_index_line_read,
                    repo=repo_name))))
            except FileNotFound:
                index_missing.append(repo_path)

        if index_missing:

            def non_indexed_nodes():
                # Description is None here, so aux_get falls through
                # to the wrapped portdb for these packages.
                for cp in portdb.cp_all(trees=index_missing):
                    cp_list = portdb.cp_list(cp, mytree=index_missing)
                    yield pkg_desc_index_node(cp,
                        tuple(_pkg_str(cpv) for cpv in cp_list),
                        None)

            streams.append(non_indexed_nodes())

        if not streams:
            return

        if len(streams) == 1:
            # A single stream needs no merging: each node is its own
            # group.
            cp_group_iter = ([node] for node in streams[0])
        else:
            cp_group_iter = MultiIterGroupBy(streams,
                key=operator.attrgetter("cp"))

        for cp_group in cp_group_iter:
            cp = cp_group[0].cp
            cp_list = cp_map.get(cp)
            is_new = cp_list is None
            if is_new:
                cp_list = []
                cp_map[cp] = cp_list

            for entry in cp_group:
                cp_list.extend(entry.cpv_list)
                if entry.desc is not None:
                    for cpv in entry.cpv_list:
                        desc_cache[cpv] = entry.desc

            # Only announce a cp once, after its data is in place.
            if is_new:
                yield cp

    def cp_all(self):
        """
        Returns an ordered iterator instead of a list, so that search
        results can be displayed incrementally.

        If the index has not been started yet, the returned iterator
        builds it while being consumed. Otherwise the result is a
        sorted snapshot of the cp names collected so far.
        """
        if self._cp_map is None:
            return self._init_index()
        return iter(sorted(self._cp_map))

    def match(self, atom):
        """
        For performance reasons, only package name and version
        constraints are supported.

        @param atom: an Atom instance, or a string that is converted
            to one.
        @return: the result of portage.match_from_list for the cpvs
            indexed under atom.cp, or an empty list if atom.cp is not
            in the index.
        """
        if not isinstance(atom, Atom):
            atom = Atom(atom)
        if self._cp_map is None:
            # The index was never started (cp_all has not been
            # iterated); build it completely instead of failing with
            # an AttributeError on None.
            for _ in self._init_index():
                pass
        cp_list = self._cp_map.get(atom.cp)
        if cp_list is None:
            return []
        self._portdb._cpv_sort_ascending(cp_list)
        return portage.match_from_list(atom, cp_list)

    def aux_get(self, cpv, attrs, myrepo=None):
        """
        Return metadata values for cpv. A request for exactly
        ["DESCRIPTION"] is answered from the description cache when
        possible; everything else is delegated to the wrapped portdb.

        @param cpv: package version to look up.
        @param attrs: sequence of metadata keys.
        @param myrepo: forwarded to the wrapped portdb's aux_get; it
            is not consulted for cache hits.
        @return: list of values, one per requested key.
        """
        if len(attrs) == 1 and attrs[0] == "DESCRIPTION":
            desc_cache = self._desc_cache
            # The cache is None until the index has been started.
            if desc_cache is not None:
                try:
                    return [desc_cache[cpv]]
                except KeyError:
                    pass
        return self._portdb.aux_get(cpv, attrs, myrepo=myrepo)