From: Zac Medico <zmedico@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Cc: Zac Medico <zmedico@gentoo.org>
Subject: [gentoo-portage-dev] [PATCH 3/5] Add IndexedPortdb class.
Date: Sat, 1 Nov 2014 15:46:21 -0700 [thread overview]
Message-ID: <1414881983-19877-4-git-send-email-zmedico@gentoo.org> (raw)
In-Reply-To: <1414881983-19877-1-git-send-email-zmedico@gentoo.org>
The IndexedPortdb class uses pkg_desc_index to optimize searches for
package names and descriptions. If the package description index is
missing from a particular repository, then all metadata for that
repository is obtained using the normal portdbapi.aux_get method.
This class only implements a subset of portdbapi functionality that is
useful for searching pkg_desc_index incrementally. For this reason,
the cp_all method returns an ordered iterator instead of a list, so
that search results can be displayed incrementally.
X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
---
pym/portage/dbapi/IndexedPortdb.py | 151 +++++++++++++++++++++++++++++++++++++
1 file changed, 151 insertions(+)
create mode 100644 pym/portage/dbapi/IndexedPortdb.py
diff --git a/pym/portage/dbapi/IndexedPortdb.py b/pym/portage/dbapi/IndexedPortdb.py
new file mode 100644
index 0000000..4fb2cf1
--- /dev/null
+++ b/pym/portage/dbapi/IndexedPortdb.py
@@ -0,0 +1,151 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import io
+import functools
+import operator
+import os
+
+import portage
+from portage import _encodings
+from portage.dep import Atom
+from portage.exception import FileNotFound
+from portage.cache.index.IndexStreamIterator import IndexStreamIterator
+from portage.cache.index.pkg_desc_index import pkg_desc_index_line_read
+from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
+from portage.versions import _pkg_str
+
+class IndexedPortdb(object):
+ """
+ A portdbapi interface that uses a package description index to
+ improve performance. If the description index is missing for a
+ particular repository, then all metadata for that repository is
+ obtained using the normal pordbapi.aux_get method.
+
+ For performance reasons, the match method only supports package
+ name and version constraints. For the same reason, the xmatch
+ method is not implemented.
+ """
+
+ _copy_attrs = ('cpv_exists', 'findname', 'getFetchMap',
+ '_aux_cache_keys', '_cpv_sort_ascending',
+ '_have_root_eclass_dir')
+
+ def __init__(self, portdb):
+
+ self._portdb = portdb
+
+ for k in self._copy_attrs:
+ setattr(self, k, getattr(portdb, k))
+
+ self._desc_cache = None
+ self._cp_map = None
+
+ def _init_index(self):
+
+ cp_map = {}
+ desc_cache = {}
+ self._desc_cache = desc_cache
+ self._cp_map = cp_map
+
+ streams = []
+ for repo_path in self._portdb.porttrees:
+ outside_repo = os.path.join(self._portdb.depcachedir,
+ repo_path.lstrip(os.sep))
+ filenames = []
+ for parent_dir in (repo_path, outside_repo):
+ filenames.append(os.path.join(parent_dir,
+ "metadata", "pkg_desc_index"))
+
+ repo_name = self._portdb.getRepositoryName(repo_path)
+
+ try:
+ f = None
+ for filename in filenames:
+ try:
+ f = io.open(filename,
+ encoding=_encodings["repo.content"])
+ except IOError as e:
+ if e.errno not in (errno.ENOENT, errno.ESTALE):
+ raise
+ else:
+ break
+
+ if f is None:
+ raise FileNotFound(filename)
+
+ streams.append(iter(IndexStreamIterator(f,
+ functools.partial(pkg_desc_index_line_read,
+ repo = repo_name))))
+ except FileNotFound:
+
+ # No descriptions index was found, so populate
+ # cp_map the slow way.
+ for cp in self._portdb.cp_all(trees=[repo_path]):
+
+ cp_list = cp_map.get(cp)
+ if cp_list is None:
+ cp_list = []
+ cp_map[cp] = cp_list
+ for cpv in self._portdb.cp_list(
+ cp, mytree = repo_path):
+ cp_list.append(_pkg_str(cpv, repo = repo_name))
+
+ # Create a sorted queue that will be merged with the
+ # sorted/grouped results from MultiIterGroupBy as they
+ # become available.
+ yield_queue = sorted(cp_map, reverse = True)
+
+ for cp_group in MultiIterGroupBy(streams,
+ key = operator.attrgetter("cp")):
+
+ new_cp = None
+ cp_list = cp_map.get(cp_group[0].cp)
+ if cp_list is None:
+ new_cp = cp_group[0].cp
+ cp_list = []
+ cp_map[cp_group[0].cp] = cp_list
+
+ for entry in cp_group:
+ cp_list.extend(entry.cpv_list)
+ for cpv in entry.cpv_list:
+ desc_cache[cpv] = entry.desc
+
+ if new_cp is not None:
+ while yield_queue and yield_queue[-1] < new_cp:
+ yield yield_queue.pop()
+ yield cp_group[0].cp
+
+ while yield_queue:
+ yield yield_queue.pop()
+
+ def cp_all(self):
+ """
+ Returns an ordered iterator instead of a list, so that search
+ results can be displayed incrementally.
+ """
+ if self._cp_map is None:
+ return self._init_index()
+ return iter(sorted(self._cp_map))
+
+ def match(self, atom):
+ """
+ For performance reasons, only package name and version
+ constraints are supported.
+ """
+ if not isinstance(atom, Atom):
+ atom = Atom(atom)
+ cp_list = self._cp_map.get(atom.cp)
+ if cp_list is None:
+ return []
+ self._portdb._cpv_sort_ascending(cp_list)
+ return portage.match_from_list(atom, cp_list)
+
+ def aux_get(self, cpv, attrs, myrepo = None):
+ if len(attrs) == 1 and attrs[0] == "DESCRIPTION":
+ try:
+ return [self._desc_cache[cpv]]
+ except KeyError:
+ pass
+ return self._portdb.aux_get(cpv, attrs)
--
2.0.4
next prev parent reply other threads:[~2014-11-01 22:46 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-18 3:28 [gentoo-portage-dev] [PATCH] emerge --search: use description index Zac Medico
2014-10-18 5:59 ` [gentoo-portage-dev] " Zac Medico
2014-10-19 21:51 ` Zac Medico
2014-10-23 8:55 ` Brian Dolbec
2014-10-23 9:22 ` Zac Medico
2014-11-01 6:15 ` Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 1/5] Add egencache --update-pkg-desc-index action Zac Medico
2014-11-04 9:03 ` [gentoo-portage-dev] [PATCH 1/5 v2] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy Zac Medico
2014-11-02 0:18 ` Zac Medico
2014-11-02 22:50 ` [gentoo-portage-dev] [PATCH 2/5 v3] " Zac Medico
2014-11-03 3:07 ` [gentoo-portage-dev] [PATCH 2/5 v4] " Zac Medico
2014-11-01 22:46 ` Zac Medico [this message]
2014-11-04 5:07 ` [gentoo-portage-dev] [PATCH 3/5 v2] Add IndexedPortdb class Zac Medico
2014-11-04 20:34 ` [gentoo-portage-dev] [PATCH 3/5 v3] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 4/5] Add IndexedVardb class Zac Medico
2014-11-05 9:59 ` [gentoo-portage-dev] " Zac Medico
2014-11-07 8:45 ` [gentoo-portage-dev] [PATCH] Log changes between vdb_metadata.pickle updates Zac Medico
2014-11-07 16:51 ` Brian Dolbec
2014-11-07 20:17 ` Zac Medico
2014-11-08 9:16 ` [gentoo-portage-dev] [PATCH v2] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option Zac Medico
2014-11-01 23:04 ` Zac Medico
2014-11-04 5:42 ` [gentoo-portage-dev] [PATCH 5/5 v3] " Zac Medico
2014-11-04 9:10 ` [gentoo-portage-dev] " Zac Medico
2014-11-04 22:09 ` [gentoo-portage-dev] [PATCH 5/5 v4] " Zac Medico
2014-11-03 21:42 ` [gentoo-portage-dev] Brian Dolbec
2014-11-04 9:19 ` [gentoo-portage-dev] Zac Medico
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1414881983-19877-4-git-send-email-zmedico@gentoo.org \
--to=zmedico@gentoo.org \
--cc=gentoo-portage-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox