public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
From: Zac Medico <zmedico@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Cc: Zac Medico <zmedico@gentoo.org>
Subject: [gentoo-portage-dev] [PATCH 3/5 v3] Add IndexedPortdb class.
Date: Tue,  4 Nov 2014 12:34:34 -0800	[thread overview]
Message-ID: <1415133274-24929-1-git-send-email-zmedico@gentoo.org> (raw)
In-Reply-To: <1415077647-13708-1-git-send-email-zmedico@gentoo.org>

The IndexedPortdb class uses pkg_desc_index to optimize searches for
package names and descriptions. If the package description index is
missing from a particular repository, then all metadata for that
repository is obtained using the normal portdbapi.aux_get method.

This class only implements a subset of portdbapi functionality that is
useful for searching pkg_desc_index incrementally. For this reason,
the cp_all method returns an ordered iterator instead of a list, so
that search results can be displayed incrementally.

X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
---
This updated patch optimizes IndexedPortdb to avoid unnecessary cp_list calls
for repositories that are not indexed. Now IndexedPortdb performs almost as well
as the regular portdbapi for the case where no repositories are indexed.

 pym/portage/dbapi/IndexedPortdb.py | 165 +++++++++++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 pym/portage/dbapi/IndexedPortdb.py

diff --git a/pym/portage/dbapi/IndexedPortdb.py b/pym/portage/dbapi/IndexedPortdb.py
new file mode 100644
index 0000000..fc431a2
--- /dev/null
+++ b/pym/portage/dbapi/IndexedPortdb.py
@@ -0,0 +1,165 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import io
+import functools
+import operator
+import os
+
+import portage
+from portage import _encodings
+from portage.dep import Atom
+from portage.exception import FileNotFound
+from portage.cache.index.IndexStreamIterator import IndexStreamIterator
+from portage.cache.index.pkg_desc_index import \
+	pkg_desc_index_line_read, pkg_desc_index_node
+from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
+from portage.versions import _pkg_str
+
+class IndexedPortdb(object):
+	"""
+	A portdbapi interface that uses a package description index to
+	improve performance. If the description index is missing for a
+	particular repository, then all metadata for that repository is
+	obtained using the normal pordbapi.aux_get method.
+
+	For performance reasons, the match method only supports package
+	name and version constraints. For the same reason, the xmatch
+	method is not implemented.
+	"""
+
+	_copy_attrs = ('cpv_exists', 'findname', 'getFetchMap',
+		'_aux_cache_keys', '_cpv_sort_ascending',
+		'_have_root_eclass_dir')
+
+	def __init__(self, portdb):
+
+		self._portdb = portdb
+
+		for k in self._copy_attrs:
+			setattr(self, k, getattr(portdb, k))
+
+		self._desc_cache = None
+		self._cp_map = None
+		self._unindexed_cp_map = None
+
+	def _init_index(self):
+
+		cp_map = {}
+		desc_cache = {}
+		self._desc_cache = desc_cache
+		self._cp_map = cp_map
+		index_missing = []
+
+		streams = []
+		for repo_path in self._portdb.porttrees:
+			outside_repo = os.path.join(self._portdb.depcachedir,
+				repo_path.lstrip(os.sep))
+			filenames = []
+			for parent_dir in (repo_path, outside_repo):
+				filenames.append(os.path.join(parent_dir,
+					"metadata", "pkg_desc_index"))
+
+			repo_name = self._portdb.getRepositoryName(repo_path)
+
+			try:
+				f = None
+				for filename in filenames:
+					try:
+						f = io.open(filename,
+							encoding=_encodings["repo.content"])
+					except IOError as e:
+						if e.errno not in (errno.ENOENT, errno.ESTALE):
+							raise
+					else:
+						break
+
+				if f is None:
+					raise FileNotFound(filename)
+
+				streams.append(iter(IndexStreamIterator(f,
+					functools.partial(pkg_desc_index_line_read,
+					repo = repo_name))))
+			except FileNotFound:
+				index_missing.append(repo_path)
+
+		if index_missing:
+			self._unindexed_cp_map = {}
+
+			class _NonIndexedStream(object):
+				def __iter__(self_):
+					for cp in self._portdb.cp_all(
+						trees = index_missing):
+						# Don't call cp_list yet, since it's a waste
+						# if the package name does not match the current
+						# search.
+						self._unindexed_cp_map[cp] = index_missing
+						yield pkg_desc_index_node(cp, (), None)
+
+			streams.append(iter(_NonIndexedStream()))
+
+		if streams:
+			if len(streams) == 1:
+				cp_group_iter = ([node] for node in streams[0])
+			else:
+				cp_group_iter = MultiIterGroupBy(streams,
+					key = operator.attrgetter("cp"))
+
+			for cp_group in cp_group_iter:
+
+				new_cp = None
+				cp_list = cp_map.get(cp_group[0].cp)
+				if cp_list is None:
+					new_cp = cp_group[0].cp
+					cp_list = []
+					cp_map[cp_group[0].cp] = cp_list
+
+				for entry in cp_group:
+					cp_list.extend(entry.cpv_list)
+					if entry.desc is not None:
+						for cpv in entry.cpv_list:
+							desc_cache[cpv] = entry.desc
+
+				if new_cp is not None:
+					yield cp_group[0].cp
+
+	def cp_all(self):
+		"""
+		Returns an ordered iterator instead of a list, so that search
+		results can be displayed incrementally.
+		"""
+		if self._cp_map is None:
+			return self._init_index()
+		return iter(sorted(self._cp_map))
+
+	def match(self, atom):
+		"""
+		For performance reasons, only package name and version
+		constraints are supported.
+		"""
+		if not isinstance(atom, Atom):
+			atom = Atom(atom)
+		cp_list = self._cp_map.get(atom.cp)
+		if cp_list is None:
+			return []
+
+		if self._unindexed_cp_map is not None:
+			try:
+				unindexed = self._unindexed_cp_map.pop(atom.cp)
+			except KeyError:
+				pass
+			else:
+				cp_list.extend(self._portdb.cp_list(atom.cp,
+					mytree = unindexed))
+
+		self._portdb._cpv_sort_ascending(cp_list)
+		return portage.match_from_list(atom, cp_list)
+
+	def aux_get(self, cpv, attrs, myrepo = None):
+		if len(attrs) == 1 and attrs[0] == "DESCRIPTION":
+			try:
+				return [self._desc_cache[cpv]]
+			except KeyError:
+				pass
+		return self._portdb.aux_get(cpv, attrs)
-- 
2.0.4



  reply	other threads:[~2014-11-04 20:34 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-18  3:28 [gentoo-portage-dev] [PATCH] emerge --search: use description index Zac Medico
2014-10-18  5:59 ` [gentoo-portage-dev] " Zac Medico
2014-10-19 21:51   ` Zac Medico
2014-10-23  8:55     ` Brian Dolbec
2014-10-23  9:22       ` Zac Medico
2014-11-01  6:15         ` Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 1/5] Add egencache --update-pkg-desc-index action Zac Medico
2014-11-04  9:03     ` [gentoo-portage-dev] [PATCH 1/5 v2] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy Zac Medico
2014-11-02  0:18     ` Zac Medico
2014-11-02 22:50     ` [gentoo-portage-dev] [PATCH 2/5 v3] " Zac Medico
2014-11-03  3:07     ` [gentoo-portage-dev] [PATCH 2/5 v4] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 3/5] Add IndexedPortdb class Zac Medico
2014-11-04  5:07     ` [gentoo-portage-dev] [PATCH 3/5 v2] " Zac Medico
2014-11-04 20:34       ` Zac Medico [this message]
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 4/5] Add IndexedVardb class Zac Medico
2014-11-05  9:59     ` [gentoo-portage-dev] " Zac Medico
2014-11-07  8:45       ` [gentoo-portage-dev] [PATCH] Log changes between vdb_metadata.pickle updates Zac Medico
2014-11-07 16:51         ` Brian Dolbec
2014-11-07 20:17           ` Zac Medico
2014-11-08  9:16         ` [gentoo-portage-dev] [PATCH v2] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option Zac Medico
2014-11-01 23:04     ` Zac Medico
2014-11-04  5:42       ` [gentoo-portage-dev] [PATCH 5/5 v3] " Zac Medico
2014-11-04  9:10         ` [gentoo-portage-dev] " Zac Medico
2014-11-04 22:09     ` [gentoo-portage-dev] [PATCH 5/5 v4] " Zac Medico
2014-11-03 21:42   ` [gentoo-portage-dev] Brian Dolbec
2014-11-04  9:19     ` [gentoo-portage-dev] Zac Medico

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1415133274-24929-1-git-send-email-zmedico@gentoo.org \
    --to=zmedico@gentoo.org \
    --cc=gentoo-portage-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox