public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
From: Zac Medico <zmedico@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Cc: Zac Medico <zmedico@gentoo.org>
Subject: [gentoo-portage-dev] [PATCH 3/5] Add IndexedPortdb class.
Date: Sat,  1 Nov 2014 15:46:21 -0700	[thread overview]
Message-ID: <1414881983-19877-4-git-send-email-zmedico@gentoo.org> (raw)
In-Reply-To: <1414881983-19877-1-git-send-email-zmedico@gentoo.org>

The IndexedPortdb class uses pkg_desc_index to optimize searches for
package names and descriptions. If the package description index is
missing from a particular repository, then all metadata for that
repository is obtained using the normal portdbapi.aux_get method.

This class only implements a subset of portdbapi functionality that is
useful for searching pkg_desc_index incrementally. For this reason,
the cp_all method returns an ordered iterator instead of a list, so
that search results can be displayed incrementally.

X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
---
 pym/portage/dbapi/IndexedPortdb.py | 151 +++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 pym/portage/dbapi/IndexedPortdb.py

diff --git a/pym/portage/dbapi/IndexedPortdb.py b/pym/portage/dbapi/IndexedPortdb.py
new file mode 100644
index 0000000..4fb2cf1
--- /dev/null
+++ b/pym/portage/dbapi/IndexedPortdb.py
@@ -0,0 +1,151 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import io
+import functools
+import operator
+import os
+
+import portage
+from portage import _encodings
+from portage.dep import Atom
+from portage.exception import FileNotFound
+from portage.cache.index.IndexStreamIterator import IndexStreamIterator
+from portage.cache.index.pkg_desc_index import pkg_desc_index_line_read
+from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
+from portage.versions import _pkg_str
+
+class IndexedPortdb(object):
+	"""
+	A portdbapi interface that uses a package description index to
+	improve performance. If the description index is missing for a
+	particular repository, then all metadata for that repository is
+	obtained using the normal portdbapi.aux_get method.
+
+	For performance reasons, the match method only supports package
+	name and version constraints. For the same reason, the xmatch
+	method is not implemented.
+	"""
+
+	_copy_attrs = ('cpv_exists', 'findname', 'getFetchMap',
+		'_aux_cache_keys', '_cpv_sort_ascending',
+		'_have_root_eclass_dir')
+
+	def __init__(self, portdb):
+
+		self._portdb = portdb
+
+		for k in self._copy_attrs:
+			setattr(self, k, getattr(portdb, k))
+
+		self._desc_cache = None
+		self._cp_map = None
+
+	def _init_index(self):
+		# Generator: fills _cp_map and _desc_cache while yielding cp names.
+		cp_map = {}
+		desc_cache = {}
+		self._desc_cache = desc_cache
+		self._cp_map = cp_map
+
+		streams = []
+		for repo_path in self._portdb.porttrees:
+			outside_repo = os.path.join(self._portdb.depcachedir,
+				repo_path.lstrip(os.sep))
+			filenames = []
+			for parent_dir in (repo_path, outside_repo):
+				filenames.append(os.path.join(parent_dir,
+					"metadata", "pkg_desc_index"))
+
+			repo_name = self._portdb.getRepositoryName(repo_path)
+
+			try:
+				f = None
+				for filename in filenames:
+					try:
+						f = io.open(filename,
+							encoding=_encodings["repo.content"])
+					except IOError as e:
+						if e.errno not in (errno.ENOENT, errno.ESTALE):
+							raise
+					else:
+						break
+
+				if f is None:
+					raise FileNotFound(filename)
+
+				streams.append(iter(IndexStreamIterator(f,
+					functools.partial(pkg_desc_index_line_read,
+					repo = repo_name))))
+			except FileNotFound:
+
+				# No description index could be opened for this repository,
+				# so populate cp_map the slow way, via the wrapped portdb.
+				for cp in self._portdb.cp_all(trees=[repo_path]):
+
+					cp_list = cp_map.get(cp)
+					if cp_list is None:
+						cp_list = []
+						cp_map[cp] = cp_list
+					for cpv in self._portdb.cp_list(
+						cp, mytree = repo_path):
+						cp_list.append(_pkg_str(cpv, repo = repo_name))
+
+		# Create a sorted queue that will be merged with the
+		# sorted/grouped results from MultiIterGroupBy as they
+		# become available.
+		yield_queue = sorted(cp_map, reverse = True)
+
+		for cp_group in MultiIterGroupBy(streams,
+			key = operator.attrgetter("cp")):
+
+			new_cp = None
+			cp_list = cp_map.get(cp_group[0].cp)
+			if cp_list is None:
+				new_cp = cp_group[0].cp
+				cp_list = []
+				cp_map[cp_group[0].cp] = cp_list
+
+			for entry in cp_group:
+				cp_list.extend(entry.cpv_list)
+				for cpv in entry.cpv_list:
+					desc_cache[cpv] = entry.desc
+
+			if new_cp is not None:
+				while yield_queue and yield_queue[-1] < new_cp:
+					yield yield_queue.pop()
+				yield cp_group[0].cp
+
+		while yield_queue:
+			yield yield_queue.pop()
+
+	def cp_all(self):
+		"""
+		Returns an ordered iterator instead of a list, so that search
+		results can be displayed incrementally.
+		"""
+		if self._cp_map is None:
+			return self._init_index()
+		return iter(sorted(self._cp_map))
+
+	def match(self, atom):
+		"""
+		Only package name and version constraints are supported, for
+		performance. NOTE(review): _cp_map is None until cp_all() is iterated.
+		"""
+		if not isinstance(atom, Atom):
+			atom = Atom(atom)
+		cp_list = self._cp_map.get(atom.cp)
+		if cp_list is None:
+			return []
+		self._portdb._cpv_sort_ascending(cp_list)
+		return portage.match_from_list(atom, cp_list)
+
+	def aux_get(self, cpv, attrs, myrepo = None):
+		if len(attrs) == 1 and attrs[0] == "DESCRIPTION":
+			try:
+				return [self._desc_cache[cpv]]
+			except KeyError:
+				pass
+		return self._portdb.aux_get(cpv, attrs)  # NOTE(review): myrepo is accepted but not forwarded
-- 
2.0.4



  parent reply	other threads:[~2014-11-01 22:46 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-18  3:28 [gentoo-portage-dev] [PATCH] emerge --search: use description index Zac Medico
2014-10-18  5:59 ` [gentoo-portage-dev] " Zac Medico
2014-10-19 21:51   ` Zac Medico
2014-10-23  8:55     ` Brian Dolbec
2014-10-23  9:22       ` Zac Medico
2014-11-01  6:15         ` Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 1/5] Add egencache --update-pkg-desc-index action Zac Medico
2014-11-04  9:03     ` [gentoo-portage-dev] [PATCH 1/5 v2] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy Zac Medico
2014-11-02  0:18     ` Zac Medico
2014-11-02 22:50     ` [gentoo-portage-dev] [PATCH 2/5 v3] " Zac Medico
2014-11-03  3:07     ` [gentoo-portage-dev] [PATCH 2/5 v4] " Zac Medico
2014-11-01 22:46   ` Zac Medico [this message]
2014-11-04  5:07     ` [gentoo-portage-dev] [PATCH 3/5 v2] Add IndexedPortdb class Zac Medico
2014-11-04 20:34       ` [gentoo-portage-dev] [PATCH 3/5 v3] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 4/5] Add IndexedVardb class Zac Medico
2014-11-05  9:59     ` [gentoo-portage-dev] " Zac Medico
2014-11-07  8:45       ` [gentoo-portage-dev] [PATCH] Log changes between vdb_metadata.pickle updates Zac Medico
2014-11-07 16:51         ` Brian Dolbec
2014-11-07 20:17           ` Zac Medico
2014-11-08  9:16         ` [gentoo-portage-dev] [PATCH v2] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option Zac Medico
2014-11-01 23:04     ` Zac Medico
2014-11-04  5:42       ` [gentoo-portage-dev] [PATCH 5/5 v3] " Zac Medico
2014-11-04  9:10         ` [gentoo-portage-dev] " Zac Medico
2014-11-04 22:09     ` [gentoo-portage-dev] [PATCH 5/5 v4] " Zac Medico
2014-11-03 21:42   ` [gentoo-portage-dev] Brian Dolbec
2014-11-04  9:19     ` [gentoo-portage-dev] Zac Medico

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1414881983-19877-4-git-send-email-zmedico@gentoo.org \
    --to=zmedico@gentoo.org \
    --cc=gentoo-portage-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox