public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
From: Zac Medico <zmedico@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Cc: Zac Medico <zmedico@gentoo.org>
Subject: [gentoo-portage-dev] [PATCH 1/5 v2] Add egencache --update-pkg-desc-index action.
Date: Tue,  4 Nov 2014 01:03:54 -0800	[thread overview]
Message-ID: <1415091834-20792-1-git-send-email-zmedico@gentoo.org> (raw)
In-Reply-To: <1414881983-19877-2-git-send-email-zmedico@gentoo.org>

This adds an egencache --update-pkg-desc-index action which generates
a plain-text index of package names, versions, and descriptions. The
index can then be used to optimize emerge --search / --searchdesc
actions.

X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
---
This updated patch optimizes pkg_desc_index_line_read to skip package name
and version validation. This fixes a performance problem reported by
Brian Dolbec.

 bin/egencache                             | 38 ++++++++++++++++++--
 man/egencache.1                           |  4 +++
 man/portage.5                             | 12 +++++++
 pym/portage/cache/index/__init__.py       |  2 ++
 pym/portage/cache/index/pkg_desc_index.py | 59 +++++++++++++++++++++++++++++++
 5 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 pym/portage/cache/index/__init__.py
 create mode 100644 pym/portage/cache/index/pkg_desc_index.py

diff --git a/bin/egencache b/bin/egencache
index e366058..f97432f 100755
--- a/bin/egencache
+++ b/bin/egencache
@@ -48,6 +48,7 @@ portage._internal_caller = True
 from portage import os, _encodings, _unicode_encode, _unicode_decode
 from _emerge.MetadataRegen import MetadataRegen
 from portage.cache.cache_errors import CacheError, StatCollision
+from portage.cache.index.pkg_desc_index import pkg_desc_index_line_format
 from portage.const import TIMESTAMP_FORMAT
 from portage.manifest import guessManifestFileType
 from portage.package.ebuild._parallel_manifest.ManifestScheduler import ManifestScheduler
@@ -57,7 +58,7 @@ from portage.util._async.run_main_scheduler import run_main_scheduler
 from portage.util._eventloop.global_event_loop import global_event_loop
 from portage import cpv_getkey
 from portage.dep import Atom, isjustname
-from portage.versions import pkgsplit, vercmp
+from portage.versions import pkgsplit, vercmp, _pkg_str
 
 try:
 	from xml.etree import ElementTree
@@ -91,6 +92,9 @@ def parse_args(args):
 	actions.add_argument("--update-changelogs",
 		action="store_true",
 		help="update the ChangeLog files from SCM logs")
+	actions.add_argument("--update-pkg-desc-index",
+		action="store_true",
+		help="update package description index")
 	actions.add_argument("--update-manifests",
 		action="store_true",
 		help="update manifests")
@@ -451,6 +455,29 @@ class GenCache(object):
 		if hasattr(trg_cache, '_prune_empty_dirs'):
 			trg_cache._prune_empty_dirs()
 
+class GenPkgDescIndex(object):
+	def __init__(self, portdb, output_file):
+		self.returncode = os.EX_OK
+		self._portdb = portdb
+		self._output_file = output_file
+
+	def run(self):
+
+		portage.util.ensure_dirs(os.path.dirname(self._output_file))
+		f = portage.util.atomic_ofstream(self._output_file,
+			encoding=_encodings["repo.content"])
+
+		portdb = self._portdb
+		for cp in portdb.cp_all():
+			pkgs = portdb.cp_list(cp)
+			if not pkgs:
+				continue
+			desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"])
+
+			f.write(pkg_desc_index_line_format(cp, pkgs, desc))
+
+		f.close()
+
 class GenUseLocalDesc(object):
 	def __init__(self, portdb, output=None,
 			preserve_comments=False):
@@ -893,7 +920,8 @@ def egencache_main(args):
 			local_config=False, env=env)
 
 	if not (options.update or options.update_use_local_desc or
-			options.update_changelogs or options.update_manifests):
+			options.update_changelogs or options.update_manifests or
+			options.update_pkg_desc_index):
 		parser.error('No action specified')
 		return 1
 
@@ -1057,6 +1085,12 @@ def egencache_main(args):
 		else:
 			ret.append(scheduler.returncode)
 
+	if options.update_pkg_desc_index:
+		gen_index = GenPkgDescIndex(portdb, os.path.join(
+			repo_config.location, "metadata", "pkg_desc_index"))
+		gen_index.run()
+		ret.append(gen_index.returncode)
+
 	if options.update_use_local_desc:
 		gen_desc = GenUseLocalDesc(portdb,
 			output=options.uld_output,
diff --git a/man/egencache.1 b/man/egencache.1
index f71feb3..3a3197f 100644
--- a/man/egencache.1
+++ b/man/egencache.1
@@ -19,6 +19,10 @@ for the details on package atom syntax.
 .BR "\-\-update\-changelogs"
 Update the ChangeLog files from SCM logs (supported only in git repos).
 .TP
+.BR "\-\-update\-pkg\-desc\-index"
+Update the package description index which is located at
+\fImetadata/pkg_desc_index\fR in the repository.
+.TP
 .BR "\-\-update\-use\-local\-desc"
 Update the \fIprofiles/use.local.desc\fR file from metadata.xml.
 .TP
diff --git a/man/portage.5 b/man/portage.5
index 309e259..f2f5243 100644
--- a/man/portage.5
+++ b/man/portage.5
@@ -76,6 +76,7 @@ user\-defined package sets
 .BR /usr/portage/metadata/
 .nf
 layout.conf
+pkg_desc_index
 .fi
 .TP
 .BR /usr/portage/profiles/
@@ -1138,6 +1139,17 @@ cache\-formats = md5-dict pms
 profile\-formats = portage-2
 .fi
 .RE
+.TP
+.BR pkg_desc_index
+This is an index of package names, versions, and descriptions which
+may be generated by \fBegencache\fR(1) in order to optimize
+\fBemerge\fR(1) search actions.
+
+.I Example:
+.nf
+sys-apps/sed 4.2 4.2.1 4.2.1-r1 4.2.2: Super-useful stream editor
+sys-apps/usleep 0.1: A wrapper for usleep
+.fi
 .RE
 .TP
 .BR /usr/portage/profiles/
diff --git a/pym/portage/cache/index/__init__.py b/pym/portage/cache/index/__init__.py
new file mode 100644
index 0000000..7cd880e
--- /dev/null
+++ b/pym/portage/cache/index/__init__.py
@@ -0,0 +1,2 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
diff --git a/pym/portage/cache/index/pkg_desc_index.py b/pym/portage/cache/index/pkg_desc_index.py
new file mode 100644
index 0000000..ed2cdf7
--- /dev/null
+++ b/pym/portage/cache/index/pkg_desc_index.py
@@ -0,0 +1,59 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+from __future__ import unicode_literals
+
+import collections
+import sys
+
+from portage.versions import _pkg_str
+
+if sys.hexversion >= 0x3000000:
+	_unicode = str
+else:
+	_unicode = unicode
+
+pkg_desc_index_node = collections.namedtuple("pkg_desc_index_node",
+	["cp", "cpv_list", "desc"])
+
+class pkg_node(_unicode):
+	"""
+	A minimal package node class. For performance reasons, inputs
+	are not validated.
+	"""
+
+	def __init__(self, cp, version, repo = None):
+		self.__dict__['cp'] = cp
+		self.__dict__['repo'] = repo
+		self.__dict__['version'] = version
+
+	def __new__(cls, cp, version, repo = None):
+		return _unicode.__new__(cls, cp + "-" + version)
+
+	def __setattr__(self, name, value):
+		raise AttributeError("pkg_node instances are immutable",
+			self.__class__, name, value)
+
+def pkg_desc_index_line_format(cp, pkgs, desc):
+	return "%s %s: %s\n" % (cp,
+		" ".join(_pkg_str(cpv).version
+		for cpv in pkgs), desc)
+
+def pkg_desc_index_line_read(line, repo = None):
+
+	try:
+		pkgs, desc = line.split(":", 1)
+	except ValueError:
+		return None
+	desc = desc.strip()
+
+	try:
+		cp, pkgs = pkgs.split(" ", 1)
+	except ValueError:
+		return None
+
+	cp_list = []
+	for ver in pkgs.split():
+		cp_list.append(pkg_node(cp, ver, repo))
+
+	return pkg_desc_index_node(cp, tuple(cp_list), desc)
-- 
2.0.4



  reply	other threads:[~2014-11-04  9:04 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-18  3:28 [gentoo-portage-dev] [PATCH] emerge --search: use description index Zac Medico
2014-10-18  5:59 ` [gentoo-portage-dev] " Zac Medico
2014-10-19 21:51   ` Zac Medico
2014-10-23  8:55     ` Brian Dolbec
2014-10-23  9:22       ` Zac Medico
2014-11-01  6:15         ` Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 1/5] Add egencache --update-pkg-desc-index action Zac Medico
2014-11-04  9:03     ` Zac Medico [this message]
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy Zac Medico
2014-11-02  0:18     ` Zac Medico
2014-11-02 22:50     ` [gentoo-portage-dev] [PATCH 2/5 v3] " Zac Medico
2014-11-03  3:07     ` [gentoo-portage-dev] [PATCH 2/5 v4] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 3/5] Add IndexedPortdb class Zac Medico
2014-11-04  5:07     ` [gentoo-portage-dev] [PATCH 3/5 v2] " Zac Medico
2014-11-04 20:34       ` [gentoo-portage-dev] [PATCH 3/5 v3] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 4/5] Add IndexedVardb class Zac Medico
2014-11-05  9:59     ` [gentoo-portage-dev] " Zac Medico
2014-11-07  8:45       ` [gentoo-portage-dev] [PATCH] Log changes between vdb_metadata.pickle updates Zac Medico
2014-11-07 16:51         ` Brian Dolbec
2014-11-07 20:17           ` Zac Medico
2014-11-08  9:16         ` [gentoo-portage-dev] [PATCH v2] " Zac Medico
2014-11-01 22:46   ` [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option Zac Medico
2014-11-01 23:04     ` Zac Medico
2014-11-04  5:42       ` [gentoo-portage-dev] [PATCH 5/5 v3] " Zac Medico
2014-11-04  9:10         ` [gentoo-portage-dev] " Zac Medico
2014-11-04 22:09     ` [gentoo-portage-dev] [PATCH 5/5 v4] " Zac Medico
2014-11-03 21:42   ` [gentoo-portage-dev] Brian Dolbec
2014-11-04  9:19     ` [gentoo-portage-dev] Zac Medico

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1415091834-20792-1-git-send-email-zmedico@gentoo.org \
    --to=zmedico@gentoo.org \
    --cc=gentoo-portage-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.