From: Zac Medico <zmedico@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Cc: Zac Medico <zmedico@gentoo.org>
Subject: [gentoo-portage-dev] [PATCH 1/5 v2] Add egencache --update-pkg-desc-index action.
Date: Tue, 4 Nov 2014 01:03:54 -0800 [thread overview]
Message-ID: <1415091834-20792-1-git-send-email-zmedico@gentoo.org> (raw)
In-Reply-To: <1414881983-19877-2-git-send-email-zmedico@gentoo.org>
This adds an egencache --update-pkg-desc-index action which generates
a plain-text index of package names, versions, and descriptions. The
index can then be used to optimize emerge --search / --searchdesc
actions.
X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
---
This updated patch optimizes pkg_desc_index_line_read to skip package name
and version validation. This fixes a performance problem reported by
Brian Dolbec.
bin/egencache | 38 ++++++++++++++++++--
man/egencache.1 | 4 +++
man/portage.5 | 12 +++++++
pym/portage/cache/index/__init__.py | 2 ++
pym/portage/cache/index/pkg_desc_index.py | 59 +++++++++++++++++++++++++++++++
5 files changed, 113 insertions(+), 2 deletions(-)
create mode 100644 pym/portage/cache/index/__init__.py
create mode 100644 pym/portage/cache/index/pkg_desc_index.py
diff --git a/bin/egencache b/bin/egencache
index e366058..f97432f 100755
--- a/bin/egencache
+++ b/bin/egencache
@@ -48,6 +48,7 @@ portage._internal_caller = True
from portage import os, _encodings, _unicode_encode, _unicode_decode
from _emerge.MetadataRegen import MetadataRegen
from portage.cache.cache_errors import CacheError, StatCollision
+from portage.cache.index.pkg_desc_index import pkg_desc_index_line_format
from portage.const import TIMESTAMP_FORMAT
from portage.manifest import guessManifestFileType
from portage.package.ebuild._parallel_manifest.ManifestScheduler import ManifestScheduler
@@ -57,7 +58,7 @@ from portage.util._async.run_main_scheduler import run_main_scheduler
from portage.util._eventloop.global_event_loop import global_event_loop
from portage import cpv_getkey
from portage.dep import Atom, isjustname
-from portage.versions import pkgsplit, vercmp
+from portage.versions import pkgsplit, vercmp, _pkg_str
try:
from xml.etree import ElementTree
@@ -91,6 +92,9 @@ def parse_args(args):
actions.add_argument("--update-changelogs",
action="store_true",
help="update the ChangeLog files from SCM logs")
+ actions.add_argument("--update-pkg-desc-index",
+ action="store_true",
+ help="update package description index")
actions.add_argument("--update-manifests",
action="store_true",
help="update manifests")
@@ -451,6 +455,29 @@ class GenCache(object):
if hasattr(trg_cache, '_prune_empty_dirs'):
trg_cache._prune_empty_dirs()
+class GenPkgDescIndex(object):
+ def __init__(self, portdb, output_file):
+ self.returncode = os.EX_OK
+ self._portdb = portdb
+ self._output_file = output_file
+
+ def run(self):
+
+ portage.util.ensure_dirs(os.path.dirname(self._output_file))
+ f = portage.util.atomic_ofstream(self._output_file,
+ encoding=_encodings["repo.content"])
+
+ portdb = self._portdb
+ for cp in portdb.cp_all():
+ pkgs = portdb.cp_list(cp)
+ if not pkgs:
+ continue
+ desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"])
+
+ f.write(pkg_desc_index_line_format(cp, pkgs, desc))
+
+ f.close()
+
class GenUseLocalDesc(object):
def __init__(self, portdb, output=None,
preserve_comments=False):
@@ -893,7 +920,8 @@ def egencache_main(args):
local_config=False, env=env)
if not (options.update or options.update_use_local_desc or
- options.update_changelogs or options.update_manifests):
+ options.update_changelogs or options.update_manifests or
+ options.update_pkg_desc_index):
parser.error('No action specified')
return 1
@@ -1057,6 +1085,12 @@ def egencache_main(args):
else:
ret.append(scheduler.returncode)
+ if options.update_pkg_desc_index:
+ gen_index = GenPkgDescIndex(portdb, os.path.join(
+ repo_config.location, "metadata", "pkg_desc_index"))
+ gen_index.run()
+ ret.append(gen_index.returncode)
+
if options.update_use_local_desc:
gen_desc = GenUseLocalDesc(portdb,
output=options.uld_output,
diff --git a/man/egencache.1 b/man/egencache.1
index f71feb3..3a3197f 100644
--- a/man/egencache.1
+++ b/man/egencache.1
@@ -19,6 +19,10 @@ for the details on package atom syntax.
.BR "\-\-update\-changelogs"
Update the ChangeLog files from SCM logs (supported only in git repos).
.TP
+.BR "\-\-update\-pkg\-desc\-index"
+Update the package description index, which is located at
+\fImetadata/pkg_desc_index\fR in the repository.
+.TP
.BR "\-\-update\-use\-local\-desc"
Update the \fIprofiles/use.local.desc\fR file from metadata.xml.
.TP
diff --git a/man/portage.5 b/man/portage.5
index 309e259..f2f5243 100644
--- a/man/portage.5
+++ b/man/portage.5
@@ -76,6 +76,7 @@ user\-defined package sets
.BR /usr/portage/metadata/
.nf
layout.conf
+pkg_desc_index
.fi
.TP
.BR /usr/portage/profiles/
@@ -1138,6 +1139,17 @@ cache\-formats = md5-dict pms
profile\-formats = portage-2
.fi
.RE
+.TP
+.BR pkg_desc_index
+This is an index of package names, versions, and descriptions. It
+may be generated by \fBegencache\fR(1) in order to optimize
+\fBemerge\fR(1) search actions.
+
+.I Example:
+.nf
+sys-apps/sed 4.2 4.2.1 4.2.1-r1 4.2.2: Super-useful stream editor
+sys-apps/usleep 0.1: A wrapper for usleep
+.fi
.RE
.TP
.BR /usr/portage/profiles/
diff --git a/pym/portage/cache/index/__init__.py b/pym/portage/cache/index/__init__.py
new file mode 100644
index 0000000..7cd880e
--- /dev/null
+++ b/pym/portage/cache/index/__init__.py
@@ -0,0 +1,2 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
diff --git a/pym/portage/cache/index/pkg_desc_index.py b/pym/portage/cache/index/pkg_desc_index.py
new file mode 100644
index 0000000..ed2cdf7
--- /dev/null
+++ b/pym/portage/cache/index/pkg_desc_index.py
@@ -0,0 +1,59 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+from __future__ import unicode_literals
+
+import collections
+import sys
+
+from portage.versions import _pkg_str
+
+if sys.hexversion >= 0x3000000:
+ _unicode = str
+else:
+ _unicode = unicode
+
+pkg_desc_index_node = collections.namedtuple("pkg_desc_index_node",
+ ["cp", "cpv_list", "desc"])
+
+class pkg_node(_unicode):
+ """
+ A minimal package node class. For performance reasons, inputs
+ are not validated.
+ """
+
+ def __init__(self, cp, version, repo = None):
+ self.__dict__['cp'] = cp
+ self.__dict__['repo'] = repo
+ self.__dict__['version'] = version
+
+ def __new__(cls, cp, version, repo = None):
+ return _unicode.__new__(cls, cp + "-" + version)
+
+ def __setattr__(self, name, value):
+ raise AttributeError("pkg_node instances are immutable",
+ self.__class__, name, value)
+
+def pkg_desc_index_line_format(cp, pkgs, desc):
+ return "%s %s: %s\n" % (cp,
+ " ".join(_pkg_str(cpv).version
+ for cpv in pkgs), desc)
+
+def pkg_desc_index_line_read(line, repo = None):
+
+ try:
+ pkgs, desc = line.split(":", 1)
+ except ValueError:
+ return None
+ desc = desc.strip()
+
+ try:
+ cp, pkgs = pkgs.split(" ", 1)
+ except ValueError:
+ return None
+
+ cp_list = []
+ for ver in pkgs.split():
+ cp_list.append(pkg_node(cp, ver, repo))
+
+ return pkg_desc_index_node(cp, tuple(cp_list), desc)
--
2.0.4
next prev parent reply other threads:[~2014-11-04 9:04 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-18 3:28 [gentoo-portage-dev] [PATCH] emerge --search: use description index Zac Medico
2014-10-18 5:59 ` [gentoo-portage-dev] " Zac Medico
2014-10-19 21:51 ` Zac Medico
2014-10-23 8:55 ` Brian Dolbec
2014-10-23 9:22 ` Zac Medico
2014-11-01 6:15 ` Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 1/5] Add egencache --update-pkg-desc-index action Zac Medico
2014-11-04 9:03 ` Zac Medico [this message]
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy Zac Medico
2014-11-02 0:18 ` Zac Medico
2014-11-02 22:50 ` [gentoo-portage-dev] [PATCH 2/5 v3] " Zac Medico
2014-11-03 3:07 ` [gentoo-portage-dev] [PATCH 2/5 v4] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 3/5] Add IndexedPortdb class Zac Medico
2014-11-04 5:07 ` [gentoo-portage-dev] [PATCH 3/5 v2] " Zac Medico
2014-11-04 20:34 ` [gentoo-portage-dev] [PATCH 3/5 v3] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 4/5] Add IndexedVardb class Zac Medico
2014-11-05 9:59 ` [gentoo-portage-dev] " Zac Medico
2014-11-07 8:45 ` [gentoo-portage-dev] [PATCH] Log changes between vdb_metadata.pickle updates Zac Medico
2014-11-07 16:51 ` Brian Dolbec
2014-11-07 20:17 ` Zac Medico
2014-11-08 9:16 ` [gentoo-portage-dev] [PATCH v2] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option Zac Medico
2014-11-01 23:04 ` Zac Medico
2014-11-04 5:42 ` [gentoo-portage-dev] [PATCH 5/5 v3] " Zac Medico
2014-11-04 9:10 ` [gentoo-portage-dev] " Zac Medico
2014-11-04 22:09 ` [gentoo-portage-dev] [PATCH 5/5 v4] " Zac Medico
2014-11-03 21:42 ` [gentoo-portage-dev] Brian Dolbec
2014-11-04 9:19 ` [gentoo-portage-dev] Zac Medico
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1415091834-20792-1-git-send-email-zmedico@gentoo.org \
--to=zmedico@gentoo.org \
--cc=gentoo-portage-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.