From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id 5EA2F138BD3 for ; Tue, 4 Nov 2014 05:42:20 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 37DC7E0805; Tue, 4 Nov 2014 05:42:19 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 9362DE07D5 for ; Tue, 4 Nov 2014 05:42:18 +0000 (UTC) Received: from localhost.localdomain (ip70-181-96-121.oc.oc.cox.net [70.181.96.121]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) (Authenticated sender: zmedico) by smtp.gentoo.org (Postfix) with ESMTPSA id 924F7340414; Tue, 4 Nov 2014 05:42:17 +0000 (UTC) From: Zac Medico To: gentoo-portage-dev@lists.gentoo.org Cc: Zac Medico Subject: [gentoo-portage-dev] [PATCH 5/5 v3] Add emerge --search-index option. Date: Mon, 3 Nov 2014 21:42:14 -0800 Message-Id: <1415079734-14386-1-git-send-email-zmedico@gentoo.org> X-Mailer: git-send-email 2.0.4 In-Reply-To: <1414883090-20554-1-git-send-email-zmedico@gentoo.org> References: <1414883090-20554-1-git-send-email-zmedico@gentoo.org> Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Archives-Salt: cd86f12f-543a-447e-a227-a5dc5b1be8af X-Archives-Hash: 493f280431400784f930cd5eea4c5c08 The new emerge --search-index option, which is enabled by default, causes pkg_desc_index to be used for search optimization. The search index needs to be regenerated by egencache after changes are made to a repository (see the --update-pkg-desc-index action). 
For users that would like to modify ebuilds in a repository without running egencache afterwards, emerge --search-index=n can be used to get non-indexed search. Alternatively, the user could simply remove the stale index file, in order to disable the search index for a particular repository. In order to conserve memory, indices are read as streams, and MultiIterGroupBy is used to group results from IndexedPortdb and IndexedVardb. Stream-oriented search also makes it possible to display search results incrementally (fixing bug #412471). X-Gentoo-Bug: 525718 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 --- This updated patch causes indexed search to be enabled only for searchdesc, since indexed variants can actually be slower when only package names need to be searched. man/emerge.1 | 8 ++++ pym/_emerge/actions.py | 3 +- pym/_emerge/depgraph.py | 2 +- pym/_emerge/main.py | 5 ++ pym/_emerge/search.py | 122 +++++++++++++++++++++++++++++++++++------------- 5 files changed, 105 insertions(+), 35 deletions(-) diff --git a/man/emerge.1 b/man/emerge.1 index bbe71ac..7bcdd9a 100644 --- a/man/emerge.1 +++ b/man/emerge.1 @@ -796,6 +796,14 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support \fBHDEPEND\fR, this option affects only the former. .TP +.BR "\-\-search\-index < y | n >" +Enable or disable indexed search for search actions. This option is +enabled by default. The search index needs to be regenerated by +\fBegencache\fR(1) after changes are made to a repository (see the +\fB\-\-update\-pkg\-desc\-index\fR action). This setting can be added +to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later +overridden via the command line. +.TP .BR "\-\-select [ y | n ] (\-w short option)" Add specified packages to the world set (inverse of \fB\-\-oneshot\fR). 
This is useful if you want to diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py index 48b0826..8a22ab5 100644 --- a/pym/_emerge/actions.py +++ b/pym/_emerge/actions.py @@ -2015,7 +2015,8 @@ def action_search(root_config, myopts, myfiles, spinner): searchinstance = search(root_config, spinner, "--searchdesc" in myopts, "--quiet" not in myopts, "--usepkg" in myopts, - "--usepkgonly" in myopts) + "--usepkgonly" in myopts, + search_index = myopts.get("--search-index", "y") != "n") for mysearch in myfiles: try: searchinstance.execute(mysearch) diff --git a/pym/_emerge/depgraph.py b/pym/_emerge/depgraph.py index 94eaed8..da408ad 100644 --- a/pym/_emerge/depgraph.py +++ b/pym/_emerge/depgraph.py @@ -8656,7 +8656,7 @@ def ambiguous_package_name(arg, atoms, root_config, spinner, myopts): s = search(root_config, spinner, "--searchdesc" in myopts, "--quiet" not in myopts, "--usepkg" in myopts, - "--usepkgonly" in myopts) + "--usepkgonly" in myopts, search_index = False) null_cp = portage.dep_getkey(insert_category_into_atom( arg, "null")) cat, atom_pn = portage.catsplit(null_cp) diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py index cf7966c..c08e12a 100644 --- a/pym/_emerge/main.py +++ b/pym/_emerge/main.py @@ -616,6 +616,11 @@ def parse_opts(tmpcmdline, silent=False): "choices" :("True", "rdeps") }, + "--search-index": { + "help": "Enable or disable indexed search (enabled by default)", + "choices": y_or_n + }, + "--select": { "shortopt" : "-w", "help" : "add specified packages to the world set " + \ diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py index 4b0fd9f..5821c37 100644 --- a/pym/_emerge/search.py +++ b/pym/_emerge/search.py @@ -7,9 +7,12 @@ import re import portage from portage import os from portage.dbapi.porttree import _parse_uri_map +from portage.dbapi.IndexedPortdb import IndexedPortdb +from portage.dbapi.IndexedVardb import IndexedVardb from portage.localization import localized_size from portage.output import bold, bold as white, 
darkgreen, green, red from portage.util import writemsg_stdout +from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy from _emerge.Package import Package @@ -25,15 +28,17 @@ class search(object): # public interface # def __init__(self, root_config, spinner, searchdesc, - verbose, usepkg, usepkgonly): + verbose, usepkg, usepkgonly, search_index = True): """Searches the available and installed packages for the supplied search key. The list of available and installed packages is created at object instantiation. This makes successive searches faster.""" self.settings = root_config.settings - self.vartree = root_config.trees["vartree"] - self.spinner = spinner self.verbose = verbose self.searchdesc = searchdesc + self.searchkey = None + # Disable the spinner since search results are displayed + # incrementally. + self.spinner = None self.root_config = root_config self.setconfig = root_config.setconfig self.matches = {"pkg" : []} @@ -45,6 +50,13 @@ class search(object): bindb = root_config.trees["bintree"].dbapi vardb = root_config.trees["vartree"].dbapi + # The indexed variants can actually be slower when only + # package names need to be searched, so only use indices + # for searchdesc. 
+ if search_index and searchdesc: + portdb = IndexedPortdb(portdb) + vardb = IndexedVardb(vardb) + if not usepkgonly and portdb._have_root_eclass_dir: self._dbs.append(portdb) @@ -53,16 +65,23 @@ class search(object): self._dbs.append(vardb) self._portdb = portdb + self._vardb = vardb def _spinner_update(self): if self.spinner: self.spinner.update() def _cp_all(self): - cp_all = set() + iterators = [] for db in self._dbs: - cp_all.update(db.cp_all()) - return list(sorted(cp_all)) + i = db.cp_all() + try: + i = iter(i) + except TypeError: + pass + iterators.append(i) + for group in MultiIterGroupBy(iterators): + yield group[0] def _aux_get(self, *args, **kwargs): for db in self._dbs: @@ -97,7 +116,7 @@ class search(object): return {} def _visible(self, db, cpv, metadata): - installed = db is self.vartree.dbapi + installed = db is self._vardb built = installed or db is not self._portdb pkg_type = "ebuild" if installed: @@ -171,8 +190,11 @@ class search(object): def execute(self,searchkey): """Performs the search for the supplied search key""" + self.searchkey = searchkey + + def _iter_search(self): + match_category = 0 - self.searchkey=searchkey self.packagematches = [] if self.searchdesc: self.searchdesc=1 @@ -181,6 +203,7 @@ class search(object): self.searchdesc=0 self.matches = {"pkg":[], "set":[]} print("Searching... ", end=' ') + print() regexsearch = False if self.searchkey.startswith('%'): @@ -206,8 +229,24 @@ class search(object): if self.searchre.search(match_string): if not self._xmatch("match-visible", package): masked=1 - self.matches["pkg"].append([package,masked]) + yield ("pkg", package, masked) elif self.searchdesc: # DESCRIPTION searching + # Check for DESCRIPTION match first, so that we can skip + # the expensive visibility check if it doesn't match. 
+ full_package = self._xmatch("match-all", package) + if not full_package: + continue + full_package = full_package[-1] + try: + full_desc = self._aux_get( + full_package, ["DESCRIPTION"])[0] + except KeyError: + portage.writemsg( + "emerge: search: aux_get() failed, skipping\n", + noiselevel=-1) + continue + if not self.searchre.search(full_desc): + continue full_package = self._xmatch("bestmatch-visible", package) if not full_package: #no match found; we don't want to query description @@ -217,14 +256,8 @@ class search(object): continue else: masked=1 - try: - full_desc = self._aux_get( - full_package, ["DESCRIPTION"])[0] - except KeyError: - print("emerge: search: aux_get() failed, skipping") - continue - if self.searchre.search(full_desc): - self.matches["desc"].append([full_package,masked]) + + yield ("desc", full_package, masked) self.sdict = self.setconfig.getSets() for setname in self.sdict: @@ -235,16 +268,11 @@ class search(object): match_string = setname.split("/")[-1] if self.searchre.search(match_string): - self.matches["set"].append([setname, False]) + yield ("set", setname, False) elif self.searchdesc: if self.searchre.search( self.sdict[setname].getMetadata("DESCRIPTION")): - self.matches["set"].append([setname, False]) - - self.mlen=0 - for mtype in self.matches: - self.matches[mtype].sort() - self.mlen += len(self.matches[mtype]) + yield ("set", setname, False) def addCP(self, cp): if not self._xmatch("match-all", cp): @@ -257,17 +285,32 @@ class search(object): def output(self): """Outputs the results of the search.""" - msg = [] + + class msg(object): + @staticmethod + def append(msg): + writemsg_stdout(msg, noiselevel=-1) + msg.append("\b\b \n[ Results for search key : " + \ bold(self.searchkey) + " ]\n") - msg.append("[ Applications found : " + \ - bold(str(self.mlen)) + " ]\n\n") - vardb = self.vartree.dbapi + vardb = self._vardb metadata_keys = set(Package.metadata_keys) metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"]) 
metadata_keys = tuple(metadata_keys) - for mtype in self.matches: - for match,masked in self.matches[mtype]: + + if self.searchkey is None: + # Handle results added via addCP + addCP_matches = [] + for mytype, (match, masked) in self.matches.items(): + addCP_matches.append(mytype, match, masked) + iterator = iter(addCP_matches) + + else: + # Do a normal search + iterator = self._iter_search() + + for mtype, match, masked in iterator: + self.mlen += 1 full_package = None if mtype == "pkg": full_package = self._xmatch( @@ -367,12 +410,26 @@ class search(object): + " " + desc + "\n") msg.append(" " + darkgreen("License:") + \ " " + license + "\n\n") - writemsg_stdout(''.join(msg), noiselevel=-1) + + msg.append("[ Applications found : " + \ + bold(str(self.mlen)) + " ]\n\n") + + # This method can be called multiple times, so + # reset the match count for the next call. Don't + # reset it at the beginning of this method, since + # that would lose modifications from the addCP + # method. + self.mlen = 0 + # # private interface # def getInstallationStatus(self,package): - installed_package = self.vartree.dep_bestmatch(package) + installed_package = self._vardb.match(package) + if installed_package: + installed_package = installed_package[-1] + else: + installed_package = "" result = "" version = self.getVersion(installed_package,search.VERSION_RELEASE) if len(version) > 0: @@ -391,4 +448,3 @@ class search(object): else: result = "" return result - -- 2.0.4