From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id C006D138A1D for ; Sat, 1 Nov 2014 23:04:59 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id D8773E10EE; Sat, 1 Nov 2014 23:04:57 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 47888E10ED for ; Sat, 1 Nov 2014 23:04:57 +0000 (UTC) Received: from localhost.localdomain (ip70-181-96-121.oc.oc.cox.net [70.181.96.121]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) (Authenticated sender: zmedico) by smtp.gentoo.org (Postfix) with ESMTPSA id 78F37340437; Sat, 1 Nov 2014 23:04:55 +0000 (UTC) From: Zac Medico To: gentoo-portage-dev@lists.gentoo.org Cc: Zac Medico Subject: [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option. Date: Sat, 1 Nov 2014 16:04:50 -0700 Message-Id: <1414883090-20554-1-git-send-email-zmedico@gentoo.org> X-Mailer: git-send-email 2.0.4 In-Reply-To: <1414881983-19877-6-git-send-email-zmedico@gentoo.org> References: <1414881983-19877-6-git-send-email-zmedico@gentoo.org> Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Archives-Salt: 00dd4594-ee68-4e87-9b27-9c697d359c63 X-Archives-Hash: fe49953b3a03db72e7076cc14cbe8709 The new emerge --search-index option, which is enabled by default, causes pkg_desc_index to be used for search optimization. The search index needs to be regenerated by egencache after changes are made to a repository (see the --update-pkg-desc-index action). 
For users that would like to modify ebuilds in a repository without running egencache afterwards, emerge --search-index=n can be used to get non-indexed search. Alternatively, the user could simply remove the stale index file, in order to disable the search index for a particular repository. In order to conserve memory, indices are read as streams, and MultiIterGroupBy is used to group results from IndexedPortdb and IndexedVardb. Stream-oriented search also makes it possible to display search results incrementally (fixing bug #412471). X-Gentoo-Bug: 525718 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 --- This updated patch fixes the search.output method to reset the match count after each search is performed, in case there are multiple searches. man/emerge.1 | 8 ++++ pym/_emerge/actions.py | 3 +- pym/_emerge/depgraph.py | 2 +- pym/_emerge/main.py | 5 ++ pym/_emerge/search.py | 119 ++++++++++++++++++++++++++++++++++-------------- 5 files changed, 102 insertions(+), 35 deletions(-) diff --git a/man/emerge.1 b/man/emerge.1 index bbe71ac..7bcdd9a 100644 --- a/man/emerge.1 +++ b/man/emerge.1 @@ -796,6 +796,14 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support \fBHDEPEND\fR, this option affects only the former. .TP +.BR "\-\-search\-index < y | n >" +Enable or disable indexed search for search actions. This option is +enabled by default. The search index needs to be regenerated by +\fBegencache\fR(1) after changes are made to a repository (see the +\fB\-\-update\-pkg\-desc\-index\fR action). This setting can be added +to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later +overridden via the command line. +.TP .BR "\-\-select [ y | n ] (\-w short option)" Add specified packages to the world set (inverse of \fB\-\-oneshot\fR). 
This is useful if you want to diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py index 48b0826..8a22ab5 100644 --- a/pym/_emerge/actions.py +++ b/pym/_emerge/actions.py @@ -2015,7 +2015,8 @@ def action_search(root_config, myopts, myfiles, spinner): searchinstance = search(root_config, spinner, "--searchdesc" in myopts, "--quiet" not in myopts, "--usepkg" in myopts, - "--usepkgonly" in myopts) + "--usepkgonly" in myopts, + search_index = myopts.get("--search-index", "y") != "n") for mysearch in myfiles: try: searchinstance.execute(mysearch) diff --git a/pym/_emerge/depgraph.py b/pym/_emerge/depgraph.py index 78b9236..2fbb7ce 100644 --- a/pym/_emerge/depgraph.py +++ b/pym/_emerge/depgraph.py @@ -8596,7 +8596,7 @@ def ambiguous_package_name(arg, atoms, root_config, spinner, myopts): s = search(root_config, spinner, "--searchdesc" in myopts, "--quiet" not in myopts, "--usepkg" in myopts, - "--usepkgonly" in myopts) + "--usepkgonly" in myopts, search_index = False) null_cp = portage.dep_getkey(insert_category_into_atom( arg, "null")) cat, atom_pn = portage.catsplit(null_cp) diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py index cf7966c..c08e12a 100644 --- a/pym/_emerge/main.py +++ b/pym/_emerge/main.py @@ -616,6 +616,11 @@ def parse_opts(tmpcmdline, silent=False): "choices" :("True", "rdeps") }, + "--search-index": { + "help": "Enable or disable indexed search (enabled by default)", + "choices": y_or_n + }, + "--select": { "shortopt" : "-w", "help" : "add specified packages to the world set " + \ diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py index 4b0fd9f..1d710ee 100644 --- a/pym/_emerge/search.py +++ b/pym/_emerge/search.py @@ -7,9 +7,12 @@ import re import portage from portage import os from portage.dbapi.porttree import _parse_uri_map +from portage.dbapi.IndexedPortdb import IndexedPortdb +from portage.dbapi.IndexedVardb import IndexedVardb from portage.localization import localized_size from portage.output import bold, bold as white, 
darkgreen, green, red from portage.util import writemsg_stdout +from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy from _emerge.Package import Package @@ -25,15 +28,17 @@ class search(object): # public interface # def __init__(self, root_config, spinner, searchdesc, - verbose, usepkg, usepkgonly): + verbose, usepkg, usepkgonly, search_index = True): """Searches the available and installed packages for the supplied search key. The list of available and installed packages is created at object instantiation. This makes successive searches faster.""" self.settings = root_config.settings - self.vartree = root_config.trees["vartree"] - self.spinner = spinner self.verbose = verbose self.searchdesc = searchdesc + self.searchkey = None + # Disable the spinner since search results are displayed + # incrementally. + self.spinner = None self.root_config = root_config self.setconfig = root_config.setconfig self.matches = {"pkg" : []} @@ -45,6 +50,10 @@ class search(object): bindb = root_config.trees["bintree"].dbapi vardb = root_config.trees["vartree"].dbapi + if search_index: + portdb = IndexedPortdb(portdb) + vardb = IndexedVardb(vardb) + if not usepkgonly and portdb._have_root_eclass_dir: self._dbs.append(portdb) @@ -53,16 +62,23 @@ class search(object): self._dbs.append(vardb) self._portdb = portdb + self._vardb = vardb def _spinner_update(self): if self.spinner: self.spinner.update() def _cp_all(self): - cp_all = set() + iterators = [] for db in self._dbs: - cp_all.update(db.cp_all()) - return list(sorted(cp_all)) + i = db.cp_all() + try: + i = iter(i) + except TypeError: + pass + iterators.append(i) + for group in MultiIterGroupBy(iterators): + yield group[0] def _aux_get(self, *args, **kwargs): for db in self._dbs: @@ -97,7 +113,7 @@ class search(object): return {} def _visible(self, db, cpv, metadata): - installed = db is self.vartree.dbapi + installed = db is self._vardb built = installed or db is not self._portdb pkg_type = "ebuild" if installed: @@ 
-171,8 +187,11 @@ class search(object): def execute(self,searchkey): """Performs the search for the supplied search key""" + self.searchkey = searchkey + + def _iter_search(self): + match_category = 0 - self.searchkey=searchkey self.packagematches = [] if self.searchdesc: self.searchdesc=1 @@ -181,6 +200,7 @@ class search(object): self.searchdesc=0 self.matches = {"pkg":[], "set":[]} print("Searching... ", end=' ') + print() regexsearch = False if self.searchkey.startswith('%'): @@ -206,8 +226,24 @@ class search(object): if self.searchre.search(match_string): if not self._xmatch("match-visible", package): masked=1 - self.matches["pkg"].append([package,masked]) + yield ("pkg", package, masked) elif self.searchdesc: # DESCRIPTION searching + # Check for DESCRIPTION match first, so that we can skip + # the expensive visibility check if it doesn't match. + full_package = self._xmatch("match-all", package) + if not full_package: + continue + full_package = full_package[-1] + try: + full_desc = self._aux_get( + full_package, ["DESCRIPTION"])[0] + except KeyError: + portage.writemsg( + "emerge: search: aux_get() failed, skipping\n", + noiselevel=-1) + continue + if not self.searchre.search(full_desc): + continue full_package = self._xmatch("bestmatch-visible", package) if not full_package: #no match found; we don't want to query description @@ -217,14 +253,8 @@ class search(object): continue else: masked=1 - try: - full_desc = self._aux_get( - full_package, ["DESCRIPTION"])[0] - except KeyError: - print("emerge: search: aux_get() failed, skipping") - continue - if self.searchre.search(full_desc): - self.matches["desc"].append([full_package,masked]) + + yield ("desc", full_package, masked) self.sdict = self.setconfig.getSets() for setname in self.sdict: @@ -235,16 +265,11 @@ class search(object): match_string = setname.split("/")[-1] if self.searchre.search(match_string): - self.matches["set"].append([setname, False]) + yield ("set", setname, False) elif self.searchdesc: if 
self.searchre.search( self.sdict[setname].getMetadata("DESCRIPTION")): - self.matches["set"].append([setname, False]) - - self.mlen=0 - for mtype in self.matches: - self.matches[mtype].sort() - self.mlen += len(self.matches[mtype]) + yield ("set", setname, False) def addCP(self, cp): if not self._xmatch("match-all", cp): @@ -257,17 +282,32 @@ class search(object): def output(self): """Outputs the results of the search.""" - msg = [] + + class msg(object): + @staticmethod + def append(msg): + writemsg_stdout(msg, noiselevel=-1) + msg.append("\b\b \n[ Results for search key : " + \ bold(self.searchkey) + " ]\n") - msg.append("[ Applications found : " + \ - bold(str(self.mlen)) + " ]\n\n") - vardb = self.vartree.dbapi + vardb = self._vardb metadata_keys = set(Package.metadata_keys) metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"]) metadata_keys = tuple(metadata_keys) - for mtype in self.matches: - for match,masked in self.matches[mtype]: + + if self.searchkey is None: + # Handle results added via addCP + addCP_matches = [] + for mytype, (match, masked) in self.matches.items(): + addCP_matches.append(mytype, match, masked) + iterator = iter(addCP_matches) + + else: + # Do a normal search + iterator = self._iter_search() + + for mtype, match, masked in iterator: + self.mlen += 1 full_package = None if mtype == "pkg": full_package = self._xmatch( @@ -367,12 +407,26 @@ class search(object): + " " + desc + "\n") msg.append(" " + darkgreen("License:") + \ " " + license + "\n\n") - writemsg_stdout(''.join(msg), noiselevel=-1) + + msg.append("[ Applications found : " + \ + bold(str(self.mlen)) + " ]\n\n") + + # This method can be called multiple times, so + # reset the match count for the next call. Don't + # reset it at the beginning of this method, since + # that would lose modifications from the addCP + # method. 
+ self.mlen = 0 + # # private interface # def getInstallationStatus(self,package): - installed_package = self.vartree.dep_bestmatch(package) + installed_package = self._vardb.match(package) + if installed_package: + installed_package = installed_package[-1] + else: + installed_package = "" result = "" version = self.getVersion(installed_package,search.VERSION_RELEASE) if len(version) > 0: @@ -391,4 +445,3 @@ class search(object): else: result = "" return result - -- 2.0.4