From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id 97D4A59CAF for ; Mon, 4 Apr 2016 05:03:44 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id E0C1D21C0F4; Mon, 4 Apr 2016 05:03:34 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id A301721C0F2 for ; Mon, 4 Apr 2016 05:03:28 +0000 (UTC) Received: from localhost.localdomain (ip68-5-185-102.oc.oc.cox.net [68.5.185.102]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-SHA256 (128/128 bits)) (No client certificate requested) (Authenticated sender: zmedico) by smtp.gentoo.org (Postfix) with ESMTPSA id C1864340C90; Mon, 4 Apr 2016 05:03:26 +0000 (UTC) From: Zac Medico To: gentoo-portage-dev@lists.gentoo.org Cc: Zac Medico Subject: [gentoo-portage-dev] [PATCH] emerge: add --search-fuzzy and --search-fuzzy-cutoff options (bug 65566) Date: Sun, 3 Apr 2016 22:03:02 -0700 Message-Id: <1459746182-13420-1-git-send-email-zmedico@gentoo.org> X-Mailer: git-send-email 2.7.4 Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Archives-Salt: 99f0ef1b-2c20-43a8-a5f3-5a12941d18d8 X-Archives-Hash: 8994f6f733d4b49a08721526531a7867 Add --search-fuzzy option, with adjustable similarity ratio cutoff that defaults to 0.8 (80% similarity). X-Gentoo-bug: 65566 X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=65566 --- man/emerge.1 | 13 ++++++++++++- pym/_emerge/actions.py | 6 ++++-- pym/_emerge/main.py | 32 +++++++++++++++++++++++++++++++- pym/_emerge/search.py | 25 +++++++++++++++++++++++-- 4 files changed, 70 insertions(+), 6 deletions(-) diff --git a/man/emerge.1 b/man/emerge.1 index bfa2f73..2727ccb 100644 --- a/man/emerge.1 +++ b/man/emerge.1 @@ -1,4 +1,4 @@ -.TH "EMERGE" "1" "Feb 2016" "Portage VERSION" "Portage" +.TH "EMERGE" "1" "Apr 2016" "Portage VERSION" "Portage" .SH "NAME" emerge \- Command\-line interface to the Portage system .SH "SYNOPSIS" @@ -854,6 +854,17 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support \fBHDEPEND\fR, this option affects only the former. .TP +.BR "\-\-search\-fuzzy [ y | n ]" +Enable or disable fuzzy search for search actions. +.TP +.BR "\-\-search\-fuzzy\-cutoff CUTOFF" +Set similarity ratio cutoff (a floating-point number between 0 and 1). +Results with similarity ratios lower than the cutoff are discarded. +This option has no effect unless the \fB\-\-search\-fuzzy\fR option +is enabled. +.br +Defaults to 0.8 (80% similarity). +.TP .BR "\-\-search\-index < y | n >" Enable or disable indexed search for search actions. This option is enabled by default. The search index needs to be regenerated by diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py index 59626ad..caae79a 100644 --- a/pym/_emerge/actions.py +++ b/pym/_emerge/actions.py @@ -1,4 +1,4 @@ -# Copyright 1999-2015 Gentoo Foundation +# Copyright 1999-2016 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 from __future__ import division, print_function, unicode_literals @@ -1955,7 +1955,9 @@ def action_search(root_config, myopts, myfiles, spinner): spinner, "--searchdesc" in myopts, "--quiet" not in myopts, "--usepkg" in myopts, "--usepkgonly" in myopts, - search_index = myopts.get("--search-index", "y") != "n") + search_index=myopts.get("--search-index", "y") != "n", + fuzzy=myopts.get("--search-fuzzy", False), + fuzzy_cutoff=myopts.get("--search-fuzzy-cutoff")) for mysearch in myfiles: try: searchinstance.execute(mysearch) diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py index 5dbafee..06c385e 100644 --- a/pym/_emerge/main.py +++ b/pym/_emerge/main.py @@ -1,4 +1,4 @@ -# Copyright 1999-2015 Gentoo Foundation +# Copyright 1999-2016 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 from __future__ import print_function @@ -156,6 +156,7 @@ def insert_optional_args(args): '--rebuild-if-unbuilt' : y_or_n, '--rebuilt-binaries' : y_or_n, '--root-deps' : ('rdeps',), + '--search-fuzzy' : y_or_n, '--select' : y_or_n, '--selective' : y_or_n, "--use-ebuild-visibility": y_or_n, @@ -647,6 +648,16 @@ def parse_opts(tmpcmdline, silent=False): "choices" :("True", "rdeps") }, + "--search-fuzzy": { + "help": "Enable or disable fuzzy search", + "choices": true_y_or_n + }, + + "--search-fuzzy-cutoff": { + "help": "Set similarity ratio cutoff (a floating-point number between 0 and 1)", + "action": "store" + }, + "--search-index": { "help": "Enable or disable indexed search (enabled by default)", "choices": y_or_n @@ -908,6 +919,11 @@ def parse_opts(tmpcmdline, silent=False): if myoptions.root_deps in true_y: myoptions.root_deps = True + if myoptions.search_fuzzy in true_y: + myoptions.search_fuzzy = True + else: + myoptions.search_fuzzy = None + if myoptions.select in true_y: myoptions.select = True myoptions.oneshot = False @@ -1000,6 +1016,20 @@ def parse_opts(tmpcmdline, silent=False): myoptions.rebuilt_binaries_timestamp = rebuilt_binaries_timestamp + if myoptions.search_fuzzy_cutoff: + try: + fuzzy_cutoff = float(myoptions.search_fuzzy_cutoff) + except ValueError: + fuzzy_cutoff = 0.0 + + if fuzzy_cutoff <= 0.0: + fuzzy_cutoff = None + if not silent: + parser.error("Invalid --search-fuzzy-cutoff parameter: '%s'\n" % \ + (myoptions.search_fuzzy_cutoff,)) + + myoptions.search_fuzzy_cutoff = fuzzy_cutoff + if myoptions.use_ebuild_visibility in true_y: myoptions.use_ebuild_visibility = True else: diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py index 32d326e..3210854 100644 --- a/pym/_emerge/search.py +++ b/pym/_emerge/search.py @@ -1,8 +1,9 @@ -# Copyright 1999-2015 Gentoo Foundation +# Copyright 1999-2016 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 from __future__ import unicode_literals +import difflib import re import portage from portage import os @@ -28,7 +29,8 @@ class search(object): # public interface # def __init__(self, root_config, spinner, searchdesc, - verbose, usepkg, usepkgonly, search_index=True): + verbose, usepkg, usepkgonly, search_index=True, + fuzzy=False, fuzzy_cutoff=None): """Searches the available and installed packages for the supplied search key. The list of available and installed packages is created at object instantiation. This makes successive searches faster.""" @@ -42,6 +44,8 @@ class search(object): self.spinner = None self.root_config = root_config self.setconfig = root_config.setconfig + self.fuzzy = fuzzy + self.fuzzy_cutoff = 0.8 if fuzzy_cutoff is None else fuzzy_cutoff self.matches = {"pkg" : []} self.mlen = 0 @@ -248,11 +252,26 @@ class search(object): if self.searchkey.startswith('@'): match_category = 1 self.searchkey = self.searchkey[1:] + fuzzy = False if regexsearch: self.searchre=re.compile(self.searchkey,re.I) else: self.searchre=re.compile(re.escape(self.searchkey), re.I) + # Fuzzy search does not support regular expressions, therefore + # it is disabled for regular expression searches. + if self.fuzzy: + fuzzy = True + cutoff = self.fuzzy_cutoff + seq_match = difflib.SequenceMatcher() + seq_match.set_seq2(self.searchkey.lower()) + + def fuzzy_search(match_string): + seq_match.set_seq1(match_string.lower()) + return (seq_match.real_quick_ratio() >= cutoff and + seq_match.quick_ratio() >= cutoff and + seq_match.ratio() >= cutoff) + for package in self._cp_all(): self._spinner_update() @@ -280,6 +299,8 @@ class search(object): continue yield ("desc", package) + elif fuzzy and fuzzy_search(match_string): + yield ("pkg", package) self.sdict = self.setconfig.getSets() for setname in self.sdict: -- 2.7.4