From: Brian Dolbec <dolsen@gentoo.org>
To: gentoo-portage-dev@lists.gentoo.org
Subject: Re: [gentoo-portage-dev] Re: [PATCH] emerge --search: use description index
Date: Thu, 23 Oct 2014 01:55:15 -0700 [thread overview]
Message-ID: <20141023015515.26ca0c7e.dolsen@gentoo.org> (raw)
In-Reply-To: <54443249.1090901@gentoo.org>
On Sun, 19 Oct 2014 14:51:05 -0700
Zac Medico <zmedico@gentoo.org> wrote:
> This updated patch changes the index format to use spaces instead of
> commas, for readability. This example given in man/portage.5:
>
> sys-apps/sed 4.2 4.2.1 4.2.1-r1 4.2.2: Super-useful stream editor
> sys-apps/usleep 0.1: A wrapper for usleep
>
> Hopeful057,6 +1086,12 @@ def egencache_main(args):
>
...
/snip
All of the above looks good to me.
> b/pym/_emerge/search.py index 4b0fd9f..37fee20 100644
> --- a/pym/_emerge/search.py
> +++ b/pym/_emerge/search.py
> @@ -3,13 +3,17 @@
>
> from __future__ import print_function
>
> +import io
> import re
> import portage
> -from portage import os
> +from portage import os, _encodings
> from portage.dbapi.porttree import _parse_uri_map
> +from portage.dep import Atom
> +from portage.exception import InvalidAtom, InvalidData
> from portage.localization import localized_size
> from portage.output import bold, bold as white, darkgreen, green,
> red from portage.util import writemsg_stdout
> +from portage.versions import _pkg_str
>
> from _emerge.Package import Package
>
> @@ -25,12 +29,11 @@ class search(object):
> # public interface
> #
> def __init__(self, root_config, spinner, searchdesc,
> - verbose, usepkg, usepkgonly):
> + verbose, usepkg, usepkgonly, search_index = True):
> """Searches the available and installed packages for
> the supplied search key. The list of available and installed packages
> is created at object instantiation. This makes successive searches
> faster.""" self.settings = root_config.settings
> - self.vartree = root_config.trees["vartree"]
> self.spinner = spinner
> self.verbose = verbose
> self.searchdesc = searchdesc
> @@ -45,6 +48,10 @@ class search(object):
> bindb = root_config.trees["bintree"].dbapi
> vardb = root_config.trees["vartree"].dbapi
>
> + if search_index:
> + portdb = IndexedPortdb(portdb)
> + vardb = IndexedVardb(vardb)
> +
> if not usepkgonly and portdb._have_root_eclass_dir:
> self._dbs.append(portdb)
>
> @@ -53,6 +60,7 @@ class search(object):
>
> self._dbs.append(vardb)
> self._portdb = portdb
> + self._vardb = vardb
>
> def _spinner_update(self):
> if self.spinner:
> @@ -97,7 +105,7 @@ class search(object):
> return {}
>
> def _visible(self, db, cpv, metadata):
> - installed = db is self.vartree.dbapi
> + installed = db is self._vardb
> built = installed or db is not self._portdb
> pkg_type = "ebuild"
> if installed:
> @@ -208,6 +216,22 @@ class search(object):
> masked=1
> self.matches["pkg"].append([package,masked])
> elif self.searchdesc: # DESCRIPTION searching
> + # Check for DESCRIPTION match first,
> so that we can skip
> + # the expensive visiblity check if
> it doesn't match.
> + full_package =
> self._xmatch("match-all", package)
> + if not full_package:
> + continue
> + full_package = full_package[-1]
> + try:
> + full_desc = self._aux_get(
> + full_package,
> ["DESCRIPTION"])[0]
> + except KeyError:
> + portage.writemsg(
> + "emerge: search:
> aux_get() failed, skipping\n",
> + noiselevel=-1)
> + continue
> + if not
> self.searchre.search(full_desc):
> + continue
> full_package =
> self._xmatch("bestmatch-visible", package) if not full_package:
> #no match found; we don't
> want to query description @@ -217,14 +241,8 @@ class search(object):
> continue
> else:
> masked=1
> - try:
> - full_desc = self._aux_get(
> - full_package,
> ["DESCRIPTION"])[0]
> - except KeyError:
> - print("emerge: search:
> aux_get() failed, skipping")
> - continue
> - if self.searchre.search(full_desc):
> -
> self.matches["desc"].append([full_package,masked]) +
> +
> self.matches["desc"].append((full_package, masked))
> self.sdict = self.setconfig.getSets()
> for setname in self.sdict:
> @@ -262,7 +280,7 @@ class search(object):
> bold(self.searchkey) + " ]\n")
> msg.append("[ Applications found : " + \
> bold(str(self.mlen)) + " ]\n\n")
> - vardb = self.vartree.dbapi
> + vardb = self._vardb
> metadata_keys = set(Package.metadata_keys)
> metadata_keys.update(["DESCRIPTION", "HOMEPAGE",
> "LICENSE", "SRC_URI"]) metadata_keys = tuple(metadata_keys)
> @@ -372,7 +390,11 @@ class search(object):
> # private interface
> #
> def getInstallationStatus(self,package):
> - installed_package =
> self.vartree.dep_bestmatch(package)
> + installed_package = self._vardb.match(package)
> + if installed_package:
> + installed_package = installed_package[-1]
> + else:
> + installed_package = ""
> result = ""
> version =
> self.getVersion(installed_package,search.VERSION_RELEASE) if
> len(version) > 0: @@ -392,3 +414,160 @@ class search(object):
> result = ""
> return result
>
What I wonder is why the following two classes aren't in the portage
namespace. There is far too much logic embedded in the _emerge
namespace. They would most probably belong under the portage/dbapi
subpackage. Looking at them, they look very similar to the portdbapi
and vardbapi classes; they are just stripped down and optimised for
this data. They also don't seem to use any _emerge-specific modules
that I saw. Perhaps they could live in a file named index.py or
indexers.py.
> +
> +class IndexedPortdb(object):
> + """
> + A portdbapi interface that uses a package description index
> to
*** See ^^^ even the second word of the class description seems
to agree with me :)
> + improve performance. If the description index is missing for
> a
> + particular repository, then all metadata for that repository
> is
> + obtained using the normal pordbapi.aux_get method.
> + """
> + def __init__(self, portdb):
> + self._portdb = portdb
> + self.cpv_exists = portdb.cpv_exists
> + self.getFetchMap = portdb.getFetchMap
> + self.findname = portdb.findname
> + self._aux_cache_keys = portdb._aux_cache_keys
> + self._have_root_eclass_dir =
> portdb._have_root_eclass_dir
> + self._cpv_sort_ascending = portdb._cpv_sort_ascending
> + self._desc_cache = None
> + self._cp_map = None
> +
> + def _init_index(self):
> + cp_map = {}
> + desc_cache = {}
> + for repo_path in self._portdb.porttrees:
> + outside_repo =
> os.path.join(self._portdb.depcachedir,
> + repo_path.lstrip(os.sep))
> + for parent_dir in (repo_path, outside_repo):
> + file_path = os.path.join(parent_dir,
> + "metadata", "pkg_desc_index")
> +
> + try:
> + with io.open(file_path,
> +
> encoding=_encodings["repo.content"]) as f:
> + for line in f:
> + try:
> +
> pkgs, desc = line.split(":", 1)
> + except
> ValueError:
> +
> continue
> + desc =
> desc.strip()
> + try:
> + cp,
> pkgs = pkgs.split(" ", 1)
> + except
> ValueError:
> +
> continue
> + if not cp:
> +
> continue
> + try:
> + atom
> = Atom(cp)
> + except
> InvalidAtom:
> +
> continue
> + if cp !=
> atom.cp:
> +
> continue
> + cp_list =
> cp_map.get(cp)
> + if cp_list
> is None:
> +
> cp_list = []
> +
> cp_map[cp] = cp_list
> + for ver in
> pkgs.split():
> + try:
> +
> cpv = _pkg_str(cp + "-" + ver)
> +
> except InvalidData:
> +
> pass
> + else:
> +
> cp_list.append(cpv)
> +
> desc_cache[cpv] = desc
> + except IOError:
> + pass
> + else:
> + break
> + else:
> + # No descriptions index was found,
> so populate
> + # cp_map the slow way.
> + for cp in
> self._portdb.cp_all(trees=[repo_path]):
> + cp_list = cp_map.get(cp)
> + if cp_list is None:
> + cp_list = []
> + cp_map[cp] = cp_list
> + for cpv in
> self._portdb.cp_list(cp, mytree=repo_path):
> + if cpv not in
> cp_list:
> +
> cp_list.append(_pkg_str(cpv)) +
> + self._desc_cache = desc_cache
> + self._cp_map = cp_map
> +
> + def cp_all(self):
> + if self._cp_map is None:
> + self._init_index()
> + return list(self._cp_map)
> +
> + def match(self, atom):
> + if not isinstance(atom, Atom):
> + atom = Atom(atom)
> + cp_list = self._cp_map.get(atom.cp)
> + if cp_list is None:
> + return []
> + self._portdb._cpv_sort_ascending(cp_list)
> + return portage.match_from_list(atom, cp_list)
> +
> + def aux_get(self, cpv, attrs, myrepo = None):
> + if len(attrs) == 1 and attrs[0] == "DESCRIPTION":
> + try:
> + return [self._desc_cache[cpv]]
> + except KeyError:
> + pass
> + return self._portdb.aux_get(cpv, attrs)
> +
> +
> +class IndexedVardb(object):
> + """
> + A vardbapi interface that sacrifices validation in order to
> + improve performance. It takes advantage of
> vardbdbapi._aux_cache,
> + which is backed by vdb_metadata.pickle. Since _aux_cache is
> + not updated for every single merge/unmerge (see
> + _aux_cache_threshold), the list of packages is obtained
> directly
> + from the real vardbapi instance. If a package is missing from
> + _aux_cache, then its metadata is obtained using the normal
> + (validated) vardbapi.aux_get method.
> + """
> + def __init__(self, vardb):
> + self._vardb = vardb
> + self._aux_cache_keys = vardb._aux_cache_keys
> + self._cpv_sort_ascending = vardb._cpv_sort_ascending
> + self._cp_map = {}
> + self.cpv_exists = vardb.cpv_exists
> +
> + def cp_all(self):
> + if self._cp_map:
> + return list(self._cp_map)
> + cp_map = self._cp_map
> + for cpv in self._vardb.cpv_all():
> + cp = portage.cpv_getkey(cpv)
> + if cp is not None:
> + cp_list = cp_map.get(cp)
> + if cp_list is None:
> + cp_list = []
> + cp_map[cp] = cp_list
> + cp_list.append(_pkg_str(cpv))
> + return list(cp_map)
> +
> + def match(self, atom):
> + if not isinstance(atom, Atom):
> + atom = Atom(atom)
> + cp_list = self._cp_map.get(atom.cp)
> + if cp_list is None:
> + return []
> + self._vardb._cpv_sort_ascending(cp_list)
> + return portage.match_from_list(atom, cp_list)
> +
> + def aux_get(self, cpv, attrs, myrepo = None):
> + pkg_data =
> self._vardb._aux_cache["packages"].get(cpv)
> + if not isinstance(pkg_data, tuple) or \
> + len(pkg_data) != 2 or \
> + not isinstance(pkg_data[1], dict):
> + pkg_data = None
> + if pkg_data is None:
> + # It may be missing from _aux_cache due to
> + # _aux_cache_threshold.
> + return self._vardb.aux_get(cpv, attrs)
> + metadata = pkg_data[1]
> + return [metadata.get(k, "") for k in attrs]
Otherwise it looks good.
--
Brian Dolbec <dolsen>
next prev parent reply other threads:[~2014-10-23 8:56 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-18 3:28 [gentoo-portage-dev] [PATCH] emerge --search: use description index Zac Medico
2014-10-18 5:59 ` [gentoo-portage-dev] " Zac Medico
2014-10-19 21:51 ` Zac Medico
2014-10-23 8:55 ` Brian Dolbec [this message]
2014-10-23 9:22 ` Zac Medico
2014-11-01 6:15 ` Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 1/5] Add egencache --update-pkg-desc-index action Zac Medico
2014-11-04 9:03 ` [gentoo-portage-dev] [PATCH 1/5 v2] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy Zac Medico
2014-11-02 0:18 ` Zac Medico
2014-11-02 22:50 ` [gentoo-portage-dev] [PATCH 2/5 v3] " Zac Medico
2014-11-03 3:07 ` [gentoo-portage-dev] [PATCH 2/5 v4] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 3/5] Add IndexedPortdb class Zac Medico
2014-11-04 5:07 ` [gentoo-portage-dev] [PATCH 3/5 v2] " Zac Medico
2014-11-04 20:34 ` [gentoo-portage-dev] [PATCH 3/5 v3] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 4/5] Add IndexedVardb class Zac Medico
2014-11-05 9:59 ` [gentoo-portage-dev] " Zac Medico
2014-11-07 8:45 ` [gentoo-portage-dev] [PATCH] Log changes between vdb_metadata.pickle updates Zac Medico
2014-11-07 16:51 ` Brian Dolbec
2014-11-07 20:17 ` Zac Medico
2014-11-08 9:16 ` [gentoo-portage-dev] [PATCH v2] " Zac Medico
2014-11-01 22:46 ` [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option Zac Medico
2014-11-01 23:04 ` Zac Medico
2014-11-04 5:42 ` [gentoo-portage-dev] [PATCH 5/5 v3] " Zac Medico
2014-11-04 9:10 ` [gentoo-portage-dev] " Zac Medico
2014-11-04 22:09 ` [gentoo-portage-dev] [PATCH 5/5 v4] " Zac Medico
2014-11-03 21:42 ` [gentoo-portage-dev] Brian Dolbec
2014-11-04 9:19 ` [gentoo-portage-dev] Zac Medico
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141023015515.26ca0c7e.dolsen@gentoo.org \
--to=dolsen@gentoo.org \
--cc=gentoo-portage-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox