* [gentoo-commits] proj/portage:master commit in: pym/portage/dbapi/, /, pym/_emerge/
@ 2017-10-14 4:49 Zac Medico
0 siblings, 0 replies; only message in thread
From: Zac Medico @ 2017-10-14 4:49 UTC (permalink / raw)
To: gentoo-commits
commit: 8b1f9dbd925ccf9c23909116c56eaa4d4f996474
Author: Daniel Robbins <drobbins <AT> funtoo <DOT> org>
AuthorDate: Fri Oct 13 21:33:19 2017 +0000
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sat Oct 14 04:44:29 2017 +0000
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=8b1f9dbd
portdbapi: cache catpkg to repository mappings
In order to avoid performance problems as the number
of repositories increases, use a cache of catpkg to
repository mappings to optimize findname2, cp_list,
and getRepositories methods.
Bug: https://bugs.gentoo.org/634210
Closes: https://github.com/gentoo/portage/pull/218
NEWS | 7 +++
RELEASE-NOTES | 6 +++
pym/_emerge/depgraph.py | 4 +-
pym/portage/dbapi/porttree.py | 103 +++++++++++++++++++++++++++++++++++++-----
4 files changed, 107 insertions(+), 13 deletions(-)
diff --git a/NEWS b/NEWS
index 60a436522..c773530e3 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,12 @@
News (mainly features/major bug fixes)
+portage-2.3.12
+----------------
+* better_cache implemented to use less expensive os.listdir() instead of
+ os.stat() operations to scan for ebuilds. Avoids exhaustively scanning
+ overlays for all ebuilds which allows Portage to not slow down significantly
+ with lots of overlays enabled. (Daniel Robbins)
+
portage-2.3.7
-----------------
* eapply_user combines patch basenames from all matched directories into a
diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 81c54e550..749322aaa 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -1,6 +1,12 @@
Release Notes; upgrade information mainly.
Features/major bugfixes are listed in NEWS
+portage-2.3.12
+==================================
+* Bug Fixes:
+ - Bug 634210 optimize portdbapi performance to handle large numbers
+ of repositories (Daniel Robbins)
+
portage-2.3.11
==================================
* Bug Fixes:
diff --git a/pym/_emerge/depgraph.py b/pym/_emerge/depgraph.py
index 751111fb3..f54acdc26 100644
--- a/pym/_emerge/depgraph.py
+++ b/pym/_emerge/depgraph.py
@@ -5039,7 +5039,7 @@ class depgraph(object):
if atom.soname:
repo_list = [None]
elif atom.repo is None and hasattr(db, "getRepositories"):
- repo_list = db.getRepositories()
+ repo_list = db.getRepositories(catpkg=atom.cp)
else:
repo_list = [atom.repo]
@@ -5490,7 +5490,7 @@ class depgraph(object):
atom_set = InternalPackageSet(initial_atoms=(atom,),
allow_repo=True)
if atom.repo is None and hasattr(db, "getRepositories"):
- repo_list = db.getRepositories()
+ repo_list = db.getRepositories(catpkg=atom_exp.cp)
else:
repo_list = [atom.repo]
diff --git a/pym/portage/dbapi/porttree.py b/pym/portage/dbapi/porttree.py
index a3254d017..53edcd18f 100644
--- a/pym/portage/dbapi/porttree.py
+++ b/pym/portage/dbapi/porttree.py
@@ -43,6 +43,8 @@ import os as _os
import sys
import traceback
import warnings
+import errno
+import collections
try:
from urllib.parse import urlparse
@@ -253,6 +255,7 @@ class portdbapi(dbapi):
"RESTRICT", "SLOT", "DEFINED_PHASES", "REQUIRED_USE"])
self._aux_cache = {}
+ self._better_cache = None
self._broken_ebuilds = set()
@property
@@ -342,12 +345,21 @@ class portdbapi(dbapi):
except KeyError:
return None
- def getRepositories(self):
+ def getRepositories(self, catpkg=None):
"""
- This function is required for GLEP 42 compliance; it will return a list of
- repository IDs
- TreeMap = {id: path}
+ With catpkg=None, this will return a complete list of repositories in this dbapi. With catpkg set to a value,
+ this method will return a short-list of repositories that contain this catpkg. Use this second approach if
+ possible, to avoid exhaustively searching all repos for a particular catpkg. It's faster for this method to
+ find the catpkg than for you to do it yourself.
+
+ This function is required for GLEP 42 compliance.
+
+ @param catpkg: catpkg for which we want a list of repositories; we'll get a list of all repos containing this
+ catpkg; if None, return a list of all repositories in this dbapi.
+ @return: a list of repositories.
"""
+ if catpkg is not None and self._better_cache is not None and catpkg in self._better_cache:
+ return [repo.name for repo in self._better_cache[catpkg]]
return self._ordered_repo_name_list
def getMissingRepoNames(self):
@@ -363,7 +375,7 @@ class portdbapi(dbapi):
"""
return self.settings.repositories.ignored_repos
- def findname2(self, mycpv, mytree=None, myrepo = None):
+ def findname2(self, mycpv, mytree=None, myrepo=None):
"""
Returns the location of the CPV, and what overlay it was in.
Searches overlays first, then PORTDIR; this allows us to return the first
@@ -385,16 +397,33 @@ class portdbapi(dbapi):
if psplit is None or len(mysplit) != 2:
raise InvalidPackageName(mycpv)
+ try:
+ cp = mycpv.cp
+ except AttributeError:
+ cp = mysplit[0] + "/" + psplit[0]
+
+ if self._better_cache is None:
+ if mytree:
+ mytrees = [mytree]
+ else:
+ mytrees = reversed(self.porttrees)
+ else:
+ try:
+ repos = self._better_cache[cp]
+ except KeyError:
+ return (None, 0)
+
+ mytrees = []
+ for repo in repos:
+ if mytree is not None and mytree != repo.location:
+ continue
+ mytrees.append(repo.location)
+
# For optimal performance in this hot spot, we do manual unicode
# handling here instead of using the wrapped os module.
encoding = _encodings['fs']
errors = 'strict'
- if mytree:
- mytrees = [mytree]
- else:
- mytrees = reversed(self.porttrees)
-
relative_path = mysplit[0] + _os.sep + psplit[0] + _os.sep + \
mysplit[1] + ".ebuild"
@@ -764,8 +793,15 @@ class portdbapi(dbapi):
else:
# assume it's iterable
mytrees = mytree
- else:
+ elif self._better_cache is None:
mytrees = self.porttrees
+ else:
+ try:
+ repos = self._better_cache[mycp]
+ except KeyError:
+ mytrees = []
+ else:
+ mytrees = [repo.location for repo in repos]
for oroot in mytrees:
try:
file_list = os.listdir(os.path.join(oroot, mycp))
@@ -814,10 +850,55 @@ class portdbapi(dbapi):
"minimum-all-ignore-profile", "minimum-visible"):
self.xcache[x]={}
self.frozen=1
+ self._better_cache = better_cache = collections.defaultdict(list)
+
+ # The purpose of self._better_cache is to perform an initial quick scan of all repositories
+ # using os.listdir(), which is less expensive IO-wise than exhaustively doing a stat on each
+ # repo. self._better_cache stores a list of repos in which particular catpkgs appear.
+ #
+ # For example, better_cache data may look like this:
+ #
+ # { "sys-apps/portage" : [ repo1, repo2 ] }
+ #
+ # Without this tweak, Portage will get slower and slower as more overlays are added.
+ #
+ # Also note that it is OK if this cache has some 'false positive' catpkgs in it. We use it
+ # to search for specific catpkgs listed in ebuilds. The likelihood of a false positive catpkg
+ # in our cache causing a problem is extremely low. Thus, the code below is optimized for
+ # speed rather than painstaking correctness.
+
+ valid_categories = self.settings.categories
+ for repo_loc in reversed(self.porttrees):
+ repo = self.repositories.get_repo_for_location(repo_loc)
+ try:
+ categories = os.listdir(repo_loc)
+ except OSError as e:
+ if e.errno not in (errno.ENOTDIR, errno.ENOENT, errno.ESTALE):
+ raise
+ continue
+
+ for cat in categories:
+ if cat not in valid_categories:
+ continue
+ cat_dir = repo_loc + "/" + cat
+ try:
+ pkg_list = os.listdir(cat_dir)
+ except OSError as e:
+ if e.errno != errno.ENOTDIR:
+ raise
+ continue
+
+ for p in pkg_list:
+ catpkg_dir = cat_dir + "/" + p
+ if not os.path.isdir(catpkg_dir):
+ continue
+ catpkg = cat + "/" + p
+ better_cache[catpkg].append(repo)
def melt(self):
self.xcache = {}
self._aux_cache = {}
+ self._better_cache = None
self.frozen = 0
def xmatch(self,level,origdep,mydep=None,mykey=None,mylist=None):
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2017-10-14 4:49 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2017-10-14 4:49 [gentoo-commits] proj/portage:master commit in: pym/portage/dbapi/, /, pym/_emerge/ Zac Medico
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox