* [gentoo-commits] proj/R_overlay:master commit in: roverlay/stats/, roverlay/overlay/pkgdir/manifest/, roverlay/, roverlay/util/, ...
@ 2013-07-31 21:10 André Erdmann
0 siblings, 0 replies; only message in thread
From: André Erdmann @ 2013-07-31 21:10 UTC (permalink / raw)
To: gentoo-commits
commit: 86d046b36e0a8a882a4f5d01a8e1eee82bc335b5
Author: André Erdmann <dywi <AT> mailerd <DOT> de>
AuthorDate: Wed Jul 31 21:07:36 2013 +0000
Commit: André Erdmann <dywi <AT> mailerd <DOT> de>
CommitDate: Wed Jul 31 21:07:36 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=86d046b3
incremental ov-creation: postpone revbump check
The revbump check needs hashes for comparison. Postponing the hash calculation
and executing it in a thread pool may have speed advantages (needs some
testing). Additionally, it allows revbump checks to be explicitly disabled when
doing incremental overlay creation.
---
roverlay/main.py | 1 +
roverlay/overlay/creator.py | 113 +++++++++++++++++++++-
roverlay/overlay/pkgdir/manifest/file.py | 3 +
roverlay/overlay/pkgdir/packagedir_base.py | 23 ++++-
roverlay/overlay/pkgdir/packagedir_newmanifest.py | 2 +
roverlay/overlay/root.py | 34 ++++---
roverlay/stats/base.py | 16 ++-
roverlay/util/hashpool.py | 66 +++++++++++++
8 files changed, 232 insertions(+), 26 deletions(-)
diff --git a/roverlay/main.py b/roverlay/main.py
index 9cca0a8..7830d23 100644
--- a/roverlay/main.py
+++ b/roverlay/main.py
@@ -292,6 +292,7 @@ def main (
)
repo_list.add_packages ( overlay_creator.add_package )
+ overlay_creator.enqueue_postponed()
overlay_creator.release_package_rules()
diff --git a/roverlay/overlay/creator.py b/roverlay/overlay/creator.py
index c21617e..32dacbc 100644
--- a/roverlay/overlay/creator.py
+++ b/roverlay/overlay/creator.py
@@ -38,7 +38,7 @@ import roverlay.overlay.pkgdir.distroot.static
import roverlay.recipe.distmap
import roverlay.recipe.easyresolver
import roverlay.stats.collector
-
+import roverlay.util.hashpool
class OverlayCreator ( object ):
"""This is a 'R packages -> Overlay' interface."""
@@ -46,6 +46,8 @@ class OverlayCreator ( object ):
LOGGER = logging.getLogger ( 'OverlayCreator' )
STATS = roverlay.stats.collector.static.overlay_creation
+ HASHPOOL_WORKER_COUNT = 0
+
def __init__ ( self,
skip_manifest, incremental, immediate_ebuild_writes,
logger=None, allow_write=True
@@ -82,7 +84,8 @@ class OverlayCreator ( object ):
self.NUMTHREADS = config.get ( 'EBUILD.jobcount', 0 )
- self._pkg_queue = queue.Queue()
+ self._pkg_queue = queue.Queue()
+ self._pkg_queue_postponed = list()
self._err_queue.attach_queue ( self._pkg_queue, None )
self._workers = None
@@ -119,14 +122,18 @@ class OverlayCreator ( object ):
)
# --- end of _get_resolver_channel (...) ---
- def add_package ( self, package_info ):
+ def add_package ( self, package_info, allow_postpone=True ):
"""Adds a PackageInfo to the package queue.
arguments:
* package_info --
"""
if self.package_rules.apply_actions ( package_info ):
- if self.overlay.add ( package_info ):
+ add_result = self.overlay.add (
+ package_info, allow_postpone=allow_postpone
+ )
+
+ if add_result is True:
ejob = roverlay.ebuild.creation.EbuildCreation (
package_info,
depres_channel_spawner = self._get_resolver_channel,
@@ -134,13 +141,109 @@ class OverlayCreator ( object ):
)
self._pkg_queue.put ( ejob )
self.stats.pkg_queued.inc()
- else:
+
+ elif add_result is False:
self.stats.pkg_dropped.inc()
+
+ elif allow_postpone:
+ self.stats.pkg_queue_postponed.inc()
+ self._pkg_queue_postponed.append ( ( package_info, add_result ) )
+
+ else:
+ raise Exception ( "bad return from overlay.add()" )
+
else:
# else filtered out
self.stats.pkg_filtered.inc()
# --- end of add_package (...) ---
+ def discard_postponed ( self ):
+ self._pkg_queue_postponed [:] = []
+ # --- end of discard_postponed (...) ---
+
+ def enqueue_postponed ( self, prehash_manifest=False ):
+ # !!! prehash_manifest results in threaded calculation taking
+ # _more_ time than single-threaded. Postponed packages usually
+ # don't need a revbump, so the benefits (i.e. no need to recalculate
+ # on revbump) likely do not outweigh the time penalty here
+ #
+
+ if self._pkg_queue_postponed:
+ qtime = self.stats.queue_postponed_time
+
+ self.logger.info ( "Checking for packages that need a revbump" )
+
+ if self.HASHPOOL_WORKER_COUNT < 1:
+ pass
+
+ elif roverlay.util.hashpool.HAVE_CONCURRENT_FUTURES:
+ qtime.begin ( "setup_hashpool" )
+
+ # determine hashes that should be calculated here
+ if prehash_manifest:
+ # * calculate all hashes, not just the distmap one
+ # * assuming that all package dirs are of the same class/type
+ #
+ distmap_hash = PackageInfo.DISTMAP_DIGEST_TYPE
+ extra_hashes = self._pkg_queue_postponed[0][1]().HASH_TYPES
+
+ if not extra_hashes:
+ hashes = { distmap_hash, }
+ elif distmap_hash in extra_hashes:
+ hashes = extra_hashes
+ else:
+ hashes = extra_hashes | { distmap_hash, }
+ else:
+ hashes = { PackageInfo.DISTMAP_DIGEST_TYPE, }
+
+ my_hashpool = roverlay.util.hashpool.HashPool (
+ hashes, self.HASHPOOL_WORKER_COUNT
+ )
+
+ for p_info, pkgdir_ref in self._pkg_queue_postponed:
+ my_hashpool.add (
+ id ( p_info ),
+ p_info.get ( "package_file" ), p_info.hashdict
+ )
+
+ qtime.end ( "setup_hashpool" )
+
+ qtime.begin ( "make_hashes" )
+ my_hashpool.run()
+ qtime.end ( "make_hashes" )
+ else:
+ self.logger.warning (
+ "enqueue_postponed(): falling back to single-threaded variant."
+ )
+ # -- end if HAVE_CONCURRENT_FUTURES
+
+ qtime.begin ( "queue_packages" )
+ for p_info, pkgdir_ref in self._pkg_queue_postponed:
+ add_result = pkgdir_ref().add ( p_info, allow_postpone=False )
+
+ if add_result is True:
+ ejob = roverlay.ebuild.creation.EbuildCreation (
+ p_info,
+ depres_channel_spawner = self._get_resolver_channel,
+ err_queue = self._err_queue
+ )
+ self._pkg_queue.put ( ejob )
+ self.stats.pkg_queued.inc()
+
+ elif add_result is False:
+ self.stats.pkg_dropped.inc()
+
+ else:
+ raise Exception (
+ "enqueue_postponed() should not postpone packages further."
+ )
+ # -- end for
+ qtime.end ( "queue_packages" )
+
+ # clear list
+ self._pkg_queue_postponed[:] = []
+ # --- end of enqueue_postponed (...) ---
+
def write_overlay ( self ):
"""Writes the overlay."""
if self.overlay.writeable():
diff --git a/roverlay/overlay/pkgdir/manifest/file.py b/roverlay/overlay/pkgdir/manifest/file.py
index 9cf8425..f9b6b8d 100644
--- a/roverlay/overlay/pkgdir/manifest/file.py
+++ b/roverlay/overlay/pkgdir/manifest/file.py
@@ -26,6 +26,9 @@ class ManifestFile ( object ):
provided by hashlib (via roverlay.digest).
"""
+ # ref
+ HASH_TYPES = ManifestEntry.HASHTYPES
+
def __init__ ( self, root ):
self.root = root
self.filepath = root + os.path.sep + 'Manifest'
diff --git a/roverlay/overlay/pkgdir/packagedir_base.py b/roverlay/overlay/pkgdir/packagedir_base.py
index 5f54279..7d5f906 100644
--- a/roverlay/overlay/pkgdir/packagedir_base.py
+++ b/roverlay/overlay/pkgdir/packagedir_base.py
@@ -57,6 +57,11 @@ class PackageDirBase ( object ):
#
MANIFEST_THREADSAFE = None
+ # a set(!) of hash types which are used by the package dir
+ # implementation (if any, else None)
+ # other subsystems might calculate them in advance if advertised here
+ HASH_TYPES = None
+
# DOEBUILD_FETCH
# doebuild function that fetches $SRC_URI
# can be overridden by subclasses if e.g. on-the-fly manifest creation
@@ -171,7 +176,9 @@ class PackageDirBase ( object ):
return p
# --- end of _scan_add_package (...) ---
- def add ( self, package_info, add_if_physical=False, _lock=True ):
+ def add ( self,
+ package_info, add_if_physical=False, allow_postpone=False, _lock=True
+ ):
"""Adds a package to this PackageDir.
arguments:
@@ -179,7 +186,7 @@ class PackageDirBase ( object ):
* add_if_physical -- add package even if it exists as ebuild file
(-> overwrite old ebuilds)
- returns: success as bool
+ returns: success as bool // weakref
raises: Exception when ebuild already exists.
"""
@@ -194,11 +201,15 @@ class PackageDirBase ( object ):
if shortver in self._packages:
# package exists, check if it existed before script invocation
if self._packages [shortver] ['physical_only']:
+
if add_if_physical:
# else ignore ebuilds that exist as file
self._packages [shortver] = package_info
added = True
+ elif allow_postpone:
+ added = None
+
elif self.DISTMAP.check_revbump_necessary ( package_info ):
# resolve by recursion
added = self.add (
@@ -213,8 +224,8 @@ class PackageDirBase ( object ):
)
else:
# package has been added to this packagedir before,
- # this most likely happens if it is available via several
- # remotes
+ # this most likely happens if it is available from
+ # more than one repo
self.logger.debug (
"'{PN}-{PVR}.ebuild' already exists, cannot add it!".format (
PN=self.name, PVR=shortver
@@ -232,8 +243,10 @@ class PackageDirBase ( object ):
# !! package_info <-> self (double-linked)
package_info.overlay_package_ref = weakref.ref ( self )
return True
+ elif added is None:
+ return weakref.ref ( self )
else:
- return False
+ return added
# --- end of add (...) ---
def check_empty ( self ):
diff --git a/roverlay/overlay/pkgdir/packagedir_newmanifest.py b/roverlay/overlay/pkgdir/packagedir_newmanifest.py
index ae6c059..f7c258c 100644
--- a/roverlay/overlay/pkgdir/packagedir_newmanifest.py
+++ b/roverlay/overlay/pkgdir/packagedir_newmanifest.py
@@ -28,6 +28,8 @@ class PackageDir ( roverlay.overlay.pkgdir.packagedir_base.PackageDirBase ):
MANIFEST_THREADSAFE = True
+ HASH_TYPES = frozenset ( ManifestFile.HASH_TYPES )
+
# Manifest entries for imported ebuilds have to be created during import
DOEBUILD_FETCH = roverlay.tools.ebuild.doebuild_fetch_and_manifest
diff --git a/roverlay/overlay/root.py b/roverlay/overlay/root.py
index 1e423b0..1ecf4ac 100644
--- a/roverlay/overlay/root.py
+++ b/roverlay/overlay/root.py
@@ -519,32 +519,44 @@ class Overlay ( object ):
)
# --- end of _write_rsuggests_use_desc (...) ---
- def add ( self, package_info ):
+ def add ( self, package_info, allow_postpone=False ):
"""Adds a package to this overlay (into its default category).
arguments:
- * package_info -- PackageInfo of the package to add
-
- returns: True if successfully added else False
+ * package_info -- PackageInfo of the package to add
+ * allow_postpone -- do not add the package if it already exists and
+ return None
+
+ returns:
+ * True if successfully added
+ * a weak reference to the package dir object if postponed
+ * else False
"""
# NOTE:
# * "category" keyword arg has been removed, use add_to(^2) instead
# * self.default_category must not be None (else KeyError is raised)
return self._get_category (
package_info.get ( "category", self.default_category )
- ).add ( package_info )
+ ).add ( package_info, allow_postpone=allow_postpone )
# --- end of add (...) ---
- def add_to ( self, package_info, category ):
+ def add_to ( self, package_info, category, allow_postpone=False ):
"""Adds a package to this overlay.
arguments:
- * package_info -- PackageInfo of the package to add
- * category -- category where the pkg should be put in
-
- returns: True if successfully added else False
+ * package_info -- PackageInfo of the package to add
+ * category -- category where the pkg should be put in
+ * allow_postpone -- do not add the package if it already exists and
+ return None
+
+ returns:
+ * True if successfully added
+ * a weak reference to the package dir object if postponed
+ * else False
"""
- return self._get_category ( category ).add ( package_info )
+ return self._get_category ( category ).add (
+ package_info, allow_postpone=allow_postpone
+ )
# --- end of add_to (...) ---
def has_dir ( self, _dir ):
diff --git a/roverlay/stats/base.py b/roverlay/stats/base.py
index b51226e..28531c2 100644
--- a/roverlay/stats/base.py
+++ b/roverlay/stats/base.py
@@ -66,16 +66,22 @@ class OverlayCreationStats ( OverlayCreationWorkerStats ):
DESCRIPTION = "overlay creation"
_MEMBERS = (
- ( 'creation_time', 'pkg_queued', 'pkg_filtered', 'pkg_dropped', )
+ (
+ 'creation_time', 'queue_postponed_time',
+ 'pkg_queued', 'pkg_queue_postponed',
+ 'pkg_filtered', 'pkg_dropped',
+ )
+ OverlayCreationWorkerStats._MEMBERS
)
def __init__ ( self ):
super ( OverlayCreationStats, self ).__init__()
- self.pkg_queued = abstract.Counter ( "queued" )
- self.pkg_dropped = abstract.Counter ( "dropped" )
- self.pkg_filtered = abstract.Counter ( "filtered" )
- self.creation_time = abstract.TimeStats ( "ebuild creation" )
+ self.pkg_queued = abstract.Counter ( "queued" )
+ self.pkg_queue_postponed = abstract.Counter ( "queue_postponed" )
+ self.pkg_dropped = abstract.Counter ( "dropped" )
+ self.pkg_filtered = abstract.Counter ( "filtered" )
+ self.creation_time = abstract.TimeStats ( "ebuild creation" )
+ self.queue_postponed_time = abstract.TimeStats ( "queue_postponed" )
# --- end of __init__ (...) ---
def get_relevant_package_count ( self ):
diff --git a/roverlay/util/hashpool.py b/roverlay/util/hashpool.py
new file mode 100644
index 0000000..921a968
--- /dev/null
+++ b/roverlay/util/hashpool.py
@@ -0,0 +1,66 @@
+# R overlay --
+# -*- coding: utf-8 -*-
+# Copyright (C) 2013 André Erdmann <dywi@mailerd.de>
+# Distributed under the terms of the GNU General Public License;
+# either version 2 of the License, or (at your option) any later version.
+
+try:
+ import concurrent.futures
+except ImportError:
+ sys.stderr.write (
+ '!!! concurrent.futures is not available.\n'
+ ' Falling back to single-threaded variants.\n\n'
+ )
+ HAVE_CONCURRENT_FUTURES = False
+else:
+ HAVE_CONCURRENT_FUTURES = True
+
+
+import roverlay.digest
+
+def _calculate_hashes ( hash_job, hashes ):
+ hash_job.hashdict.update (
+ roverlay.digest.multihash_file ( hash_job.filepath, hashes )
+ )
+# --- end of _calculate_hashes (...) ---
+
+class Hashjob ( object ):
+ def __init__ ( self, filepath, hashdict=None ):
+ self.filepath = filepath
+ self.hashdict = dict() if hashdict is None else hashdict
+ # --- end of __init__ (...) ---
+
+
+class HashPool ( object ):
+ def __init__ ( self, hashes, max_workers ):
+ super ( HashPool, self ).__init__()
+ self.hashes = frozenset ( hashes )
+ self._jobs = dict()
+ self.max_workers = int ( max_workers )
+ # --- end of __init__ (...) ---
+
+ def add ( self, backref, filepath, hashdict=None ):
+ self._jobs [backref] = Hashjob ( filepath, hashdict )
+ # --- end of add (...) ---
+
+ def run ( self ):
+ #with concurrent.futures.ProcessPoolExecutor ( self.max_workers ) as exe:
+ with concurrent.futures.ThreadPoolExecutor ( self.max_workers ) as exe:
+ running_jobs = frozenset (
+ exe.submit ( _calculate_hashes, job, self.hashes )
+ for job in self._jobs.values()
+ )
+
+ # wait
+ for finished_job in concurrent.futures.as_completed ( running_jobs ):
+ if finished_job.exception() is not None:
+ raise finished_job.exception()
+ # --- end of run (...) ---
+
+ def reset ( self ):
+ self._jobs.clear()
+ # --- end of reset (...) ---
+
+ def get ( self, backref ):
+ return self._jobs [backref].hashdict
+ # --- end of get (...) ---
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2013-07-31 21:10 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-31 21:10 [gentoo-commits] proj/R_overlay:master commit in: roverlay/stats/, roverlay/overlay/pkgdir/manifest/, roverlay/, roverlay/util/, André Erdmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox