public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] proj/R_overlay:master commit in: roverlay/stats/, roverlay/overlay/pkgdir/manifest/, roverlay/, roverlay/util/, ...
@ 2013-07-31 21:10 André Erdmann
  0 siblings, 0 replies; only message in thread
From: André Erdmann @ 2013-07-31 21:10 UTC (permalink / raw
  To: gentoo-commits

commit:     86d046b36e0a8a882a4f5d01a8e1eee82bc335b5
Author:     André Erdmann <dywi <AT> mailerd <DOT> de>
AuthorDate: Wed Jul 31 21:07:36 2013 +0000
Commit:     André Erdmann <dywi <AT> mailerd <DOT> de>
CommitDate: Wed Jul 31 21:07:36 2013 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=86d046b3

incremental ov-creation: postpone revbump check

The revbump check needs file hashes for its comparison. Postponing the hash
calculation and executing it in a thread pool may have speed advantages
(needs some testing). Additionally, postponing makes it possible to explicitly
disable revbump checks when doing incremental overlay creation.

---
 roverlay/main.py                                  |   1 +
 roverlay/overlay/creator.py                       | 113 +++++++++++++++++++++-
 roverlay/overlay/pkgdir/manifest/file.py          |   3 +
 roverlay/overlay/pkgdir/packagedir_base.py        |  23 ++++-
 roverlay/overlay/pkgdir/packagedir_newmanifest.py |   2 +
 roverlay/overlay/root.py                          |  34 ++++---
 roverlay/stats/base.py                            |  16 ++-
 roverlay/util/hashpool.py                         |  66 +++++++++++++
 8 files changed, 232 insertions(+), 26 deletions(-)

diff --git a/roverlay/main.py b/roverlay/main.py
index 9cca0a8..7830d23 100644
--- a/roverlay/main.py
+++ b/roverlay/main.py
@@ -292,6 +292,7 @@ def main (
          )
 
          repo_list.add_packages ( overlay_creator.add_package )
+         overlay_creator.enqueue_postponed()
 
          overlay_creator.release_package_rules()
 

diff --git a/roverlay/overlay/creator.py b/roverlay/overlay/creator.py
index c21617e..32dacbc 100644
--- a/roverlay/overlay/creator.py
+++ b/roverlay/overlay/creator.py
@@ -38,7 +38,7 @@ import roverlay.overlay.pkgdir.distroot.static
 import roverlay.recipe.distmap
 import roverlay.recipe.easyresolver
 import roverlay.stats.collector
-
+import roverlay.util.hashpool
 
 class OverlayCreator ( object ):
    """This is a 'R packages -> Overlay' interface."""
@@ -46,6 +46,8 @@ class OverlayCreator ( object ):
    LOGGER = logging.getLogger ( 'OverlayCreator' )
    STATS  = roverlay.stats.collector.static.overlay_creation
 
+   HASHPOOL_WORKER_COUNT = 0
+
    def __init__ ( self,
       skip_manifest, incremental, immediate_ebuild_writes,
       logger=None, allow_write=True
@@ -82,7 +84,8 @@ class OverlayCreator ( object ):
 
       self.NUMTHREADS  = config.get ( 'EBUILD.jobcount', 0 )
 
-      self._pkg_queue  = queue.Queue()
+      self._pkg_queue           = queue.Queue()
+      self._pkg_queue_postponed = list()
       self._err_queue.attach_queue ( self._pkg_queue, None )
 
       self._workers   = None
@@ -119,14 +122,18 @@ class OverlayCreator ( object ):
       )
    # --- end of _get_resolver_channel (...) ---
 
-   def add_package ( self, package_info ):
+   def add_package ( self, package_info, allow_postpone=True ):
       """Adds a PackageInfo to the package queue.
 
       arguments:
       * package_info --
       """
       if self.package_rules.apply_actions ( package_info ):
-         if self.overlay.add ( package_info ):
+         add_result = self.overlay.add (
+            package_info, allow_postpone=allow_postpone
+         )
+
+         if add_result is True:
             ejob = roverlay.ebuild.creation.EbuildCreation (
                package_info,
                depres_channel_spawner = self._get_resolver_channel,
@@ -134,13 +141,109 @@ class OverlayCreator ( object ):
             )
             self._pkg_queue.put ( ejob )
             self.stats.pkg_queued.inc()
-         else:
+
+         elif add_result is False:
             self.stats.pkg_dropped.inc()
+
+         elif allow_postpone:
+            self.stats.pkg_queue_postponed.inc()
+            self._pkg_queue_postponed.append ( ( package_info, add_result ) )
+
+         else:
+            raise Exception ( "bad return from overlay.add()" )
+
       else:
          # else filtered out
          self.stats.pkg_filtered.inc()
    # --- end of add_package (...) ---
 
+   def discard_postponed ( self ):
+      self._pkg_queue_postponed [:] = []
+   # --- end of discard_postponed (...) ---
+
+   def enqueue_postponed ( self, prehash_manifest=False ):
+      # !!! prehash_manifest results in threaded calculation taking
+      #     _more_ time than single-threaded. Postponed packages usually
+      #     don't need a revbump, so the benefit (i.e. no need to recalculate
+      #     on revbump) likely does not outweigh the time penalty here
+      #
+
+      if self._pkg_queue_postponed:
+         qtime = self.stats.queue_postponed_time
+
+         self.logger.info ( "Checking for packages that need a revbump" )
+
+         if self.HASHPOOL_WORKER_COUNT < 1:
+            pass
+
+         elif roverlay.util.hashpool.HAVE_CONCURRENT_FUTURES:
+            qtime.begin ( "setup_hashpool" )
+
+            # determine hashes that should be calculated here
+            if prehash_manifest:
+               # * calculate all hashes, not just the distmap one
+               # * assuming that all package dirs are of the same class/type
+               #
+               distmap_hash = PackageInfo.DISTMAP_DIGEST_TYPE
+               extra_hashes = self._pkg_queue_postponed[0][1]().HASH_TYPES
+
+               if not extra_hashes:
+                  hashes = { distmap_hash, }
+               elif distmap_hash in extra_hashes:
+                  hashes = extra_hashes
+               else:
+                  hashes = extra_hashes | { distmap_hash, }
+            else:
+               hashes = { PackageInfo.DISTMAP_DIGEST_TYPE, }
+
+            my_hashpool = roverlay.util.hashpool.HashPool (
+               hashes, self.HASHPOOL_WORKER_COUNT
+            )
+
+            for p_info, pkgdir_ref in self._pkg_queue_postponed:
+               my_hashpool.add (
+                  id ( p_info ),
+                  p_info.get ( "package_file" ), p_info.hashdict
+               )
+
+            qtime.end ( "setup_hashpool" )
+
+            qtime.begin ( "make_hashes" )
+            my_hashpool.run()
+            qtime.end ( "make_hashes" )
+         else:
+            self.logger.warning (
+               "enqueue_postponed(): falling back to single-threaded variant."
+            )
+         # -- end if HAVE_CONCURRENT_FUTURES
+
+         qtime.begin ( "queue_packages" )
+         for p_info, pkgdir_ref in self._pkg_queue_postponed:
+            add_result = pkgdir_ref().add ( p_info, allow_postpone=False )
+
+            if add_result is True:
+               ejob = roverlay.ebuild.creation.EbuildCreation (
+                  p_info,
+                  depres_channel_spawner = self._get_resolver_channel,
+                  err_queue              = self._err_queue
+               )
+               self._pkg_queue.put ( ejob )
+               self.stats.pkg_queued.inc()
+
+            elif add_result is False:
+               self.stats.pkg_dropped.inc()
+
+            else:
+               raise Exception (
+                  "enqueue_postponed() should not postpone packages further."
+               )
+         # -- end for
+         qtime.end ( "queue_packages" )
+
+         # clear list
+         self._pkg_queue_postponed[:] = []
+   # --- end of enqueue_postponed (...) ---
+
    def write_overlay ( self ):
       """Writes the overlay."""
       if self.overlay.writeable():

diff --git a/roverlay/overlay/pkgdir/manifest/file.py b/roverlay/overlay/pkgdir/manifest/file.py
index 9cf8425..f9b6b8d 100644
--- a/roverlay/overlay/pkgdir/manifest/file.py
+++ b/roverlay/overlay/pkgdir/manifest/file.py
@@ -26,6 +26,9 @@ class ManifestFile ( object ):
    provided by hashlib (via roverlay.digest).
    """
 
+   # ref
+   HASH_TYPES = ManifestEntry.HASHTYPES
+
    def __init__ ( self, root ):
       self.root     = root
       self.filepath = root + os.path.sep + 'Manifest'

diff --git a/roverlay/overlay/pkgdir/packagedir_base.py b/roverlay/overlay/pkgdir/packagedir_base.py
index 5f54279..7d5f906 100644
--- a/roverlay/overlay/pkgdir/packagedir_base.py
+++ b/roverlay/overlay/pkgdir/packagedir_base.py
@@ -57,6 +57,11 @@ class PackageDirBase ( object ):
    #
    MANIFEST_THREADSAFE = None
 
+   # a set(!) of hash types which are used by the package dir
+   # implementation (if any, else None)
+   #  other subsystems might calculate them in advance if advertised here
+   HASH_TYPES = None
+
    # DOEBUILD_FETCH
    #  doebuild function that fetches $SRC_URI
    #  can be overridden by subclasses if e.g. on-the-fly manifest creation
@@ -171,7 +176,9 @@ class PackageDirBase ( object ):
       return p
    # --- end of _scan_add_package (...) ---
 
-   def add ( self, package_info, add_if_physical=False, _lock=True ):
+   def add ( self,
+      package_info, add_if_physical=False, allow_postpone=False, _lock=True
+   ):
       """Adds a package to this PackageDir.
 
       arguments:
@@ -179,7 +186,7 @@ class PackageDirBase ( object ):
       * add_if_physical -- add package even if it exists as ebuild file
                             (-> overwrite old ebuilds)
 
-      returns: success as bool
+      returns: success as bool, or a weakref to this package dir if postponed
 
       raises: Exception when ebuild already exists.
       """
@@ -194,11 +201,15 @@ class PackageDirBase ( object ):
          if shortver in self._packages:
             # package exists, check if it existed before script invocation
             if self._packages [shortver] ['physical_only']:
+
                if add_if_physical:
                   # else ignore ebuilds that exist as file
                   self._packages [shortver] = package_info
                   added = True
 
+               elif allow_postpone:
+                  added = None
+
                elif self.DISTMAP.check_revbump_necessary ( package_info ):
                   # resolve by recursion
                   added = self.add (
@@ -213,8 +224,8 @@ class PackageDirBase ( object ):
                   )
             else:
                # package has been added to this packagedir before,
-               # this most likely happens if it is available via several
-               # remotes
+               # this most likely happens if it is available from
+               # more than one repo
                self.logger.debug (
                   "'{PN}-{PVR}.ebuild' already exists, cannot add it!".format (
                   PN=self.name, PVR=shortver
@@ -232,8 +243,10 @@ class PackageDirBase ( object ):
          # !! package_info <-> self (double-linked)
          package_info.overlay_package_ref = weakref.ref ( self )
          return True
+      elif added is None:
+         return weakref.ref ( self )
       else:
-         return False
+         return added
    # --- end of add (...) ---
 
    def check_empty ( self ):

diff --git a/roverlay/overlay/pkgdir/packagedir_newmanifest.py b/roverlay/overlay/pkgdir/packagedir_newmanifest.py
index ae6c059..f7c258c 100644
--- a/roverlay/overlay/pkgdir/packagedir_newmanifest.py
+++ b/roverlay/overlay/pkgdir/packagedir_newmanifest.py
@@ -28,6 +28,8 @@ class PackageDir ( roverlay.overlay.pkgdir.packagedir_base.PackageDirBase ):
 
    MANIFEST_THREADSAFE = True
 
+   HASH_TYPES = frozenset ( ManifestFile.HASH_TYPES )
+
    # Manifest entries for imported ebuilds have to be created during import
    DOEBUILD_FETCH = roverlay.tools.ebuild.doebuild_fetch_and_manifest
 

diff --git a/roverlay/overlay/root.py b/roverlay/overlay/root.py
index 1e423b0..1ecf4ac 100644
--- a/roverlay/overlay/root.py
+++ b/roverlay/overlay/root.py
@@ -519,32 +519,44 @@ class Overlay ( object ):
          )
    # --- end of _write_rsuggests_use_desc (...) ---
 
-   def add ( self, package_info ):
+   def add ( self, package_info, allow_postpone=False ):
       """Adds a package to this overlay (into its default category).
 
       arguments:
-      * package_info -- PackageInfo of the package to add
-
-      returns: True if successfully added else False
+      * package_info   -- PackageInfo of the package to add
+      * allow_postpone -- do not add the package if it already exists and
+                          return a weak reference to its package dir instead
+
+      returns:
+      * True if successfully added
+      * a weak reference to the package dir object if postponed
+      * else False
       """
       # NOTE:
       # * "category" keyword arg has been removed, use add_to(^2) instead
       # * self.default_category must not be None (else KeyError is raised)
       return self._get_category (
          package_info.get ( "category", self.default_category )
-      ).add ( package_info )
+      ).add ( package_info, allow_postpone=allow_postpone )
    # --- end of add (...) ---
 
-   def add_to ( self, package_info, category ):
+   def add_to ( self, package_info, category, allow_postpone=False ):
       """Adds a package to this overlay.
 
       arguments:
-      * package_info -- PackageInfo of the package to add
-      * category     -- category where the pkg should be put in
-
-      returns: True if successfully added else False
+      * package_info   -- PackageInfo of the package to add
+      * category       -- category where the pkg should be put in
+      * allow_postpone -- do not add the package if it already exists and
+                          return a weak reference to its package dir instead
+
+      returns:
+      * True if successfully added
+      * a weak reference to the package dir object if postponed
+      * else False
       """
-      return self._get_category ( category ).add ( package_info )
+      return self._get_category ( category ).add (
+         package_info, allow_postpone=allow_postpone
+      )
    # --- end of add_to (...) ---
 
    def has_dir ( self, _dir ):

diff --git a/roverlay/stats/base.py b/roverlay/stats/base.py
index b51226e..28531c2 100644
--- a/roverlay/stats/base.py
+++ b/roverlay/stats/base.py
@@ -66,16 +66,22 @@ class OverlayCreationStats ( OverlayCreationWorkerStats ):
    DESCRIPTION = "overlay creation"
 
    _MEMBERS = (
-      ( 'creation_time', 'pkg_queued', 'pkg_filtered', 'pkg_dropped', )
+      (
+         'creation_time', 'queue_postponed_time',
+         'pkg_queued', 'pkg_queue_postponed',
+         'pkg_filtered', 'pkg_dropped',
+      )
       + OverlayCreationWorkerStats._MEMBERS
    )
 
    def __init__ ( self ):
       super ( OverlayCreationStats, self ).__init__()
-      self.pkg_queued    = abstract.Counter ( "queued" )
-      self.pkg_dropped   = abstract.Counter ( "dropped" )
-      self.pkg_filtered  = abstract.Counter ( "filtered" )
-      self.creation_time = abstract.TimeStats ( "ebuild creation" )
+      self.pkg_queued           = abstract.Counter   ( "queued" )
+      self.pkg_queue_postponed  = abstract.Counter   ( "queue_postponed" )
+      self.pkg_dropped          = abstract.Counter   ( "dropped" )
+      self.pkg_filtered         = abstract.Counter   ( "filtered" )
+      self.creation_time        = abstract.TimeStats ( "ebuild creation" )
+      self.queue_postponed_time = abstract.TimeStats ( "queue_postponed" )
    # --- end of __init__ (...) ---
 
    def get_relevant_package_count ( self ):

diff --git a/roverlay/util/hashpool.py b/roverlay/util/hashpool.py
new file mode 100644
index 0000000..921a968
--- /dev/null
+++ b/roverlay/util/hashpool.py
@@ -0,0 +1,66 @@
+# R overlay --
+# -*- coding: utf-8 -*-
+# Copyright (C) 2013 André Erdmann <dywi@mailerd.de>
+# Distributed under the terms of the GNU General Public License;
+# either version 2 of the License, or (at your option) any later version.
+
+try:
+   import concurrent.futures
+except ImportError:
+   import sys # local import: only needed to report the fallback
+   sys.stderr.write (
+      '!!! concurrent.futures is not available.\n'
+      '    Falling back to single-threaded variants.\n\n'
+   )
+   HAVE_CONCURRENT_FUTURES = False
+else:
+   HAVE_CONCURRENT_FUTURES = True
+
+import roverlay.digest
+
+def _calculate_hashes ( hash_job, hashes ):
+   hash_job.hashdict.update (
+      roverlay.digest.multihash_file ( hash_job.filepath, hashes )
+   )
+# --- end of _calculate_hashes (...) ---
+
+class Hashjob ( object ):
+   def __init__ ( self, filepath, hashdict=None ):
+      self.filepath = filepath
+      self.hashdict = dict() if hashdict is None else hashdict
+   # --- end of __init__ (...) ---
+
+
+class HashPool ( object ):
+   def __init__ ( self, hashes, max_workers ):
+      super ( HashPool, self ).__init__()
+      self.hashes      = frozenset ( hashes )
+      self._jobs       = dict()
+      self.max_workers = int ( max_workers )
+   # --- end of __init__ (...) ---
+
+   def add ( self, backref, filepath, hashdict=None ):
+      self._jobs [backref] = Hashjob ( filepath, hashdict )
+   # --- end of add (...) ---
+
+   def run ( self ):
+      #with concurrent.futures.ProcessPoolExecutor ( self.max_workers ) as exe:
+      with concurrent.futures.ThreadPoolExecutor ( self.max_workers ) as exe:
+         running_jobs = frozenset (
+            exe.submit ( _calculate_hashes, job, self.hashes )
+            for job in self._jobs.values()
+         )
+
+         # wait
+         for finished_job in concurrent.futures.as_completed ( running_jobs ):
+            if finished_job.exception() is not None:
+               raise finished_job.exception()
+   # --- end of run (...) ---
+
+   def reset ( self ):
+      self._jobs.clear()
+   # --- end of reset (...) ---
+
+   def get ( self, backref ):
+      return self._jobs [backref].hashdict
+   # --- end of get (...) ---


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2013-07-31 21:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-07-31 21:10 [gentoo-commits] proj/R_overlay:master commit in: roverlay/stats/, roverlay/overlay/pkgdir/manifest/, roverlay/, roverlay/util/, André Erdmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox