public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] proj/R_overlay:gsoc13/next commit in: roverlay/remote/
@ 2013-07-23  9:38 André Erdmann
  2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:master " André Erdmann
  0 siblings, 1 reply; 5+ messages in thread
From: André Erdmann @ 2013-07-23  9:38 UTC (permalink / raw)
  To: gentoo-commits

commit:     92d373d921c730123e5728e62c54328ff2baed9a
Author:     André Erdmann <dywi <AT> mailerd <DOT> de>
AuthorDate: Tue Jul 23 09:30:50 2013 +0000
Commit:     André Erdmann <dywi <AT> mailerd <DOT> de>
CommitDate: Tue Jul 23 09:30:50 2013 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=92d373d9

roverlay/remote/rsync: minor fixup

don't use a string for the undefined value if "None" does it as well.

---
 roverlay/remote/rsync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/roverlay/remote/rsync.py b/roverlay/remote/rsync.py
index f60b032..8102417 100644
--- a/roverlay/remote/rsync.py
+++ b/roverlay/remote/rsync.py
@@ -132,7 +132,7 @@ class RsyncRepo ( BasicRepo ):
          return p.returncode
       # --- end of waitfor (...) ---
 
-      retcode = '<undef>'
+      retcode = None
 
       try:
 


^ permalink raw reply related	[flat|nested] 5+ messages in thread
* [gentoo-commits] proj/R_overlay:master commit in: roverlay/remote/
@ 2013-07-23 14:57 André Erdmann
  2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:gsoc13/next " André Erdmann
  0 siblings, 1 reply; 5+ messages in thread
From: André Erdmann @ 2013-07-23 14:57 UTC (permalink / raw)
  To: gentoo-commits

commit:     a6aae1ff02d7cfa28cf6cf9025eccedbf71aab08
Author:     André Erdmann <dywi <AT> mailerd <DOT> de>
AuthorDate: Tue Jul 23 13:32:46 2013 +0000
Commit:     André Erdmann <dywi <AT> mailerd <DOT> de>
CommitDate: Tue Jul 23 13:32:46 2013 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=a6aae1ff

roverlay/remote/websync, retry: skip fetched files

Entirely skip files that have already been downloaded in "this" roverlay run.
roverlay would've already skipped the actual download of existing files (if
file size and digest match), but this commit adds functionality to skip fetching _before_
opening a connection.

---
 roverlay/remote/websync.py | 221 +++++++++++++++++++++++++++++----------------
 1 file changed, 141 insertions(+), 80 deletions(-)

diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py
index 87abab8..a7250b6 100644
--- a/roverlay/remote/websync.py
+++ b/roverlay/remote/websync.py
@@ -1,6 +1,6 @@
 # R overlay -- remote, websync
 # -*- coding: utf-8 -*-
-# Copyright (C) 2012 André Erdmann <dywi@mailerd.de>
+# Copyright (C) 2012, 2013 André Erdmann <dywi@mailerd.de>
 # Distributed under the terms of the GNU General Public License;
 # either version 2 of the License, or (at your option) any later version.
 
@@ -33,7 +33,9 @@ from roverlay                  import digest, util
 from roverlay.packageinfo      import PackageInfo
 from roverlay.remote.basicrepo import BasicRepo
 
-MAX_WEBSYNC_RETRY = 3
+# this count includes the first run
+# (in contrast to rsync!)
+MAX_WEBSYNC_RETRY = 4
 
 VERBOSE = True
 
@@ -108,98 +110,104 @@ class WebsyncBase ( BasicRepo ):
       * expected_digest -- expected digest for package_file or None (^=disable)
       """
       distfile = self.distdir + os.sep + package_file
-      webh     = urlopen ( src_uri )
-      #web_info = webh.info()
-
-      expected_filesize = int ( webh.info().get ( 'content-length', -1 ) )
-
-      if os.access ( distfile, os.F_OK ):
-         # package exists locally, verify it (size, digest)
-         fetch_required = False
-         localsize      = os.path.getsize ( distfile )
-
-         if localsize != expected_filesize:
-            # size mismatch
-            self.logger.info (
-               'size mismatch for {f!r}: expected {websize} bytes '
-               'but got {localsize}!'.format (
-                  f         = package_file,
-                  websize   = expected_filesize,
-                  localsize = localsize
-               )
-            )
-            fetch_required = True
 
-         elif expected_digest is not None:
-            our_digest = digest.dodigest_file ( distfile, self._digest_type )
-
-            if our_digest != expected_digest:
-               # digest mismatch
-               self.logger.warning (
-                  '{dtype} mismatch for {f!r}: '
-                  'expected {theirs} but got {ours} - refetching.'.format (
-                     dtype  = self._digest_type,
-                     f      = distfile,
-                     theirs = expected_digest,
-                     ours   = our_digest
-                  )
-               )
-               fetch_required = True
+      if self.skip_fetch ( package_file, distfile, src_uri ):
+         if VERBOSE:
+            print ( "Skipping fetch (early) for {f!r}".format ( f=distfile ) )
+         return True
 
-      else:
-         fetch_required = True
 
-      if fetch_required:
-         bytes_fetched = 0
+      with contextlib.closing ( urlopen ( src_uri ) ) as webh:
+         #web_info = webh.info()
 
-         # FIXME: debug print (?)
-         if VERBOSE:
-            print (
-               "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri )
-            )
+         expected_filesize = int ( webh.info().get ( 'content-length', -1 ) )
 
-         # unlink the existing file first (if it exists)
-         util.try_unlink ( distfile )
-
-         with open ( distfile, mode='wb' ) as fh:
-            block = webh.read ( self.transfer_blocksize )
-            while block:
-               # write block to file
-               fh.write ( block )
-               # ? bytelen
-               bytes_fetched += len ( block )
+         if os.access ( distfile, os.F_OK ):
+            # package exists locally, verify it (size, digest)
+            fetch_required = False
+            localsize      = os.path.getsize ( distfile )
 
-               # get the next block
-               block = webh.read ( self.transfer_blocksize )
-         # -- with
+            if localsize != expected_filesize:
+               # size mismatch
+               self.logger.info (
+                  'size mismatch for {f!r}: expected {websize} bytes '
+                  'but got {localsize}!'.format (
+                     f         = package_file,
+                     websize   = expected_filesize,
+                     localsize = localsize
+                  )
+               )
+               fetch_required = True
 
-         if bytes_fetched == expected_filesize:
-            if expected_digest is not None:
+            elif expected_digest is not None:
                our_digest = digest.dodigest_file ( distfile, self._digest_type )
 
                if our_digest != expected_digest:
-                  # fetched package's digest does not match the expected one,
-                  # refuse to use it
+                  # digest mismatch
                   self.logger.warning (
-                     'bad {dtype} digest for {f!r}, expected {theirs} but '
-                     'got {ours} - removing this package.'.format (
+                     '{dtype} mismatch for {f!r}: '
+                     'expected {theirs} but got {ours} - refetching.'.format (
                         dtype  = self._digest_type,
                         f      = distfile,
                         theirs = expected_digest,
                         ours   = our_digest
                      )
                   )
-                  os.remove ( distfile )
-
-                  # package removed -> return success
-                  return True
-               # -- if
-            # -- if
+                  fetch_required = True
 
          else:
-            return False
-      elif VERBOSE:
-         print ( "Skipping fetch for {f!r}".format ( f=distfile ) )
+            fetch_required = True
+
+         if fetch_required:
+            bytes_fetched = 0
+
+            # FIXME: debug print (?)
+            if VERBOSE:
+               print (
+                  "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri )
+               )
+
+            # unlink the existing file first (if it exists)
+            util.try_unlink ( distfile )
+
+            with open ( distfile, mode='wb' ) as fh:
+               block = webh.read ( self.transfer_blocksize )
+               while block:
+                  # write block to file
+                  fh.write ( block )
+                  # ? bytelen
+                  bytes_fetched += len ( block )
+
+                  # get the next block
+                  block = webh.read ( self.transfer_blocksize )
+            # -- with
+
+            if bytes_fetched == expected_filesize:
+               if expected_digest is not None:
+                  our_digest = digest.dodigest_file ( distfile, self._digest_type )
+
+                  if our_digest != expected_digest:
+                     # fetched package's digest does not match the expected one,
+                     # refuse to use it
+                     self.logger.warning (
+                        'bad {dtype} digest for {f!r}, expected {theirs} but '
+                        'got {ours} - removing this package.'.format (
+                           dtype  = self._digest_type,
+                           f      = distfile,
+                           theirs = expected_digest,
+                           ours   = our_digest
+                        )
+                     )
+                     # package removed? -> return success (True/False)
+                     return util.try_unlink ( distfile )
+                  # -- end if <compare digest>
+               # -- end if <have digest?>
+
+            else:
+               return False
+            # -- end if <enough bytes fetched?>
+         elif VERBOSE:
+            print ( "Skipping fetch for {f!r}".format ( f=distfile ) )
 
       return self._package_synced ( package_file, distfile, src_uri )
    # --- end of get_package (...) ---
@@ -281,7 +289,7 @@ class WebsyncBase ( BasicRepo ):
          except URLError as err:
             if err.reason.errno in self.URL_ERROR_RETRY_CODES:
                self.logger.info (
-                  'sync failed with an url error (errno {:d}. '
+                  'sync failed with an url error (errno {:d}). '
                   'Retrying...'.format ( err.reason.errno )
                )
                want_retry = True
@@ -304,6 +312,18 @@ class WebsyncBase ( BasicRepo ):
          return retval
    # --- end of _dosync (...) ---
 
+   def skip_fetch ( self, package_filename, distfile, src_uri ):
+      """Returns True if downloading of a package file should be skipped,
+      else False. Called _before_ opening a web handle (urlopen()).
+
+      arguments:
+      * package_filename --
+      * distfile         --
+      * src_uri          --
+      """
+      return False
+   # --- end of skip_fetch (...) ---
+
 # --- end of WebsyncBase ---
 
 
@@ -348,6 +368,8 @@ class WebsyncRepo ( WebsyncBase ):
       self.pkglist_uri = pkglist_uri or self.get_src_uri ( pkglist_file )
       if not self.pkglist_uri:
          raise Exception ( "pkglist_uri is unset!" )
+
+      self._synced_packages = set()
    # --- end of __init__ (...) ---
 
    def _fetch_package_list ( self ):
@@ -414,6 +436,35 @@ class WebsyncRepo ( WebsyncBase ):
       return package_list
    # --- end fetch_pkglist (...) ---
 
+   def skip_fetch ( self, package_filename, distfile, src_uri ):
+      """Returns True if downloading of a package file should be skipped,
+      else False. Called _before_ opening a web handle (urlopen()).
+
+      arguments:
+      * package_filename --
+      * distfile         --
+      * src_uri          --
+      """
+      return distfile in self._synced_packages
+   # --- end of skip_fetch (...) ---
+
+
+   def _package_synced ( self, package_filename, distfile, src_uri ):
+      """Called when a package has been synced (=exists locally when
+      _get_package() is done).
+
+      arguments:
+      * package_filename --
+      * distfile         --
+      * src_uri          --
+      """
+      self._synced_packages.add ( distfile )
+      return True
+   # --- end of _package_synced (...) ---
+
+# --- end of WebsyncRepo ---
+
+
 class WebsyncPackageList ( WebsyncBase ):
    """Sync packages from multiple remotes via http. Packages uris are read
    from a file."""
@@ -444,7 +495,7 @@ class WebsyncPackageList ( WebsyncBase ):
 
       del self.src_uri
 
-      self._synced_packages = list()
+      self._synced_packages = set()
 
    # --- end of __init__ (...) ---
 
@@ -467,12 +518,22 @@ class WebsyncPackageList ( WebsyncBase ):
    # --- end of _fetch_package_list (...) ---
 
    def _package_synced ( self, package_filename, distfile, src_uri ):
-      self._synced_packages.append (
-         ( package_filename, src_uri )
-      )
+      self._synced_packages.add ( ( package_filename, src_uri ) )
       return True
    # --- end of _package_synced (...) ---
 
+   def skip_fetch ( self, package_filename, distfile, src_uri ):
+      """Returns True if downloading of a package file should be skipped,
+      else False. Called _before_ opening a web handle (urlopen()).
+
+      arguments:
+      * package_filename --
+      * distfile         --
+      * src_uri          --
+      """
+      return ( package_filename, distfile ) in self._synced_packages
+   # --- end of skip_fetch (...) ---
+
    def scan_distdir ( self, log_bad=True, **kwargs_ignored ):
       for package_filename, src_uri in self._synced_packages:
          pkg = self._package_nofail (


^ permalink raw reply related	[flat|nested] 5+ messages in thread
* [gentoo-commits] proj/R_overlay:gsoc13/next commit in: roverlay/remote/
@ 2013-07-23  9:38 André Erdmann
  0 siblings, 0 replies; 5+ messages in thread
From: André Erdmann @ 2013-07-23  9:38 UTC (permalink / raw)
  To: gentoo-commits

commit:     623d65c8a41808bd2962512f70a5e0db5bdf9f9d
Author:     André Erdmann <dywi <AT> mailerd <DOT> de>
AuthorDate: Tue Jul 23 09:32:27 2013 +0000
Commit:     André Erdmann <dywi <AT> mailerd <DOT> de>
CommitDate: Tue Jul 23 09:32:27 2013 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=623d65c8

roverlay/remote/websync: retry on sync error

Renamed _dosync() to _sync_packages().
_dosync() calls _sync_packages() and retries that up to MAX_WEBSYNC_RETRY
times if a "known" url/http exception is caught (known := known and it makes
sense to retry).

Retry behavior needs some fine-tuning (e.g. don't try to refetch already
downloaded packages - this (sooner or later) causes a connection timeout
for me).

---
 roverlay/remote/websync.py | 113 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 94 insertions(+), 19 deletions(-)

diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py
index fa0a555..87abab8 100644
--- a/roverlay/remote/websync.py
+++ b/roverlay/remote/websync.py
@@ -4,10 +4,14 @@
 # Distributed under the terms of the GNU General Public License;
 # either version 2 of the License, or (at your option) any later version.
 
+from __future__ import print_function
+
 """websync, sync packages via http"""
 
 __all__ = [ 'WebsyncPackageList', 'WebsyncRepo', ]
 
+import errno
+import contextlib
 import re
 import os
 import sys
@@ -15,22 +19,33 @@ import sys
 # py2 urllib2 vs py3 urllib.request
 if sys.version_info >= ( 3, ):
    import urllib.request as _urllib
+   import urllib.error   as _urllib_error
 else:
    import urllib2 as _urllib
+   import urllib2 as _urllib_error
 
-urlopen = _urllib.urlopen
+urlopen   = _urllib.urlopen
+URLError  = _urllib_error.URLError
+HTTPError = _urllib_error.HTTPError
 del sys
 
 from roverlay                  import digest, util
 from roverlay.packageinfo      import PackageInfo
 from roverlay.remote.basicrepo import BasicRepo
 
+MAX_WEBSYNC_RETRY = 3
+
+VERBOSE = True
+
 # FIXME: websync does not support package deletion
 
 class WebsyncBase ( BasicRepo ):
    """Provides functionality for retrieving R packages via http.
    Not meant for direct usage."""
 
+   HTTP_ERROR_RETRY_CODES = frozenset ({ 404, 410, 500, 503 })
+   URL_ERROR_RETRY_CODES  = frozenset ({ errno.ETIMEDOUT, })
+
    def __init__ ( self,
       name,
       distroot,
@@ -138,9 +153,13 @@ class WebsyncBase ( BasicRepo ):
          bytes_fetched = 0
 
          # FIXME: debug print (?)
-         print (
-            "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri )
-         )
+         if VERBOSE:
+            print (
+               "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri )
+            )
+
+         # unlink the existing file first (if it exists)
+         util.try_unlink ( distfile )
 
          with open ( distfile, mode='wb' ) as fh:
             block = webh.read ( self.transfer_blocksize )
@@ -179,8 +198,7 @@ class WebsyncBase ( BasicRepo ):
 
          else:
             return False
-      else:
-         # FIXME: debug print
+      elif VERBOSE:
          print ( "Skipping fetch for {f!r}".format ( f=distfile ) )
 
       return self._package_synced ( package_file, distfile, src_uri )
@@ -198,8 +216,8 @@ class WebsyncBase ( BasicRepo ):
       return True
    # --- end of _package_synced (...) ---
 
-   def _dosync ( self ):
-      """Syncs this repo."""
+   def _sync_packages ( self ):
+      """Fetches the package list and downloads the packages."""
       package_list = self._fetch_package_list()
 
       # empty/unset package list
@@ -229,8 +247,66 @@ class WebsyncBase ( BasicRepo ):
                break
 
       return success
+   # --- end of _sync_packages (...) ---
+
+   def _dosync ( self, max_retry=MAX_WEBSYNC_RETRY ):
+      """Syncs this repo."""
+      retry_count = 0
+      want_retry  = True
+      retval_tmp  = None
+      retval      = None
+
+      while want_retry and retry_count < max_retry:
+         retry_count += 1
+         want_retry   = False
+
+         try:
+            retval_tmp = self._sync_packages()
+
+         except HTTPError as err:
+            # catch some error codes that are worth a retry
+            if err.code in self.HTTP_ERROR_RETRY_CODES:
+               self.logger.info (
+                  'sync failed with http error code {:d}. '
+                  'Retrying...'.format ( err.code )
+               )
+               want_retry = True
+            else:
+               self.logger.critical (
+                  "got an unexpected http error code: {:d}".format ( err.code )
+               )
+               self.logger.exception ( err )
+               raise
+
+         except URLError as err:
+            if err.reason.errno in self.URL_ERROR_RETRY_CODES:
+               self.logger.info (
+                  'sync failed with an url error (errno {:d}. '
+                  'Retrying...'.format ( err.reason.errno )
+               )
+               want_retry = True
+            else:
+               self.logger.critical (
+                  "got an unexpected url error code: {:d}".format (
+                     err.reason.errno
+                  )
+               )
+               self.logger.exception ( err )
+               raise
+         else:
+            retval = retval_tmp
+      # -- end while
+
+      if want_retry:
+         self.logger.error ( "retry count exhausted - sync finally failed" )
+         return False
+      else:
+         return retval
    # --- end of _dosync (...) ---
 
+# --- end of WebsyncBase ---
+
+
 
 class WebsyncRepo ( WebsyncBase ):
    """Sync a http repo using its PACKAGES file."""
@@ -323,8 +399,7 @@ class WebsyncRepo ( WebsyncBase ):
       # --- end of generate_pkglist (...) ---
 
       package_list = ()
-      try:
-         webh = urlopen ( self.pkglist_uri )
+      with contextlib.closing ( urlopen ( self.pkglist_uri ) ) as webh:
 
          content_type = webh.info().get ( 'content-type', None )
 
@@ -333,12 +408,8 @@ class WebsyncRepo ( WebsyncBase ):
                "content type {!r} is not supported!".format ( content_type )
             )
          else:
-            package_list = tuple ( generate_pkglist ( webh ) )
-
-         webh.close()
-
-      finally:
-         if 'webh' in locals() and webh: webh.close()
+            package_list = list ( generate_pkglist ( webh ) )
+      # -- end with
 
       return package_list
    # --- end fetch_pkglist (...) ---
@@ -347,6 +418,10 @@ class WebsyncPackageList ( WebsyncBase ):
    """Sync packages from multiple remotes via http. Packages uris are read
    from a file."""
 
+   # retry on 404 makes no sense for this sync type since a local package list
+   # is used
+   HTTP_ERROR_RETRY_CODES = frozenset ({ 410, 500, 503 })
+
    def __init__ ( self, pkglist_file, *args, **kwargs ):
       """Initializes a WebsyncPackageList instance.
 
@@ -420,8 +495,8 @@ class WebsyncPackageList ( WebsyncBase ):
       return True
    # --- end of _nosync (...) ---
 
-   def _dosync ( self ):
-      """Sync packages."""
+   def _sync_packages ( self ):
+      """Fetches package files."""
       package_list = self._fetch_package_list()
 
       # empty/unset package list
@@ -439,4 +514,4 @@ class WebsyncPackageList ( WebsyncBase ):
             break
 
       return success
-   # --- end of _dosync (...) ---
+   # --- end of _sync_packages (...) ---


^ permalink raw reply related	[flat|nested] 5+ messages in thread
* [gentoo-commits] proj/R_overlay:gsoc13/next commit in: roverlay/remote/
@ 2013-07-16 16:35 André Erdmann
  0 siblings, 0 replies; 5+ messages in thread
From: André Erdmann @ 2013-07-16 16:35 UTC (permalink / raw)
  To: gentoo-commits

commit:     0d67413e156934952f236ae8161e8536bad03293
Author:     André Erdmann <dywi <AT> mailerd <DOT> de>
AuthorDate: Tue Jul 16 16:34:51 2013 +0000
Commit:     André Erdmann <dywi <AT> mailerd <DOT> de>
CommitDate: Tue Jul 16 16:34:51 2013 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=0d67413e

roverlay/remote: print repo name while syncing

---
 roverlay/remote/basicrepo.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/roverlay/remote/basicrepo.py b/roverlay/remote/basicrepo.py
index 9ce512c..33d8f30 100644
--- a/roverlay/remote/basicrepo.py
+++ b/roverlay/remote/basicrepo.py
@@ -192,6 +192,8 @@ class BasicRepo ( object ):
       """Syncs this repo."""
 
       status = False
+      print ( "Syncing {!r} ...".format ( self.name ) )
+
       if sync_enabled and hasattr ( self, '_dosync' ):
          status = self._dosync()
 


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2013-07-23 14:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-23  9:38 [gentoo-commits] proj/R_overlay:gsoc13/next commit in: roverlay/remote/ André Erdmann
2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:master " André Erdmann
  -- strict thread matches above, loose matches on Subject: below --
2013-07-23 14:57 André Erdmann
2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:gsoc13/next " André Erdmann
2013-07-23  9:38 André Erdmann
2013-07-16 16:35 André Erdmann

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.