From: "André Erdmann" <dywi@mailerd.de> To: gentoo-commits@lists.gentoo.org Subject: [gentoo-commits] proj/R_overlay:gsoc13/next commit in: roverlay/remote/ Date: Tue, 23 Jul 2013 09:38:19 +0000 (UTC) [thread overview] Message-ID: <1374571947.623d65c8a41808bd2962512f70a5e0db5bdf9f9d.dywi@gentoo> (raw) commit: 623d65c8a41808bd2962512f70a5e0db5bdf9f9d Author: André Erdmann <dywi <AT> mailerd <DOT> de> AuthorDate: Tue Jul 23 09:32:27 2013 +0000 Commit: André Erdmann <dywi <AT> mailerd <DOT> de> CommitDate: Tue Jul 23 09:32:27 2013 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=623d65c8 roverlay/remote/websync: retry on sync error Renamed _dosync() to _sync_packages(). _dosync() calls _sync_packages() and retries that up to MAX_WEBSYNC_RETRY times if a "known" url/http exception is caught (known := known and it makes sense to retry). Retry behavior needs some fine-tuning (e.g. don't try to refetch already downloaded packages - this (sooner or later) causes a connection timeout for me). --- roverlay/remote/websync.py | 113 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 19 deletions(-) diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py index fa0a555..87abab8 100644 --- a/roverlay/remote/websync.py +++ b/roverlay/remote/websync.py @@ -4,10 +4,14 @@ # Distributed under the terms of the GNU General Public License; # either version 2 of the License, or (at your option) any later version. 
+from __future__ import print_function + """websync, sync packages via http""" __all__ = [ 'WebsyncPackageList', 'WebsyncRepo', ] +import errno +import contextlib import re import os import sys @@ -15,22 +19,33 @@ import sys # py2 urllib2 vs py3 urllib.request if sys.version_info >= ( 3, ): import urllib.request as _urllib + import urllib.error as _urllib_error else: import urllib2 as _urllib + import urllib2 as _urllib_error -urlopen = _urllib.urlopen +urlopen = _urllib.urlopen +URLError = _urllib_error.URLError +HTTPError = _urllib_error.HTTPError del sys from roverlay import digest, util from roverlay.packageinfo import PackageInfo from roverlay.remote.basicrepo import BasicRepo +MAX_WEBSYNC_RETRY = 3 + +VERBOSE = True + # FIXME: websync does not support package deletion class WebsyncBase ( BasicRepo ): """Provides functionality for retrieving R packages via http. Not meant for direct usage.""" + HTTP_ERROR_RETRY_CODES = frozenset ({ 404, 410, 500, 503 }) + URL_ERROR_RETRY_CODES = frozenset ({ errno.ETIMEDOUT, }) + def __init__ ( self, name, distroot, @@ -138,9 +153,13 @@ class WebsyncBase ( BasicRepo ): bytes_fetched = 0 # FIXME: debug print (?) - print ( - "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) - ) + if VERBOSE: + print ( + "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) + ) + + # unlink the existing file first (if it exists) + util.try_unlink ( distfile ) with open ( distfile, mode='wb' ) as fh: block = webh.read ( self.transfer_blocksize ) @@ -179,8 +198,7 @@ class WebsyncBase ( BasicRepo ): else: return False - else: - # FIXME: debug print + elif VERBOSE: print ( "Skipping fetch for {f!r}".format ( f=distfile ) ) return self._package_synced ( package_file, distfile, src_uri ) @@ -198,8 +216,8 @@ class WebsyncBase ( BasicRepo ): return True # --- end of _package_synced (...) 
--- - def _dosync ( self ): - """Syncs this repo.""" + def _sync_packages ( self ): + """Fetches the package list and downloads the packages.""" package_list = self._fetch_package_list() # empty/unset package list @@ -229,8 +247,66 @@ class WebsyncBase ( BasicRepo ): break return success + # --- end of _sync_packages (...) --- + + def _dosync ( self, max_retry=MAX_WEBSYNC_RETRY ): + """Syncs this repo.""" + retry_count = 0 + want_retry = True + retval_tmp = None + retval = None + + while want_retry and retry_count < max_retry: + retry_count += 1 + want_retry = False + + try: + retval_tmp = self._sync_packages() + + except HTTPError as err: + # catch some error codes that are worth a retry + if err.code in self.HTTP_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with http error code {:d}. ' + 'Retrying...'.format ( err.code ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected http error code: {:d}".format ( err.code ) + ) + self.logger.exception ( err ) + raise + + except URLError as err: + if err.reason.errno in self.URL_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with an url error (errno {:d}. ' + 'Retrying...'.format ( err.reason.errno ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected url error code: {:d}".format ( + err.reason.errno + ) + ) + self.logger.exception ( err ) + raise + else: + retval = retval_tmp + # -- end while + + if want_retry: + self.logger.error ( "retry count exhausted - sync finally failed" ) + return False + else: + return retval # --- end of _dosync (...) --- +# --- end of WebsyncBase --- + + class WebsyncRepo ( WebsyncBase ): """Sync a http repo using its PACKAGES file.""" @@ -323,8 +399,7 @@ class WebsyncRepo ( WebsyncBase ): # --- end of generate_pkglist (...) 
--- package_list = () - try: - webh = urlopen ( self.pkglist_uri ) + with contextlib.closing ( urlopen ( self.pkglist_uri ) ) as webh: content_type = webh.info().get ( 'content-type', None ) @@ -333,12 +408,8 @@ class WebsyncRepo ( WebsyncBase ): "content type {!r} is not supported!".format ( content_type ) ) else: - package_list = tuple ( generate_pkglist ( webh ) ) - - webh.close() - - finally: - if 'webh' in locals() and webh: webh.close() + package_list = list ( generate_pkglist ( webh ) ) + # -- end with return package_list # --- end fetch_pkglist (...) --- @@ -347,6 +418,10 @@ class WebsyncPackageList ( WebsyncBase ): """Sync packages from multiple remotes via http. Packages uris are read from a file.""" + # retry on 404 makes no sense for this sync type since a local package list + # is used + HTTP_ERROR_RETRY_CODES = frozenset ({ 410, 500, 503 }) + def __init__ ( self, pkglist_file, *args, **kwargs ): """Initializes a WebsyncPackageList instance. @@ -420,8 +495,8 @@ class WebsyncPackageList ( WebsyncBase ): return True # --- end of _nosync (...) --- - def _dosync ( self ): - """Sync packages.""" + def _sync_packages ( self ): + """Fetches package files.""" package_list = self._fetch_package_list() # empty/unset package list @@ -439,4 +514,4 @@ class WebsyncPackageList ( WebsyncBase ): break return success - # --- end of _dosync (...) --- + # --- end of _sync_packages (...) ---
WARNING: multiple messages have this Message-ID (diff)
From: "André Erdmann" <dywi@mailerd.de> To: gentoo-commits@lists.gentoo.org Subject: [gentoo-commits] proj/R_overlay:master commit in: roverlay/remote/ Date: Tue, 23 Jul 2013 14:57:03 +0000 (UTC) [thread overview] Message-ID: <1374571947.623d65c8a41808bd2962512f70a5e0db5bdf9f9d.dywi@gentoo> (raw) Message-ID: <20130723145703.F2kSBrCNpv6uJYHDbqeeBF8O938nOyfHLp-fQ0m-VBk@z> (raw) commit: 623d65c8a41808bd2962512f70a5e0db5bdf9f9d Author: André Erdmann <dywi <AT> mailerd <DOT> de> AuthorDate: Tue Jul 23 09:32:27 2013 +0000 Commit: André Erdmann <dywi <AT> mailerd <DOT> de> CommitDate: Tue Jul 23 09:32:27 2013 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=623d65c8 roverlay/remote/websync: retry on sync error Renamed _dosync() to _sync_packages(). _dosync() calls _sync_packages() and retries that up to MAX_WEBSYNC_RETRY times if a "known" url/http exception is caught (known := known and it makes sense to retry). Retry behavior needs some fine-tuning (e.g. don't try to refetch already downloaded packages - this (sooner or later) causes a connection timeout for me). --- roverlay/remote/websync.py | 113 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 19 deletions(-) diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py index fa0a555..87abab8 100644 --- a/roverlay/remote/websync.py +++ b/roverlay/remote/websync.py @@ -4,10 +4,14 @@ # Distributed under the terms of the GNU General Public License; # either version 2 of the License, or (at your option) any later version. 
+from __future__ import print_function + """websync, sync packages via http""" __all__ = [ 'WebsyncPackageList', 'WebsyncRepo', ] +import errno +import contextlib import re import os import sys @@ -15,22 +19,33 @@ import sys # py2 urllib2 vs py3 urllib.request if sys.version_info >= ( 3, ): import urllib.request as _urllib + import urllib.error as _urllib_error else: import urllib2 as _urllib + import urllib2 as _urllib_error -urlopen = _urllib.urlopen +urlopen = _urllib.urlopen +URLError = _urllib_error.URLError +HTTPError = _urllib_error.HTTPError del sys from roverlay import digest, util from roverlay.packageinfo import PackageInfo from roverlay.remote.basicrepo import BasicRepo +MAX_WEBSYNC_RETRY = 3 + +VERBOSE = True + # FIXME: websync does not support package deletion class WebsyncBase ( BasicRepo ): """Provides functionality for retrieving R packages via http. Not meant for direct usage.""" + HTTP_ERROR_RETRY_CODES = frozenset ({ 404, 410, 500, 503 }) + URL_ERROR_RETRY_CODES = frozenset ({ errno.ETIMEDOUT, }) + def __init__ ( self, name, distroot, @@ -138,9 +153,13 @@ class WebsyncBase ( BasicRepo ): bytes_fetched = 0 # FIXME: debug print (?) - print ( - "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) - ) + if VERBOSE: + print ( + "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) + ) + + # unlink the existing file first (if it exists) + util.try_unlink ( distfile ) with open ( distfile, mode='wb' ) as fh: block = webh.read ( self.transfer_blocksize ) @@ -179,8 +198,7 @@ class WebsyncBase ( BasicRepo ): else: return False - else: - # FIXME: debug print + elif VERBOSE: print ( "Skipping fetch for {f!r}".format ( f=distfile ) ) return self._package_synced ( package_file, distfile, src_uri ) @@ -198,8 +216,8 @@ class WebsyncBase ( BasicRepo ): return True # --- end of _package_synced (...) 
--- - def _dosync ( self ): - """Syncs this repo.""" + def _sync_packages ( self ): + """Fetches the package list and downloads the packages.""" package_list = self._fetch_package_list() # empty/unset package list @@ -229,8 +247,66 @@ class WebsyncBase ( BasicRepo ): break return success + # --- end of _sync_packages (...) --- + + def _dosync ( self, max_retry=MAX_WEBSYNC_RETRY ): + """Syncs this repo.""" + retry_count = 0 + want_retry = True + retval_tmp = None + retval = None + + while want_retry and retry_count < max_retry: + retry_count += 1 + want_retry = False + + try: + retval_tmp = self._sync_packages() + + except HTTPError as err: + # catch some error codes that are worth a retry + if err.code in self.HTTP_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with http error code {:d}. ' + 'Retrying...'.format ( err.code ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected http error code: {:d}".format ( err.code ) + ) + self.logger.exception ( err ) + raise + + except URLError as err: + if err.reason.errno in self.URL_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with an url error (errno {:d}. ' + 'Retrying...'.format ( err.reason.errno ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected url error code: {:d}".format ( + err.reason.errno + ) + ) + self.logger.exception ( err ) + raise + else: + retval = retval_tmp + # -- end while + + if want_retry: + self.logger.error ( "retry count exhausted - sync finally failed" ) + return False + else: + return retval # --- end of _dosync (...) --- +# --- end of WebsyncBase --- + + class WebsyncRepo ( WebsyncBase ): """Sync a http repo using its PACKAGES file.""" @@ -323,8 +399,7 @@ class WebsyncRepo ( WebsyncBase ): # --- end of generate_pkglist (...) 
--- package_list = () - try: - webh = urlopen ( self.pkglist_uri ) + with contextlib.closing ( urlopen ( self.pkglist_uri ) ) as webh: content_type = webh.info().get ( 'content-type', None ) @@ -333,12 +408,8 @@ class WebsyncRepo ( WebsyncBase ): "content type {!r} is not supported!".format ( content_type ) ) else: - package_list = tuple ( generate_pkglist ( webh ) ) - - webh.close() - - finally: - if 'webh' in locals() and webh: webh.close() + package_list = list ( generate_pkglist ( webh ) ) + # -- end with return package_list # --- end fetch_pkglist (...) --- @@ -347,6 +418,10 @@ class WebsyncPackageList ( WebsyncBase ): """Sync packages from multiple remotes via http. Packages uris are read from a file.""" + # retry on 404 makes no sense for this sync type since a local package list + # is used + HTTP_ERROR_RETRY_CODES = frozenset ({ 410, 500, 503 }) + def __init__ ( self, pkglist_file, *args, **kwargs ): """Initializes a WebsyncPackageList instance. @@ -420,8 +495,8 @@ class WebsyncPackageList ( WebsyncBase ): return True # --- end of _nosync (...) --- - def _dosync ( self ): - """Sync packages.""" + def _sync_packages ( self ): + """Fetches package files.""" package_list = self._fetch_package_list() # empty/unset package list @@ -439,4 +514,4 @@ class WebsyncPackageList ( WebsyncBase ): break return success - # --- end of _dosync (...) --- + # --- end of _sync_packages (...) ---
next reply other threads:[~2013-07-23 9:38 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2013-07-23 9:38 André Erdmann [this message] 2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:master commit in: roverlay/remote/ André Erdmann -- strict thread matches above, loose matches on Subject: below -- 2013-07-23 14:57 André Erdmann 2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:gsoc13/next " André Erdmann 2013-07-23 9:38 André Erdmann 2013-07-16 16:35 André Erdmann
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1374571947.623d65c8a41808bd2962512f70a5e0db5bdf9f9d.dywi@gentoo \ --to=dywi@mailerd.de \ --cc=gentoo-commits@lists.gentoo.org \ --cc=gentoo-dev@lists.gentoo.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.