From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id D68F41381F3 for ; Tue, 23 Jul 2013 09:38:23 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 53D60E09C3; Tue, 23 Jul 2013 09:38:23 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 81C2DE09C3 for ; Tue, 23 Jul 2013 09:38:22 +0000 (UTC) Received: from hornbill.gentoo.org (hornbill.gentoo.org [94.100.119.163]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id 5D9CE33E94B for ; Tue, 23 Jul 2013 09:38:21 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by hornbill.gentoo.org (Postfix) with ESMTP id F23EDE5464 for ; Tue, 23 Jul 2013 09:38:19 +0000 (UTC) From: "André Erdmann" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "André Erdmann" Message-ID: <1374571947.623d65c8a41808bd2962512f70a5e0db5bdf9f9d.dywi@gentoo> Subject: [gentoo-commits] proj/R_overlay:gsoc13/next commit in: roverlay/remote/ X-VCS-Repository: proj/R_overlay X-VCS-Files: roverlay/remote/websync.py X-VCS-Directories: roverlay/remote/ X-VCS-Committer: dywi X-VCS-Committer-Name: André Erdmann X-VCS-Revision: 623d65c8a41808bd2962512f70a5e0db5bdf9f9d X-VCS-Branch: gsoc13/next Date: Tue, 23 Jul 2013 09:38:19 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: 0c556a55-ba56-4fde-8107-f5d713e5121f X-Archives-Hash: 3d6be4b8363e66691e7aa821f152d5f9 commit: 623d65c8a41808bd2962512f70a5e0db5bdf9f9d Author: André Erdmann mailerd de> AuthorDate: Tue Jul 23 09:32:27 2013 +0000 Commit: André Erdmann mailerd de> CommitDate: Tue Jul 23 09:32:27 2013 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=623d65c8 roverlay/remote/websync: retry on sync error Renamed _dosync() to _sync_packages(). _dosync() calls _sync_packages() and retries that up to MAX_WEBSYNC_RETRY times if a "known" url/http exception is caught (known := known and it makes sense to retry). Retry behavior needs some fine-tuning (e.g. don't try to refetch already downloaded packages - this (sooner or later) causes a connection timeout for me). --- roverlay/remote/websync.py | 113 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 19 deletions(-) diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py index fa0a555..87abab8 100644 --- a/roverlay/remote/websync.py +++ b/roverlay/remote/websync.py @@ -4,10 +4,14 @@ # Distributed under the terms of the GNU General Public License; # either version 2 of the License, or (at your option) any later version. +from __future__ import print_function + """websync, sync packages via http""" __all__ = [ 'WebsyncPackageList', 'WebsyncRepo', ] +import errno +import contextlib import re import os import sys @@ -15,22 +19,33 @@ import sys # py2 urllib2 vs py3 urllib.request if sys.version_info >= ( 3, ): import urllib.request as _urllib + import urllib.error as _urllib_error else: import urllib2 as _urllib + import urllib2 as _urllib_error -urlopen = _urllib.urlopen +urlopen = _urllib.urlopen +URLError = _urllib_error.URLError +HTTPError = _urllib_error.HTTPError del sys from roverlay import digest, util from roverlay.packageinfo import PackageInfo from roverlay.remote.basicrepo import BasicRepo +MAX_WEBSYNC_RETRY = 3 + +VERBOSE = True + # FIXME: websync does not support package deletion class WebsyncBase ( BasicRepo ): """Provides functionality for retrieving R packages via http. Not meant for direct usage.""" + HTTP_ERROR_RETRY_CODES = frozenset ({ 404, 410, 500, 503 }) + URL_ERROR_RETRY_CODES = frozenset ({ errno.ETIMEDOUT, }) + def __init__ ( self, name, distroot, @@ -138,9 +153,13 @@ class WebsyncBase ( BasicRepo ): bytes_fetched = 0 # FIXME: debug print (?) - print ( - "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) - ) + if VERBOSE: + print ( + "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) + ) + + # unlink the existing file first (if it exists) + util.try_unlink ( distfile ) with open ( distfile, mode='wb' ) as fh: block = webh.read ( self.transfer_blocksize ) @@ -179,8 +198,7 @@ class WebsyncBase ( BasicRepo ): else: return False - else: - # FIXME: debug print + elif VERBOSE: print ( "Skipping fetch for {f!r}".format ( f=distfile ) ) return self._package_synced ( package_file, distfile, src_uri ) @@ -198,8 +216,8 @@ class WebsyncBase ( BasicRepo ): return True # --- end of _package_synced (...) --- - def _dosync ( self ): - """Syncs this repo.""" + def _sync_packages ( self ): + """Fetches the package list and downloads the packages.""" package_list = self._fetch_package_list() # empty/unset package list @@ -229,8 +247,66 @@ class WebsyncBase ( BasicRepo ): break return success + # --- end of _sync_packages (...) --- + + def _dosync ( self, max_retry=MAX_WEBSYNC_RETRY ): + """Syncs this repo.""" + retry_count = 0 + want_retry = True + retval_tmp = None + retval = None + + while want_retry and retry_count < max_retry: + retry_count += 1 + want_retry = False + + try: + retval_tmp = self._sync_packages() + + except HTTPError as err: + # catch some error codes that are worth a retry + if err.code in self.HTTP_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with http error code {:d}. ' + 'Retrying...'.format ( err.code ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected http error code: {:d}".format ( err.code ) + ) + self.logger.exception ( err ) + raise + + except URLError as err: + if err.reason.errno in self.URL_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with an url error (errno {:d}. ' + 'Retrying...'.format ( err.reason.errno ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected url error code: {:d}".format ( + err.reason.errno + ) + ) + self.logger.exception ( err ) + raise + else: + retval = retval_tmp + # -- end while + + if want_retry: + self.logger.error ( "retry count exhausted - sync finally failed" ) + return False + else: + return retval # --- end of _dosync (...) --- +# --- end of WebsyncBase --- + + class WebsyncRepo ( WebsyncBase ): """Sync a http repo using its PACKAGES file.""" @@ -323,8 +399,7 @@ class WebsyncRepo ( WebsyncBase ): # --- end of generate_pkglist (...) --- package_list = () - try: - webh = urlopen ( self.pkglist_uri ) + with contextlib.closing ( urlopen ( self.pkglist_uri ) ) as webh: content_type = webh.info().get ( 'content-type', None ) @@ -333,12 +408,8 @@ class WebsyncRepo ( WebsyncBase ): "content type {!r} is not supported!".format ( content_type ) ) else: - package_list = tuple ( generate_pkglist ( webh ) ) - - webh.close() - - finally: - if 'webh' in locals() and webh: webh.close() + package_list = list ( generate_pkglist ( webh ) ) + # -- end with return package_list # --- end fetch_pkglist (...) --- @@ -347,6 +418,10 @@ class WebsyncPackageList ( WebsyncBase ): """Sync packages from multiple remotes via http. Packages uris are read from a file.""" + # retry on 404 makes no sense for this sync type since a local package list + # is used + HTTP_ERROR_RETRY_CODES = frozenset ({ 410, 500, 503 }) + def __init__ ( self, pkglist_file, *args, **kwargs ): """Initializes a WebsyncPackageList instance. @@ -420,8 +495,8 @@ class WebsyncPackageList ( WebsyncBase ): return True # --- end of _nosync (...) --- - def _dosync ( self ): - """Sync packages.""" + def _sync_packages ( self ): + """Fetches package files.""" package_list = self._fetch_package_list() # empty/unset package list @@ -439,4 +514,4 @@ class WebsyncPackageList ( WebsyncBase ): break return success - # --- end of _dosync (...) --- + # --- end of _sync_packages (...) --- From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id E1E1C1381F3 for ; Tue, 23 Jul 2013 14:57:23 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id A168BE0A62; Tue, 23 Jul 2013 14:57:23 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 087B4E0A60 for ; Tue, 23 Jul 2013 14:57:07 +0000 (UTC) Received: from hornbill.gentoo.org (hornbill.gentoo.org [94.100.119.163]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id D831033E998 for ; Tue, 23 Jul 2013 14:57:06 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by hornbill.gentoo.org (Postfix) with ESMTP id A986DE5470 for ; Tue, 23 Jul 2013 14:57:03 +0000 (UTC) From: "André Erdmann" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "André Erdmann" Message-ID: <1374571947.623d65c8a41808bd2962512f70a5e0db5bdf9f9d.dywi@gentoo> Subject: [gentoo-commits] proj/R_overlay:master commit in: roverlay/remote/ X-VCS-Repository: proj/R_overlay X-VCS-Files: roverlay/remote/websync.py X-VCS-Directories: roverlay/remote/ X-VCS-Committer: dywi X-VCS-Committer-Name: André Erdmann X-VCS-Revision: 623d65c8a41808bd2962512f70a5e0db5bdf9f9d X-VCS-Branch: master Date: Tue, 23 Jul 2013 14:57:03 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: 68c649a8-576f-408a-a879-37d2a45bb6a5 X-Archives-Hash: 48d47c20579ca85ba734caaa37b725a8 Message-ID: <20130723145703.F2kSBrCNpv6uJYHDbqeeBF8O938nOyfHLp-fQ0m-VBk@z> commit: 623d65c8a41808bd2962512f70a5e0db5bdf9f9d Author: André Erdmann mailerd de> AuthorDate: Tue Jul 23 09:32:27 2013 +0000 Commit: André Erdmann mailerd de> CommitDate: Tue Jul 23 09:32:27 2013 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=623d65c8 roverlay/remote/websync: retry on sync error Renamed _dosync() to _sync_packages(). _dosync() calls _sync_packages() and retries that up to MAX_WEBSYNC_RETRY times if a "known" url/http exception is caught (known := known and it makes sense to retry). Retry behavior needs some fine-tuning (e.g. don't try to refetch already downloaded packages - this (sooner or later) causes a connection timeout for me). --- roverlay/remote/websync.py | 113 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 19 deletions(-) diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py index fa0a555..87abab8 100644 --- a/roverlay/remote/websync.py +++ b/roverlay/remote/websync.py @@ -4,10 +4,14 @@ # Distributed under the terms of the GNU General Public License; # either version 2 of the License, or (at your option) any later version. +from __future__ import print_function + """websync, sync packages via http""" __all__ = [ 'WebsyncPackageList', 'WebsyncRepo', ] +import errno +import contextlib import re import os import sys @@ -15,22 +19,33 @@ import sys # py2 urllib2 vs py3 urllib.request if sys.version_info >= ( 3, ): import urllib.request as _urllib + import urllib.error as _urllib_error else: import urllib2 as _urllib + import urllib2 as _urllib_error -urlopen = _urllib.urlopen +urlopen = _urllib.urlopen +URLError = _urllib_error.URLError +HTTPError = _urllib_error.HTTPError del sys from roverlay import digest, util from roverlay.packageinfo import PackageInfo from roverlay.remote.basicrepo import BasicRepo +MAX_WEBSYNC_RETRY = 3 + +VERBOSE = True + # FIXME: websync does not support package deletion class WebsyncBase ( BasicRepo ): """Provides functionality for retrieving R packages via http. Not meant for direct usage.""" + HTTP_ERROR_RETRY_CODES = frozenset ({ 404, 410, 500, 503 }) + URL_ERROR_RETRY_CODES = frozenset ({ errno.ETIMEDOUT, }) + def __init__ ( self, name, distroot, @@ -138,9 +153,13 @@ class WebsyncBase ( BasicRepo ): bytes_fetched = 0 # FIXME: debug print (?) - print ( - "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) - ) + if VERBOSE: + print ( + "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) + ) + + # unlink the existing file first (if it exists) + util.try_unlink ( distfile ) with open ( distfile, mode='wb' ) as fh: block = webh.read ( self.transfer_blocksize ) @@ -179,8 +198,7 @@ class WebsyncBase ( BasicRepo ): else: return False - else: - # FIXME: debug print + elif VERBOSE: print ( "Skipping fetch for {f!r}".format ( f=distfile ) ) return self._package_synced ( package_file, distfile, src_uri ) @@ -198,8 +216,8 @@ class WebsyncBase ( BasicRepo ): return True # --- end of _package_synced (...) --- - def _dosync ( self ): - """Syncs this repo.""" + def _sync_packages ( self ): + """Fetches the package list and downloads the packages.""" package_list = self._fetch_package_list() # empty/unset package list @@ -229,8 +247,66 @@ class WebsyncBase ( BasicRepo ): break return success + # --- end of _sync_packages (...) --- + + def _dosync ( self, max_retry=MAX_WEBSYNC_RETRY ): + """Syncs this repo.""" + retry_count = 0 + want_retry = True + retval_tmp = None + retval = None + + while want_retry and retry_count < max_retry: + retry_count += 1 + want_retry = False + + try: + retval_tmp = self._sync_packages() + + except HTTPError as err: + # catch some error codes that are worth a retry + if err.code in self.HTTP_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with http error code {:d}. ' + 'Retrying...'.format ( err.code ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected http error code: {:d}".format ( err.code ) + ) + self.logger.exception ( err ) + raise + + except URLError as err: + if err.reason.errno in self.URL_ERROR_RETRY_CODES: + self.logger.info ( + 'sync failed with an url error (errno {:d}. ' + 'Retrying...'.format ( err.reason.errno ) + ) + want_retry = True + else: + self.logger.critical ( + "got an unexpected url error code: {:d}".format ( + err.reason.errno + ) + ) + self.logger.exception ( err ) + raise + else: + retval = retval_tmp + # -- end while + + if want_retry: + self.logger.error ( "retry count exhausted - sync finally failed" ) + return False + else: + return retval # --- end of _dosync (...) --- +# --- end of WebsyncBase --- + + class WebsyncRepo ( WebsyncBase ): """Sync a http repo using its PACKAGES file.""" @@ -323,8 +399,7 @@ class WebsyncRepo ( WebsyncBase ): # --- end of generate_pkglist (...) --- package_list = () - try: - webh = urlopen ( self.pkglist_uri ) + with contextlib.closing ( urlopen ( self.pkglist_uri ) ) as webh: content_type = webh.info().get ( 'content-type', None ) @@ -333,12 +408,8 @@ class WebsyncRepo ( WebsyncBase ): "content type {!r} is not supported!".format ( content_type ) ) else: - package_list = tuple ( generate_pkglist ( webh ) ) - - webh.close() - - finally: - if 'webh' in locals() and webh: webh.close() + package_list = list ( generate_pkglist ( webh ) ) + # -- end with return package_list # --- end fetch_pkglist (...) --- @@ -347,6 +418,10 @@ class WebsyncPackageList ( WebsyncBase ): """Sync packages from multiple remotes via http. Packages uris are read from a file.""" + # retry on 404 makes no sense for this sync type since a local package list + # is used + HTTP_ERROR_RETRY_CODES = frozenset ({ 410, 500, 503 }) + def __init__ ( self, pkglist_file, *args, **kwargs ): """Initializes a WebsyncPackageList instance. @@ -420,8 +495,8 @@ class WebsyncPackageList ( WebsyncBase ): return True # --- end of _nosync (...) --- - def _dosync ( self ): - """Sync packages.""" + def _sync_packages ( self ): + """Fetches package files.""" package_list = self._fetch_package_list() # empty/unset package list @@ -439,4 +514,4 @@ class WebsyncPackageList ( WebsyncBase ): break return success - # --- end of _dosync (...) --- + # --- end of _sync_packages (...) ---