public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "André Erdmann" <dywi@mailerd.de>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/R_overlay:master commit in: roverlay/remote/
Date: Tue, 31 Jul 2012 17:51:48 +0000 (UTC)	[thread overview]
Message-ID: <1343757062.2b49ac8b4752fa1e5efd3f51f15720e7d70f12a9.dywi@gentoo> (raw)

commit:     2b49ac8b4752fa1e5efd3f51f15720e7d70f12a9
Author:     André Erdmann <dywi <AT> mailerd <DOT> de>
AuthorDate: Tue Jul 31 17:51:02 2012 +0000
Commit:     André Erdmann <dywi <AT> mailerd <DOT> de>
CommitDate: Tue Jul 31 17:51:02 2012 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=2b49ac8b

remote: get packages via http

---
 roverlay/remote/basicrepo.py  |  241 ++++++++----------------
 roverlay/remote/repolist.py   |   10 +-
 roverlay/remote/repoloader.py |   49 ++++--
 roverlay/remote/rsync.py      |   23 ++-
 roverlay/remote/websync.py    |  410 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 536 insertions(+), 197 deletions(-)

diff --git a/roverlay/remote/basicrepo.py b/roverlay/remote/basicrepo.py
index 3dd09de..65b07eb 100644
--- a/roverlay/remote/basicrepo.py
+++ b/roverlay/remote/basicrepo.py
@@ -35,14 +35,17 @@ def normalize_uri ( uri, protocol, force_protocol=False ):
 		return uri
 # --- end of normalize_uri (...) ---
 
-class LocalRepo ( object ):
+class BasicRepo ( object ):
 	"""
 	This class represents a local repository - all packages are assumed
 	to exist in its distfiles dir and no remote syncing will occur.
 	It's the base class for remote repos.
 	"""
 
-	def __init__ ( self, name, distroot, directory=None, src_uri=None ):
+	def __init__ ( self,
+		name, distroot,
+		directory=None, src_uri=None, is_remote=False, remote_uri=None
+	):
 		"""Initializes a LocalRepo.
 
 		arguments:
@@ -50,29 +53,34 @@ class LocalRepo ( object ):
 		* directory -- distfiles dir, defaults to <DISTFILES root>/<name>
 		* src_uri   -- SRC_URI, defaults to http://localhost/R-Packages/<name>
 		"""
-		self.name = name
-
+		self.name   = name
 		self.logger = logging.getLogger (
 			self.__class__.__name__ + ':' + self.name
 		)
 
 		if directory is None:
-			self.distdir = os.path.join (
-				distroot,
-				# subdir repo names like CRAN/contrib are ok,
-				#  but make sure to use the correct path separator
-				self.name.replace ( '/', os.path.sep ),
-			)
+			# subdir repo names like CRAN/contrib are ok,
+			#  but make sure to use the correct path separator
+			self.distdir = \
+				distroot + os.path.sep + self.name.replace ( '/', os.path.sep )
+
 		else:
 			self.distdir = directory
 
 		if src_uri is None:
-			self.src_uri = '/'.join ( ( LOCALREPO_SRC_URI, self.name ) )
+			self.src_uri = LOCALREPO_SRC_URI + '/' +  self.name
+		elif len ( src_uri ) > 0 and src_uri [-1] == '/':
+			self.src_uri = src_uri [:-1]
 		else:
 			self.src_uri = src_uri
 
 		self.sync_status = 0
 
+		if remote_uri is not None:
+			self.is_remote  = True
+			self.remote_uri = remote_uri
+		else:
+			self.is_remote  = is_remote
 	# --- end of __init__ (...) ---
 
 	def ready ( self ):
@@ -110,9 +118,26 @@ class LocalRepo ( object ):
 	# --- end of _set_fail (...) ---
 
 	def __str__ ( self ):
-		return "repo '%s': DISTDIR '%s', SRC_URI '%s'" % (
-			self.name, self.distdir, self.src_uri
-		)
+		if hasattr ( self, 'remote_uri' ):
+			return \
+				'{cls} {name}: DISTDIR {distdir!r}, SRC_URI {src_uri!r}, '\
+				'REMOTE_URI {remote_uri!r}.'.format (
+					cls        = self.__class__.__name__,
+					name       = self.name,
+					distdir    = self.distdir,
+					src_uri    = self.src_uri \
+						if hasattr ( self, 'src_uri' ) else '[none]',
+					remote_uri = self.remote_uri
+				)
+		else:
+			return '{cls} {name}: DISTDIR {distdir!r}, SRC_URI {src_uri!r}.'.\
+				format (
+					cls     = self.__class__.__name__,
+					name    = self.name,
+					distdir = self.distdir,
+					src_uri = self.src_uri \
+						if hasattr ( self, 'src_uri' ) else '[none]'
+				)
 	# --- end of __str__ (...) ---
 
 	def get_name ( self ):
@@ -125,16 +150,24 @@ class LocalRepo ( object ):
 		return self.distdir
 	# --- end of get_distdir (...) ---
 
+	def get_remote_uri ( self ):
+		"""Returns the remote uri used for syncing this repo, or None if unset."""
+		return self.remote_uri if hasattr ( self, 'remote_uri' ) else None
+	# --- end of get_remote_uri (...) ---
+
+	# get_remote(...) -> get_remote_uri(...)
+	get_remote = get_remote_uri
+
 	def get_src_uri ( self, package_file=None ):
 		"""Returns the SRC_URI of this repository.
 
 		arguments:
 		* package_file -- if set and not None: returns a SRC_URI for this pkg
 		"""
-		if package_file is None:
-			return self.src_uri
+		if package_file is not None:
+			return self.src_uri + '/' +  package_file
 		else:
-			return '/'.join ( ( self.src_uri, package_file ) )
+			return self.src_uri
 	# --- end of get_src_uri (...) ---
 
 	# get_src(...) -> get_src_uri(...)
@@ -166,6 +199,28 @@ class LocalRepo ( object ):
 		return status
 	# --- end of sync (...) ---
 
+	def _package_nofail ( self, log_bad, **data ):
+		"""Tries to create a PackageInfo.
+		Logs failure if log_bad is True.
+
+		arguments:
+		* log_bad  -- if True, log packages rejected with a ValueError
+		* data     -- PackageInfo data
+
+		returns: PackageInfo on success, else None.
+		"""
+		try:
+			return PackageInfo ( **data )
+		except ValueError as expected:
+			if log_bad:
+				#self.logger.exception ( expected )
+				self.logger.info (
+					"filtered {f!r}: bad package".format ( f=filename )
+				)
+			return None
+
+	# --- end of _package_nofail (...) ---
+
 	def scan_distdir ( self,
 		is_package=None, log_filtered=False, log_bad=True
 	):
@@ -183,30 +238,9 @@ class LocalRepo ( object ):
 
 		raises: AssertionError if is_package is neither None nor a callable.
 		"""
-
-		def package_nofail ( filename, distdir ):
-			"""Tries to create a PackageInfo.
-			Logs failure if log_bad is True.
-
-			arguments:
-			* filename -- name of the package file (including .tar* suffix)
-			* distdir  -- filename's directory
-
-			returns: PackageInfo on success, else None.
-			"""
-			try:
-				return PackageInfo (
-					filename=filename, origin=self, distdir=distdir
-				)
-			except ( ValueError, ) as expected:
-				if log_bad:
-					#self.logger.exception ( expected )
-					self.logger.info (
-						"filtered %r: bad package" % filename
-					)
-				return None
-
-		# --- end of package_nofail (...) ---
+		package_nofail = lambda filename, distdir : self._package_nofail (
+			log_bad=log_bad, filename=filename, distdir=distdir, origin=self
+		)
 
 		if is_package is None:
 			# unfiltered variant
@@ -219,7 +253,7 @@ class LocalRepo ( object ):
 					if pkg is not None:
 						yield pkg
 
-		elif hasattr ( is_package, '__call__' ):
+		else:
 			# filtered variant (adds an if is_package... before yield)
 			for dirpath, dirnames, filenames in os.walk ( self.distdir ):
 				distdir = dirpath if dirpath != self.distdir else None
@@ -233,127 +267,6 @@ class LocalRepo ( object ):
 						self.logger.debug (
 							"filtered %r: not a package" % filename
 						)
-
-
-		else:
-			# faulty variant, raises Exception
-			raise AssertionError (
-				"is_package should either be None or a function."
-			)
-			#yield None
-
 	# --- end of scan_distdir (...) ---
 
-# --- end of LocalRepo ---
-
-
-class RemoteRepo ( LocalRepo ):
-	"""A template for remote repositories."""
-
-	def __init__ (
-		self, name, distroot, sync_proto,
-		directory=None,
-		src_uri=None, remote_uri=None, base_uri=None
-	):
-		"""Initializes a RemoteRepo.
-		Mainly consists of URI calculation that derived classes may find useful.
-
-		arguments:
-		* name       --
-		* sync_proto -- protocol used for syncing (e.g. 'rsync')
-		* directory  --
-		* src_uri    -- src uri, if set, else calculated using base/remote uri,
-		                 the leading <proto>:// can be left out in which case
-		                 http is assumed
-		* remote_uri -- uri used for syncing, if set, else calculated using
-		                 base/src uri, the leading <proto>:// can be left out
-		* base_uri   -- used to calculate remote/src uri,
-		                 example: localhost/R-packages/something
-
-		keyword condition:
-		* | { x : x in union(src,remote,base) and x not None } | >= 1
-		 ^= at least one out of src/remote/base uri is not None
-		"""
-		super ( RemoteRepo, self ) . __init__ (
-			name, distroot, directory, src_uri=''
-		)
-
-		self.sync_proto = sync_proto
-
-		# detemerine uris
-		if src_uri is None and remote_uri is None:
-			if base_uri is None:
-				# keyword condition not met
-				raise Exception ( "Bad initialization of RemoteRepo!" )
-
-			else:
-				# using base_uri for src,remote
-				self.src_uri = URI_SEPARATOR.join (
-					( DEFAULT_PROTOCOL, base_uri )
-				)
-
-				self.remote_uri = URI_SEPARATOR.join (
-					( sync_proto, base_uri )
-				)
-
-		elif src_uri is None:
-			# remote_uri is not None
-			self.remote_uri = normalize_uri ( remote_uri, self.sync_proto )
-
-			if base_uri is not None:
-				# using base_uri for src_uri
-				self.src_uri = URI_SEPARATOR.join (
-					( DEFAULT_PROTOCOL, base_uri )
-				)
-			else:
-				# using remote_uri for src_uri
-				self.src_uri = normalize_uri (
-					self.remote_uri, DEFAULT_PROTOCOL, force_protocol=True
-				)
-
-		elif remote_uri is None:
-			# src_uri is not None
-			self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL )
-
-			if base_uri is not None:
-				# using base_uri for remote_uri
-				self.remote_uri = URI_SEPARATOR.join (
-					( self.sync_proto, base_uri )
-				)
-			else:
-				# using src_uri for remote_uri
-				self.remote_uri = normalize_uri (
-					self.src_uri, self.sync_proto, force_protocol=True
-				)
-		else:
-			# remote and src not None
-			self.remote_uri = normalize_uri ( remote_uri, self.sync_proto )
-			self.src_uri    = normalize_uri ( src_uri, DEFAULT_PROTOCOL )
-
-	# --- end of __init__ (...) ---
-
-	def get_remote_uri ( self ):
-		"""Returns the remote uri of this RemoteRepo which used for syncing."""
-		return self.remote_uri
-	# --- end of get_remote_uri (...) ---
-
-	# get_remote(...) -> get_remote_uri(...)
-	get_remote = get_remote_uri
-
-	def _dosync ( self ):
-		"""Gets packages from remote(s) and returns True if the repo is ready
-		for overlay creation, else False.
-
-		Derived classes have to implement this method.
-		"""
-		raise Exception ( "RemoteRepo does not implement sync()." )
-	# --- end of _dosync (...) ---
-
-	def __str__ ( self ):
-		return "repo '%s': DISTDIR '%s', SRC_URI '%s', REMOTE_URI '%s'" % (
-			self.name, self.distdir, self.src_uri, self.remote_uri
-		)
-	# --- end of __str__ (...) ---
-
-# --- end of RemoteRepo ---
-
+# --- end of BasicRepo ---

diff --git a/roverlay/remote/repolist.py b/roverlay/remote/repolist.py
index cc673e6..a623db0 100644
--- a/roverlay/remote/repolist.py
+++ b/roverlay/remote/repolist.py
@@ -4,7 +4,7 @@ import os.path
 
 from roverlay import config
 from roverlay.remote.repoloader import read_repofile
-from roverlay.remote.basicrepo import LocalRepo
+from roverlay.remote.basicrepo import BasicRepo
 
 class RepoList ( object ):
 	"""Controls several Repo objects."""
@@ -55,7 +55,7 @@ class RepoList ( object ):
 	# --- end of _pkg_filter (...) ---
 
 	def add_distdir ( self, distdir, src_uri=None, name=None ):
-		"""Adds a local package directory as LocalRepo.
+		"""Adds a local package directory as BasicRepo.
 
 		arguments:
 		* distdir --
@@ -64,7 +64,7 @@ class RepoList ( object ):
 		             (FIXME: could add RESTRICT="fetch" to those ebuilds)
 		* name    -- name of the repo, defaults to os.path.basename (distdir)
 		"""
-		self.repos.append ( LocalRepo (
+		self.repos.append ( BasicRepo (
 			name=os.path.basename ( distdir ) if name is None else name,
 			directory=distdir,
 			src_uri=src_uri
@@ -72,7 +72,7 @@ class RepoList ( object ):
 	# --- end of add_distdir (...) ---
 
 	def add_distdirs ( self, distdirs ):
-		"""Adds several distdirs as LocalRepos.
+		"""Adds several distdirs as BasicRepos.
 		All distdirs will have an invalid SRC_URI and a default name,
 		use add_distdir() if you want usable ebuilds.
 
@@ -81,7 +81,7 @@ class RepoList ( object ):
 		"""
 		def gen_repos():
 			for d in distdirs:
-				repo = LocalRepo (
+				repo = BasicRepo (
 					name=os.path.basename ( d ),
 					# FIXME: --force_distroot should block --distdir
 					directory=d,

diff --git a/roverlay/remote/repoloader.py b/roverlay/remote/repoloader.py
index 2a92526..ba49d0a 100644
--- a/roverlay/remote/repoloader.py
+++ b/roverlay/remote/repoloader.py
@@ -11,8 +11,9 @@ except ImportError:
 
 from roverlay import config
 
-from roverlay.remote.basicrepo import LocalRepo
-from roverlay.remote.rsync     import RsyncRepo
+from roverlay.remote import basicrepo
+from roverlay.remote import rsync
+from roverlay.remote import websync
 
 LOGGER = logging.getLogger ( 'repoloader' )
 
@@ -51,35 +52,49 @@ def read_repofile ( repo_file, distroot, lenient=False, force_distroot=False ):
 
 		repo_type = get ( 'type', 'rsync' ).lower()
 
-		repo_name = get ( 'name', name )
+		common_kwargs = dict (
+			name      = get ( 'name', name ),
+			directory = None if force_distroot else get ( 'directory' ),
+			distroot  = distroot,
+			src_uri   = get ( 'src_uri' )
+		)
 
-		repo_distdir = None if force_distroot else get ( 'directory' )
 
 
 		if repo_type == 'local':
-			repo = LocalRepo (
-				name      = repo_name,
-				distroot  = distroot,
-				directory = repo_distdir,
-				src_uri   = get ( 'src_uri' )
-			)
+			repo = basicrepo.BasicRepo ( **common_kwargs )
+
 		elif repo_type == 'rsync':
 			extra_opts = get ( 'extra_rsync_opts' )
 			if extra_opts:
 				extra_opts = extra_opts.split ( ' ' )
 
-			repo = RsyncRepo (
-				name       = repo_name,
-				distroot   = distroot,
-				directory  = repo_distdir,
-				src_uri    = get ( 'src_uri' ),
+			repo = rsync.RsyncRepo (
 				rsync_uri  = get ( 'rsync_uri' ),
-				base_uri   = get ( 'base_uri' ),
 				extra_opts = extra_opts,
 				recursive  = get ( 'recursive', False ) == 'yes',
+				**common_kwargs
+			)
+
+		elif repo_type == 'websync_repo':
+			repo = websync.WebsyncRepo (
+				pkglist_file = get ( 'pkglist_file', 'PACKAGES' ),
+				pkglist_uri  = get ( 'pkglist_uri' ),
+				digest_type  = get ( 'digest_type' ) or get ( 'digest' ),
+				**common_kwargs
 			)
+
+		elif repo_type in ( 'websync_pkglist', 'websync_package_list' ):
+			repo = websync.WebsyncPackageList (
+				pkglist_file = get ( 'pkglist_file' ) or get ( 'pkglist' ),
+				#digest_type  = get ( 'digest_type' ) or get ( 'digest' ),
+				**common_kwargs
+			)
+
 		else:
-			LOGGER.error ( "Unknown repo type %s for %s" % ( repo_type, name ) )
+			LOGGER.error ( "Unknown repo type {} for {}!".format (
+				repo_type, name
+			) )
 
 
 		if repo is not None:

diff --git a/roverlay/remote/rsync.py b/roverlay/remote/rsync.py
index 11efd27..90f21f5 100644
--- a/roverlay/remote/rsync.py
+++ b/roverlay/remote/rsync.py
@@ -4,8 +4,7 @@ import subprocess
 
 from roverlay import config, util
 
-#from roverlay.remote.basicrepo import LocalRepo, RemoteRepo
-from roverlay.remote.basicrepo import RemoteRepo
+from roverlay.remote.basicrepo import BasicRepo
 
 RSYNC_ENV = util.keepenv (
 	'PATH',
@@ -46,12 +45,16 @@ DEFAULT_RSYNC_OPTS =  (
 	'--chmod=ugo=r,u+w,Dugo+x', # 0755 for transferred dirs, 0644 for files
 )
 
-class RsyncRepo ( RemoteRepo ):
+class RsyncRepo ( BasicRepo ):
 
-	def __init__ (
-		self, name, distroot,
-		directory=None, src_uri=None, rsync_uri=None, base_uri=None,
-		recursive=False, extra_opts=None
+	def __init__ (	self,
+		name,
+		distroot,
+		src_uri,
+		rsync_uri,
+		directory=None,
+		recursive=False,
+		extra_opts=None
 	):
 		"""Initializes an RsyncRepo.
 
@@ -68,8 +71,8 @@ class RsyncRepo ( RemoteRepo ):
 		#  using '' as remote protocol which leaves uris unchanged when
 		#   normalizing them for rsync usage
 		super ( RsyncRepo, self ) . __init__ (
-			name, distroot=distroot, sync_proto='', directory=directory,
-			src_uri=src_uri, remote_uri=rsync_uri, base_uri=base_uri
+			name=name, distroot=distroot, directory=directory,
+			src_uri=src_uri, remote_uri=rsync_uri
 		)
 
 		# syncing directories, not files - always appending a slash at the end
@@ -83,8 +86,6 @@ class RsyncRepo ( RemoteRepo ):
 				self.extra_opts.extend ( extra_opts )
 		else:
 			self.extra_opts = extra_opts
-
-		self.sync_protocol = 'rsync'
 	# --- end of __init__ (...) ---
 
 	def _rsync_argv ( self ):

diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py
new file mode 100644
index 0000000..a0ded6c
--- /dev/null
+++ b/roverlay/remote/websync.py
@@ -0,0 +1,410 @@
+
+import re
+import os
+import urllib2
+
+from roverlay                  import digest, util
+from roverlay.packageinfo      import PackageInfo
+from roverlay.remote.basicrepo import BasicRepo
+
+class WebsyncBase ( BasicRepo ):
+	"""Provides functionality for retrieving R packages via http.
+	Not meant for direct usage."""
+
+	def __init__ ( self,
+		name,
+		distroot,
+		src_uri,
+		directory=None,
+		digest_type=None
+	):
+		"""Initializes a WebsyncBase instance.
+
+		arguments:
+		* name        -- see BasicRepo
+		* distroot    -- ^
+		* src_uri     -- ^
+		* directory   -- ^
+		* digest_type -- if set and not None/"None":
+		                  verify packages using the given digest type
+		                  Supported digest types: 'md5'.
+		"""
+		super ( WebsyncBase, self ) . __init__ (
+			name=name,
+			distroot=distroot,
+			src_uri=src_uri,
+			remote_uri=src_uri,
+			directory=directory
+		)
+
+		if digest_type is None:
+			self._digest_type = None
+
+		elif str ( digest_type ).lower() in ( 'none', 'disabled', 'off' ):
+			self._digest_type = None
+
+		elif digest.digest_supported ( digest_type ):
+			# setting a digest_type (other than None) expects package_list
+			# to be a 2-tuple <package_file, digest sum> list,
+			# else a list of package_files is expected.
+			self._digest_type = digest_type
+
+		else:
+			raise Exception (
+				"Unknown/unsupported digest type {}!".format ( digest_type )
+			)
+
+		# download 8KiB per block
+		self.transfer_blocksize = 8192
+	# --- end of __init__ (...) ---
+
+	def _fetch_package_list ( self ):
+		"""This function returns a list of packages to download."""
+		raise Exception ( "method stub" )
+	# --- end of _fetch_package_list (...) ---
+
+	def _get_package ( self, package_file, src_uri, expected_digest ):
+		"""Gets a package, i.e. downloads it if it doesn't exist locally
+		or fails verification (size, digest).
+
+		arguments:
+		* package_file    -- package file name
+		* src_uri         -- uri for package_file
+		* expected_digest -- expected digest for package_file or None (^=disable)
+		"""
+		distfile = self.distdir + os.sep + package_file
+		webh     = urllib2.urlopen ( src_uri )
+		#web_info = webh.info()
+
+		expected_filesize = int ( webh.info().get ( 'content-length', -1 ) )
+
+		if os.access ( distfile, os.F_OK ):
+			# package exists locally, verify it (size, digest)
+			fetch_required = False
+			localsize      = os.path.getsize ( distfile )
+
+			if localsize != expected_filesize:
+				# size mismatch
+				self.logger.info (
+					'size mismatch for {f!r}: expected {websize} bytes '
+					'but got {localsize}!'.format (
+						f         = package_file,
+						websize   = expected_filesize,
+						localsize = localsize
+					)
+				)
+				fetch_required = True
+
+			elif expected_digest is not None:
+				our_digest = digest.dodigest_file ( distfile, self._digest_type )
+
+				if our_digest != expected_digest:
+					# digest mismatch
+					self.logger.warning (
+						'{dtype} mismatch for {f!r}: '
+						'expected {theirs} but got {ours} - refetching.'.format (
+							dtype  = self._digest_type,
+							f      = distfile,
+							theirs = expected_digest,
+							ours   = our_digest
+						)
+					)
+					fetch_required = True
+
+		else:
+			fetch_required = True
+
+		if fetch_required:
+			bytes_fetched = 0
+
+			# FIXME: debug print (?)
+			print (
+				"Fetching {f} from {u} ...".format ( f=package_file, u=src_uri )
+			)
+
+			with open ( distfile, mode='wb' ) as fh:
+				block = webh.read ( self.transfer_blocksize )
+				while block:
+					# write block to file
+					fh.write ( block )
+					# ? bytelen
+					bytes_fetched += len ( block )
+
+					# get the next block
+					block = webh.read ( self.transfer_blocksize )
+			# -- with
+
+			if bytes_fetched == expected_filesize:
+				if expected_digest is not None:
+					our_digest = digest.dodigest_file ( distfile, self._digest_type )
+
+					if our_digest != expected_digest:
+						# fetched package's digest does not match the expected one,
+						# refuse to use it
+						self.logger.warning (
+							'bad {dtype} digest for {f!r}, expected {theirs} but '
+							'got {ours} - removing this package.'.format (
+								dtype  = self._digest_type,
+								f      = distfile,
+								theirs = expected_digest,
+								ours   = our_digest
+							)
+						)
+						os.remove ( distfile )
+
+						# package removed -> return success
+						return True
+					# -- if
+				# -- if
+
+			else:
+				return False
+		else:
+			# FIXME: debug print
+			print ( "Skipping fetch for {f!r}".format ( f=distfile ) )
+
+		return self._package_synced ( package_file, distfile, src_uri )
+	# --- end of _get_package (...) ---
+
+	def _package_synced ( self, package_filename, distfile, src_uri ):
+		"""Called when a package has been synced (=exists locally when
+		_get_package() is done).
+
+		arguments:
+		* package_filename --
+		* distfile         --
+		* src_uri          --
+		"""
+		return True
+	# --- end of _package_synced (...) ---
+
+	def _dosync ( self ):
+		"""Syncs this repo."""
+		package_list = self._fetch_package_list()
+
+		# empty/unset package list
+		if not package_list: return True
+
+		util.dodir ( self.distdir )
+
+		success = True
+
+		if self._digest_type is not None:
+			for package_file, expected_digest in package_list:
+				src_uri  = self.get_src_uri ( package_file )
+
+				if not self._get_package (
+					package_file, src_uri, expected_digest
+				):
+					success = False
+					break
+		else:
+			for package_file in package_list:
+				src_uri  = self.get_src_uri ( package_file )
+
+				if not self._get_package (
+					package_file, src_uri, expected_digest=None
+				):
+					success = False
+					break
+
+		return success
+	# --- end of _dosync (...) ---
+
+
+class WebsyncRepo ( WebsyncBase ):
+	"""Sync a http repo using its PACKAGES file."""
+	# FIXME: hardcoded for md5
+
+	def __init__ ( self,
+		pkglist_uri=None,
+		pkglist_file=None,
+		*args,
+		**kwargs
+	):
+		"""Initializes a WebsyncRepo instance.
+
+		arguments:
+		* pkglist_uri      -- if set and not None: uri of the package list file
+		* pkglist_file     -- if set and not None: name of the package list file,
+		                      this is used to calculate the pkglist_uri
+		                      pkglist_uri = <src_uri>/<pkglist_file>
+		* *args / **kwargs -- see WebsyncBase / BasicRepo
+
+		pkglist file: this is a file with debian control file-like syntax
+		              listing all packages.
+		Example: http://www.omegahat.org/R/src/contrib/PACKAGES (2012-07-31)
+		"""
+		super ( WebsyncRepo, self ) . __init__ ( *args, **kwargs )
+
+		if self._digest_type is None:
+			self.FIELDREGEX = re.compile (
+				'^\s*(?P<name>(package|version))[:]\s*(?P<value>.+)',
+				re.IGNORECASE
+			)
+		else:
+			# used to filter field names (package,version,md5sum)
+			self.FIELDREGEX = re.compile (
+				'^\s*(?P<name>(package|version|md5sum))[:]\s*(?P<value>.+)',
+				re.IGNORECASE
+			)
+
+		self.pkglist_uri = pkglist_uri or self.get_src_uri ( pkglist_file )
+		if not self.pkglist_uri:
+			raise Exception ( "pkglist_uri is unset!" )
+	# --- end of __init__ (...) ---
+
+	def _fetch_package_list ( self ):
+		"""Returns the list of packages to be downloaded.
+		List format:
+		* if digest verification is enabled:
+		   List ::= [ ( package_file, digest ), ... ]
+		* else
+		   List ::= [ package_file, ... ]
+		"""
+
+		def generate_pkglist ( fh ):
+			"""Generates the package list using the given file handle.
+
+			arguments:
+			* fh -- file handle to read from
+			"""
+			info = dict()
+
+			max_info_len = 3 if self._digest_type is not None else 2
+
+			for match in (
+				filter ( None, (
+					self.FIELDREGEX.match ( l ) for l in fh.readlines()
+				) )
+			):
+				name, value = match.group ( 'name', 'value' )
+				info [name.lower()] = value
+
+				if len ( info.keys() ) == max_info_len:
+
+					pkgfile = '{name}_{version}.tar.gz'.format (
+						name=info ['package'], version=info ['version']
+					)
+
+					if self._digest_type is not None:
+						yield ( pkgfile, info ['md5sum'] )
+						#yield ( pkgfile, ( 'md5', info ['md5sum'] ) )
+					else:
+						yield pkgfile
+
+					info.clear()
+		# --- end of generate_pkglist (...) ---
+
+		package_list = ()
+		try:
+			webh = urllib2.urlopen ( self.pkglist_uri )
+
+			content_type = webh.info().get ( 'content-type', None )
+
+			if content_type != 'text/plain':
+				print (
+					"content type {!r} is not supported!".format ( content_type )
+				)
+			else:
+				package_list = tuple ( generate_pkglist ( webh ) )
+
+			webh.close()
+
+		finally:
+			if 'webh' in locals() and webh: webh.close()
+
+		return package_list
+	# --- end of _fetch_package_list (...) ---
+
+class WebsyncPackageList ( WebsyncBase ):
+	"""Sync packages from multiple remotes via http. Package uris are read
+	from a file."""
+
+	# FIXME: does not support --nosync
+
+	def __init__ ( self, pkglist_file, *args, **kwargs ):
+		"""Initializes a WebsyncPackageList instance.
+
+		arguments:
+		* pkglist_file     -- path to the package list file that lists
+		                      one package http uri per line
+		* *args / **kwargs -- see WebsyncBase, BasicRepo
+		"""
+		super ( WebsyncPackageList, self ) . __init__ ( *args, **kwargs )
+
+		self._pkglist_file = os.path.abspath ( pkglist_file )
+
+		del self.src_uri
+
+		self._synced_packages = list()
+
+	# --- end of __init__ (...) ---
+
+	def _fetch_package_list ( self ):
+		"""Returns the package list.
+		Format:
+		pkglist ::= [ ( package_file, src_uri ), ... ]
+		"""
+		pkglist = list()
+		with open ( self._pkglist_file, mode='r' ) as fh:
+			for line in fh.readlines():
+				src_uri = line.strip()
+				if src_uri:
+					pkglist.append ( (
+						src_uri.rpartition ( '/' ) [-1],
+						src_uri
+					) )
+
+		return pkglist
+	# --- end of _fetch_package_list (...) ---
+
+	def _package_synced ( self, package_filename, distfile, src_uri ):
+		self._synced_packages.append (
+			( package_filename, src_uri )
+		)
+		return True
+	# --- end of _package_synced (...) ---
+
+	def scan_distdir ( self, log_bad=True, **kwargs_ignored ):
+		for package_filename, src_uri in self._synced_packages:
+			pkg = self._package_nofail (
+				log_bad,
+				filename = package_filename,
+				origin   = self,
+				src_uri  = src_uri
+			)
+			if pkg is not None:
+				yield pkg
+	# --- end of scan_distdir (...) ---
+
+	def _nosync ( self ):
+		"""nosync - report existing packages"""
+		for package_file, src_uri in self._fetch_package_list():
+			distfile = self.distdir + os.sep + package_file
+			if os.access ( distfile, os.F_OK ):
+				self._package_synced ( package_file, distfile, src_uri )
+
+		return True
+	# --- end of _nosync (...) ---
+
+	def _dosync ( self ):
+		"""Sync packages."""
+		package_list = self._fetch_package_list()
+
+		# empty/unset package list
+		if not package_list: return True
+
+		util.dodir ( self.distdir )
+
+		success = True
+
+		for package_file, src_uri in package_list:
+			if not self._get_package (
+				package_file, src_uri, expected_digest=None
+			):
+				success = False
+				break
+
+		return success
+	# --- end of _dosync (...) ---


             reply	other threads:[~2012-07-31 17:51 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-31 17:51 André Erdmann [this message]
  -- strict thread matches above, loose matches on Subject: below --
2015-01-26 17:41 [gentoo-commits] proj/R_overlay:master commit in: roverlay/remote/ André Erdmann
2014-02-16 16:45 André Erdmann
2014-02-15 19:49 André Erdmann
2014-02-15 19:49 André Erdmann
2014-02-15 19:49 André Erdmann
2013-09-03 13:15 André Erdmann
2013-09-03  8:35 André Erdmann
2013-09-02 16:21 André Erdmann
2013-08-29 15:08 André Erdmann
2013-08-07 16:10 André Erdmann
2013-08-07 16:10 André Erdmann
2013-07-24  9:54 André Erdmann
2013-07-24  9:54 André Erdmann
2013-07-24  9:54 André Erdmann
2013-07-23 14:57 André Erdmann
2013-07-23  9:38 [gentoo-commits] proj/R_overlay:gsoc13/next " André Erdmann
2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:master " André Erdmann
2013-07-23  9:38 [gentoo-commits] proj/R_overlay:gsoc13/next " André Erdmann
2013-07-23 14:57 ` [gentoo-commits] proj/R_overlay:master " André Erdmann
2013-07-16 16:35 [gentoo-commits] proj/R_overlay:gsoc13/next " André Erdmann
2013-07-16 16:36 ` [gentoo-commits] proj/R_overlay:master " André Erdmann
2012-08-13 18:07 André Erdmann
2012-08-11  0:01 André Erdmann
2012-08-10 15:16 André Erdmann
2012-08-10 15:16 André Erdmann
2012-08-09  9:26 André Erdmann
2012-08-02 15:14 André Erdmann
2012-08-01  7:33 André Erdmann
2012-07-09 17:25 André Erdmann
2012-07-06  8:15 André Erdmann
2012-07-04 18:21 André Erdmann
2012-07-04 18:21 André Erdmann
2012-07-03 17:48 André Erdmann
2012-06-27 14:46 André Erdmann
2012-06-26 15:55 André Erdmann
2012-06-26 15:42 André Erdmann
2012-06-25 18:19 André Erdmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1343757062.2b49ac8b4752fa1e5efd3f51f15720e7d70f12a9.dywi@gentoo \
    --to=dywi@mailerd.de \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox