* [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579)
@ 2015-01-16 1:27 Zac Medico
2015-01-16 3:00 ` Brian Dolbec
0 siblings, 1 reply; 6+ messages in thread
From: Zac Medico @ 2015-01-16 1:27 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
This adds support for using a binary package's compression header to
determine the compression type, providing forward-compatibility for
xz and gzip decompression. The file name extension is disregarded, so
that it will be possible to use a compression-independent file naming
scheme in the future (see bug #150031 for discussion about proposed
file naming schemes).
Currently, only decompression is supported. It's useful to provide
forward-compatibility now, so that binhost clients will be prepared
to handle future binhost servers that use xz or gzip compression.
X-Gentoo-Bug: 142579
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=142579
---
| 28 ++++++++++++++--
pym/portage/util/compression_probe.py | 62 +++++++++++++++++++++++++++++++++++
2 files changed, 88 insertions(+), 2 deletions(-)
create mode 100644 pym/portage/util/compression_probe.py
--git a/pym/_emerge/BinpkgExtractorAsync.py b/pym/_emerge/BinpkgExtractorAsync.py
index be74c2f..8d446f9 100644
--- a/pym/_emerge/BinpkgExtractorAsync.py
+++ b/pym/_emerge/BinpkgExtractorAsync.py
@@ -1,8 +1,12 @@
# Copyright 1999-2013 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
+import logging
+
from _emerge.SpawnProcess import SpawnProcess
import portage
+from portage.localization import _
+from portage.util.compression_probe import compression_probe
import signal
import subprocess
@@ -20,19 +24,39 @@ class BinpkgExtractorAsync(SpawnProcess):
if b"--xattrs" in output:
tar_options = "--xattrs"
+ comp = compression_probe(self.pkg_path)
+ if comp is None:
+ self.scheduler.output("!!! %s\n" %
+ _("File compression header unrecognized: %s") %
+ self.pkg_path, log_path=self.logfile,
+ background=self.background, level=logging.ERROR)
+ self.returncode = 1
+ self._async_wait()
+ return
+
# Add -q to bzip2 opts, in order to avoid "trailing garbage after
# EOF ignored" warning messages due to xpak trailer.
+ if comp == "bzip2":
+ decomp_cmd = "${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d}"
+ elif comp == "xz":
+ decomp_cmd = "xz -d"
+ elif comp == "gzip":
+ decomp_cmd = "gzip -d"
+ else:
+ raise AssertionError("Unexpected compression: %s" % comp)
+
# SIGPIPE handling (128 + SIGPIPE) should be compatible with
# assert_sigpipe_ok() that's used by the ebuild unpack() helper.
self.args = [self._shell_binary, "-c",
- ("${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d} -cq -- %s | tar -xp %s -C %s -f - ; " + \
+ ("%s -cq -- %s | tar -xp %s -C %s -f - ; " + \
"p=(${PIPESTATUS[@]}) ; " + \
"if [[ ${p[0]} != 0 && ${p[0]} != %d ]] ; then " % (128 + signal.SIGPIPE) + \
"echo bzip2 failed with status ${p[0]} ; exit ${p[0]} ; fi ; " + \
"if [ ${p[1]} != 0 ] ; then " + \
"echo tar failed with status ${p[1]} ; exit ${p[1]} ; fi ; " + \
"exit 0 ;") % \
- (portage._shell_quote(self.pkg_path),
+ (decomp_cmd,
+ portage._shell_quote(self.pkg_path),
tar_options,
portage._shell_quote(self.image_dir))]
diff --git a/pym/portage/util/compression_probe.py b/pym/portage/util/compression_probe.py
new file mode 100644
index 0000000..7bdd28f
--- /dev/null
+++ b/pym/portage/util/compression_probe.py
@@ -0,0 +1,62 @@
+# Copyright 2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import re
+import sys
+
+if sys.hexversion >= 0x3000000:
+ basestring = str
+
+from portage import _encodings, _unicode_encode
+from portage.exception import FileNotFound, PermissionDenied
+
+_compression_re = re.compile(b'^(' +
+ b'(?P<gzip>\x1f\x8b)|' +
+ b'(?P<bzip2>\x42\x5a\x68\x39)|' +
+ b'(?P<xz>\xfd\x37\x7a\x58\x5a\x00))')
+
+def compression_probe(f):
+ """
+ Identify the compression type of a file. Returns one of the
+ following identifier strings:
+
+ bzip2
+ gzip
+ xz
+
+ @param f: a file path, or file-like object
+ @type f: file-like object
+ @return: a string identifying the compression type, or None if the
+ compression type is unrecognized
+ @rtype str or None
+ """
+
+ open_file = isinstance(f, basestring)
+ if open_file:
+ try:
+ f = open(_unicode_encode(f,
+ encoding=_encodings['fs'], errors='strict'), mode='rb')
+ except IOError as e:
+ if e.errno == PermissionDenied.errno:
+ raise PermissionDenied(f)
+ elif e.errno in (errno.ENOENT, errno.ESTALE):
+ raise FileNotFound(f)
+ else:
+ raise
+
+ try:
+ return _compression_probe_file(f)
+ finally:
+ if open_file:
+ f.close()
+
+def _compression_probe_file(f):
+
+ m = _compression_re.match(f.read(6))
+ if m is not None:
+ for k, v in m.groupdict().items():
+ if v is not None:
+ return k
+
+ return None
--
2.0.5
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579)
2015-01-16 1:27 [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579) Zac Medico
@ 2015-01-16 3:00 ` Brian Dolbec
2015-01-16 4:53 ` Zac Medico
0 siblings, 1 reply; 6+ messages in thread
From: Brian Dolbec @ 2015-01-16 3:00 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: text/plain, Size: 5830 bytes --]
On Thu, 15 Jan 2015 17:27:23 -0800
Zac Medico <zmedico@gentoo.org> wrote:
> This adds support for using a binary package's compression header to
> determine the compression type, providing forward-compatibility for
> xz and gzip decompression. The file name extension is disregared, so
> that it will be possible use a compression-independent file naming
> scheme in the future (see bug #150031 for discussion about proposed
> file naming schemes).
>
> Currently, only decompression is supported. It's useful to provide
> forward-compatibility now, so that binhost clients will be prepared
> to handle future binhost servers that use xz or gzip compression.
>
> X-Gentoo-Bug: 142579
> X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=142579
> ---
> pym/_emerge/BinpkgExtractorAsync.py | 28 ++++++++++++++--
> pym/portage/util/compression_probe.py | 62
> +++++++++++++++++++++++++++++++++++ 2 files changed, 88
> insertions(+), 2 deletions(-) create mode 100644
> pym/portage/util/compression_probe.py
>
> diff --git a/pym/_emerge/BinpkgExtractorAsync.py
> b/pym/_emerge/BinpkgExtractorAsync.py index be74c2f..8d446f9 100644
> --- a/pym/_emerge/BinpkgExtractorAsync.py
> +++ b/pym/_emerge/BinpkgExtractorAsync.py
> @@ -1,8 +1,12 @@
> # Copyright 1999-2013 Gentoo Foundation
> # Distributed under the terms of the GNU General Public License v2
>
> +import logging
> +
> from _emerge.SpawnProcess import SpawnProcess
> import portage
> +from portage.localization import _
> +from portage.util.compression_probe import compression_probe
> import signal
> import subprocess
>
> @@ -20,19 +24,39 @@ class BinpkgExtractorAsync(SpawnProcess):
> if b"--xattrs" in output:
> tar_options = "--xattrs"
>
> + comp = compression_probe(self.pkg_path)
> + if comp is None:
> + self.scheduler.output("!!! %s\n" %
> + _("File compression header
> unrecognized: %s") %
> + self.pkg_path, log_path=self.logfile,
> + background=self.background,
> level=logging.ERROR)
> + self.returncode = 1
> + self._async_wait()
> + return
> +
> # Add -q to bzip2 opts, in order to avoid "trailing
> garbage after # EOF ignored" warning messages due to xpak trailer.
> + if comp == "bzip2":
> + decomp_cmd =
> "${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d}"
> + elif comp == "xz":
> + decomp_cmd = "xz -d"
> + elif comp == "gzip":
> + decomp_cmd = "gzip -d"
> + else:
> + raise AssertionError("Unexpected
> compression: %s" % comp) +
> # SIGPIPE handling (128 + SIGPIPE) should be
> compatible with # assert_sigpipe_ok() that's used by the ebuild
> unpack() helper. self.args = [self._shell_binary, "-c",
> -
> ("${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d} -cq -- %s |
> tar -xp %s -C %s -f - ; " + \
> + ("%s -cq -- %s | tar -xp %s -C %s -f - ; " +
> \ "p=(${PIPESTATUS[@]}) ; " + \
> "if [[ ${p[0]} != 0 && ${p[0]} != %d ]] ;
> then " % (128 + signal.SIGPIPE) + \ "echo bzip2 failed with status
> ${p[0]} ; exit ${p[0]} ; fi ; " + \ "if [ ${p[1]} != 0 ] ; then " + \
> "echo tar failed with status ${p[1]} ; exit
> ${p[1]} ; fi ; " + \ "exit 0 ;") % \
> - (portage._shell_quote(self.pkg_path),
> + (decomp_cmd,
> + portage._shell_quote(self.pkg_path),
> tar_options,
> portage._shell_quote(self.image_dir))]
>
No offense, but yuk to the if foo .... else bar... else...
I already have code that does this much better, I decided I was going
to release it separately from catalyst because it is generally useful
and in many ways more future proof. I know you were interested in it,
but I hadn't gotten around to establishing it in a separate repo. I
also didn't know you were already working on this.
I've attached it to this email for you to look over. It needs a bit of
work for an independent release, but it is easily extended with
configuration changes. It can also be easily extended with custom
commands if needed.
If you are interested, it would not take long to have it release worthy
and in the tree.
> diff --git a/pym/portage/util/compression_probe.py
> b/pym/portage/util/compression_probe.py new file mode 100644
> index 0000000..7bdd28f
> --- /dev/null
> +++ b/pym/portage/util/compression_probe.py
> @@ -0,0 +1,62 @@
> +# Copyright 2015 Gentoo Foundation
> +# Distributed under the terms of the GNU General Public License v2
> +
> +import errno
> +import re
> +import sys
> +
> +if sys.hexversion >= 0x3000000:
> + basestring = str
> +
> +from portage import _encodings, _unicode_encode
> +from portage.exception import FileNotFound, PermissionDenied
> +
> +_compression_re = re.compile(b'^(' +
> + b'(?P<gzip>\x1f\x8b)|' +
> + b'(?P<bzip2>\x42\x5a\x68\x39)|' +
> + b'(?P<xz>\xfd\x37\x7a\x58\x5a\x00))')
> +
> +def compression_probe(f):
> + """
> + Identify the compression type of a file. Returns one of the
> + following identifier strings:
> +
> + bzip2
> + gzip
> + xz
> +
> + @param f: a file path, or file-like object
> + @type f: file-like object
> + @return: a string identifying the compression type, or None
> if the
> + compression type is unrecognized
> + @rtype str or None
> + """
> +
> + open_file = isinstance(f, basestring)
> + if open_file:
> + try:
> + f = open(_unicode_encode(f,
> + encoding=_encodings['fs'],
> errors='strict'), mode='rb')
> + except IOError as e:
> + if e.errno == PermissionDenied.errno:
> + raise PermissionDenied(f)
> + elif e.errno in (errno.ENOENT, errno.ESTALE):
> + raise FileNotFound(f)
> + else:
> + raise
> +
> + try:
> + return _compression_probe_file(f)
> + finally:
> + if open_file:
> + f.close()
> +
> +def _compression_probe_file(f):
> +
> + m = _compression_re.match(f.read(6))
> + if m is not None:
> + for k, v in m.groupdict().items():
> + if v is not None:
> + return k
> +
> + return None
--
Brian Dolbec <dolsen>
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: compress.py --]
[-- Type: text/x-python, Size: 14061 bytes --]
# Maintained in full by:
# Brian Dolbec <dolsent@gentoo.org>
'''
compress.py
Utility class to hold and handle all possible compression
and de-compression of files using native linux utilities.
Including rsync transfers.
'''
import os
from collections import namedtuple
from support import cmd
# Ordered field names of one (de)compressor definition entry; also used
# as the namedtuple field names for each loaded definition.
DEFINITION_FIELDS = ["func", "cmd", "args", "id", "extensions"]
# Expected Python type of each field, in the same order as
# DEFINITION_FIELDS.
DEFINITION_TYPES = [ str, str, list, str, list]
DEFINITION_HELP = \
'''The definition entries are to follow the the definition_types
with the exception of the first entry "Type" which is a mode identifier
for use in the class as a type ID and printable output string.
Definiton entries are composed of the following:
access key: list of definition fields values.
eg:
"tar" :["_common", "tar", ["-cpf", "%(filename)s", "-C", "%(basedir)s", "%(source)s"], "TAR", ["tar"]],
access key : list of DEFINITION_FIELDS
["func", <== the class function to use to run the external utility with
"cmd", <== the external utility command
"args", <== a list of the arguments to pass to the utility
"id", <== ID string that identifies the utility
"extensions"], <== the list of file extensions this command handles
Available named string variables that will be substituted with the passed in
values during run time:
"%(filename)s" filename parameter to pass to the utility
"%(basedir)s" the base source directory where source originates from
"%(source)s" the file or directory being acted upon
"%(destination)s" the destination file or directory
"%(arch)s" the arch filter to pass in ie. Available filters: x86, arm, armthumb, powerpc, sparc, ia64
'''
# Compression command table, keyed by mode name.
# The "Type" entry is not a command: it is a two-item list
# [mode identifier, printable message] that CompressMap.__init__ takes
# as its loaded_type. Every other entry lists its values in
# DEFINITION_FIELDS order (func, cmd, args, id, extensions); the
# "%(name)s" placeholders in args are substituted at run time.
# Note that "rsync" carries None instead of an extensions list.
COMPRESS_DEFINITIONS = {
"Type" :["Compression", "Compression definitions loaded"],
"rsync" :["rsync", "rsync", ["-a", "--delete", "%(source)s", "%(destination)s"], "RSYNC", None],
"lbzip2" :["_common", "tar", ["-I", "lbzip2", "-cf", "%(filename)s", "-C", "%(basedir)s", "%(source)s"], "LBZIP2", ["tar.bz2"]],
"bzip2" :["_common", "tar", ["-cpjf", "%(filename)s", "-C", "%(basedir)s", "%(source)s"], "BZIP2", ["tar.bz2"]],
"tar" :["_common", "tar", ["-cpf", "%(filename)s", "-C", "%(basedir)s", "%(source)s"], "TAR", ["tar"]],
"xz" :["_common", "tar", ["-cpJf", "%(filename)s", "-C", "%(basedir)s", "%(source)s"], "XZ", ["tar.xz"]],
"pixz" :["_common", "tar", ["-I", "pixz", "-cpf", "%(filename)s", "-C", "%(basedir)s", "%(source)s"], "PIXZ", ["tar.xz"]],
"gzip" :["_common", "tar", ["-cpzf", "%(filename)s", "-C", "%(basedir)s", "%(source)s"], "GZIP", ["tar.gz"]],
"squashfs" :["_common", "mksquashfs", ["%(source)s", "%(destination)s", "-comp", "xz", "-Xbcj", "%(arch)s", "-b", "1M"], "SQUASHFS", ["squashfs", "sfs"]],
}
# Decompression command table, keyed by mode name.
# Same layout as the compression table: a special "Type" entry holding
# [mode identifier, printable message], then one entry per mode with
# values in DEFINITION_FIELDS order (func, cmd, args, id, extensions).
# The extensions lists are what determine_mode() compares against the
# end of the source file name; "rsync" has None instead of a list.
DECOMPRESS_DEFINITIONS = {
"Type" :["Decompression", "Decompression definitions loaded"],
"rsync" :["rsync", "rsync", ["-a", "--delete", "%(source)s", "%(destination)s"], "RSYNC", None],
"lbzip2" :["_common", "tar", ["-I", "lbzip2", "-xpf", "%(source)s", "-C", "%(destination)s"], "LBZIP2", ["tar.bz2", "bz2", "tbz2"]],
"bzip2" :["_common", "tar", ["-xpf", "%(source)s", "-C", "%(destination)s"], "BZIP2", ["tar.bz2", "bz2", "tbz2"]],
"tar" :["_common", "tar", ["-xpf", "%(source)s", "-C", "%(destination)s"], "TAR", ["tar"]],
"xz" :["_common", "tar", ["-xpf", "%(source)s", "-C", "%(destination)s"], "XZ", ["tar.xz", "xz"]],
"pixz" :["_common", "tar", ["-I", "pixz", "-xpf", "%(source)s", "-C", "%(destination)s"], "PIXZ", ["tar.xz", "xz"]],
"gzip" :["_common", "tar", ["-xpzf", "%(source)s", "-C", "%(destination)s"], "GZIP", ["tar.gz", "gz"]],
"squashfs" :["_common", "unsquashfs", ["-d", "%(destination)s", "%(source)s"], "SQUASHFS", ["squashfs", "sfs"]],
}
'''Configure this here in case it is ever changed.
This is the only edit point required then.'''
EXTENSION_SEPARATOR = '.'
def create_classes(definitions, fields):
    '''Build one namedtuple instance per (de)compressor definition.

    A dedicated namedtuple class, named after the definition key, is
    created for every entry so that the entry's values can be read by
    field name (e.g. ``class_map["tar"].cmd``).

    @param definitions: dict, of (de)compressor definitions
        see DEFINITION_FIELDS and DEFINITION_TYPES defined in this
        library. Each value must hold exactly len(fields) items.
    @param fields: list of the field names to create
    @return class_map: dictionary of key: namedtuple instance
    @raise TypeError: if an entry does not have len(fields) items
    @raise ValueError: if a key or field is not a valid identifier
    '''
    class_map = {}
    for name, values in definitions.items():
        # namedtuple classes already declare __slots__ = (), so the
        # instances carry no per-instance __dict__; nothing more is
        # needed to keep them small.
        class_map[name] = namedtuple(name, fields)._make(values)
    return class_map
class CompressMap(object):
    '''Class for handling
    Catalyst's compression & decompression of archives

    An instance is loaded with one definitions table. When the table's
    "Type" entry identifies it as "Compression" the instance exposes
    compress() and sets extract to None; otherwise it exposes extract()
    and sets compress to None.
    '''

    # fields: list of ordered field names for the (de)compression functions
    fields = DEFINITION_FIELDS[:]

    def __init__(self, definitions=None, env=None, default_mode=None,
                 separator=EXTENSION_SEPARATOR, search_order=None):
        '''Class init

        @param definitions: dictionary of
            Key:[function, cmd, cmd_args, Print/id string, extensions]
            plus a "Type" entry holding [mode id, printable message].
            The passed-in dictionary is not modified.
        @param env: environment to pass to the cmd subprocess
        @param default_mode: string, optional mode used when a call does
            not name one
        @param separator: string placed between a file name and an
            automatically added extension
        @param search_order: optional list of mode names, in priority
            order, consulted by determine_mode()
        @raise KeyError: if definitions is given without a "Type" entry
        '''
        if definitions is None:
            definitions = {}
            self.loaded_type = ["None", "No definitions loaded"]
        else:
            # Work on a shallow copy: popping "Type" from the caller's
            # (typically module-level) table would make a second
            # instance built from the same table fail with KeyError.
            definitions = dict(definitions)
            self.loaded_type = definitions.pop('Type')
        self.env = env or {}
        self.mode_error = self.loaded_type[0] + \
            " Error: No mode was passed in or automatically detected"
        self._map = {}
        self.extension_separator = separator
        # set some defaults depending on what is being loaded
        if self.loaded_type[0] in ['Compression']:
            # NOTE(review): 'tbz2' is not a key of COMPRESS_DEFINITIONS
            # as defined in this file -- confirm the intended default.
            self.mode = default_mode or 'tbz2'
            self.compress = self._compress
            self.extract = None
        else:
            self.mode = default_mode or 'auto'
            self.compress = None
            self.extract = self._extract
        self.search_order = search_order
        print("COMPRESS: __init__(), search_order = " + str(self.search_order))
        # create the (de)compression definition namedtuple classes
        self._map = create_classes(definitions, self.fields)

    def _compress(self, infodict=None, filename='', source=None,
                  basedir='.', mode=None, auto_extension=False, fatal=True):
        '''Compression function

        @param infodict: optional dictionary of the next 4 parameters.
        @param filename: optional string, name of the file to make
        @param source: optional string, path to a directory
        @param basedir: optional string, path to a directory
        @param mode: string, optional mode to use to (de)compress with
        @param auto_extension: boolean, optional, enables or disables
            adding the normal file extension defined by the mode used.
            defaults to False
        @param fatal: boolean, pass through variable
            passed to the command subprocess handler
        @return boolean
        '''
        if not infodict:
            infodict = self.create_infodict(source, None,
                basedir, filename, mode or self.mode, auto_extension)
        if not infodict['mode']:
            print(self.mode_error)
            return False
        if auto_extension:
            infodict['auto-ext'] = True
        return self._run(infodict, fatal=fatal)

    def _extract(self, infodict=None, source=None, destination=None,
                 mode=None, fatal=True):
        '''De-compression function

        @param infodict: optional dictionary of the next 3 parameters.
        @param source: optional string, path to a directory
        @param destination: optional string, path a directory
        @param mode: string, optional mode to use to (de)compress with
        @param fatal: boolean, pass through variable
            passed to the command subprocess handler
        @return boolean
        '''
        if self.loaded_type[0] not in ["Decompression"]:
            return False
        if not infodict:
            infodict = self.create_infodict(source, destination, mode=mode)
        if infodict['mode'] in [None]:
            infodict['mode'] = self.mode or 'auto'
        if infodict['mode'] in ['auto']:
            # Resolve 'auto' to a definition key. The bare file
            # extension (e.g. ".bz2") is never a key of the loaded
            # definitions, so it cannot be used as the mode directly.
            infodict['mode'] = self.determine_mode(infodict['source'])
        if not infodict['mode']:
            print(self.mode_error)
            return False
        return self._run(infodict, fatal=fatal)

    def _run(self, infodict, fatal=True):
        '''Internal function that runs the designated function

        @param infodict: dict as returned by this class's create_infodict()
        @param fatal: boolean, pass through variable
            passed to the command subprocess handler
        @return boolean
        '''
        if not self.is_supported(infodict['mode']):
            print("mode: %s is not supported in the current %s definitions"
                  % (infodict['mode'], self.loaded_type[1]))
            return False
        func_name = self._map[infodict['mode']].func
        # Only the lookup is guarded: an AttributeError raised inside
        # the handler itself is a different failure and must not be
        # reported as a missing function.
        func = getattr(self, func_name, None)
        if func is None:
            print("FAILED to find function '%s'" % str(func_name))
            return False
        # fatal goes by keyword: the handlers do not all take it as
        # their second positional parameter (see rsync()).
        return func(infodict, fatal=fatal)

    def get_extension(self, source):
        '''Extracts the file extension string from the source file

        @param source: string, file path of the file to determine
        @return string: file type extension of the source file,
            as returned by os.path.splitext() (leading dot included)
        '''
        return os.path.splitext(source)[1]

    def determine_mode(self, source):
        '''Uses the decompressor_search_order spec parameter and
        compares the decompressor's file extension strings
        with the source file and returns the mode to use for decompression.

        Modes are tried in search_order; the first one with an extension
        that the source name ends with wins. Without a search_order
        nothing can be matched and None is returned.

        @param source: string, file path of the file to determine
        @return string: the decompressor mode to use on the source file,
            or None when no mode matches
        '''
        print("COMPRESS: determine_mode(), source = " + source)
        result = None
        for mode in self.search_order or ():
            print("COMPRESS: determine_mode(), mode = " + mode)
            if not self.is_supported(mode):
                # listed in search_order but not in the loaded definitions
                continue
            # extensions may be None (e.g. the rsync definition)
            extensions = tuple(self._map[mode].extensions or ())
            if source.endswith(extensions):
                result = mode
                break
        if not result:
            print("COMPRESS: determine_mode(), failed to find a mode " +
                  "to use for: " + source)
        return result

    def rsync(self, infodict=None, source=None, destination=None,
              mode=None, fatal=True):
        '''Convenience function. Performs an rsync transfer

        Also the handler named by the "rsync" definition, so it runs the
        command through _common() rather than _run(): _run() dispatches
        back to this method and would recurse without end.

        @param infodict: dict as returned by this class's create_infodict()
        @param source: optional string, path to a directory
        @param destination: optional string, path a directory
        @param mode: string, optional mode to use to (de)compress with
        @param fatal: boolean, pass through variable
            passed to the command subprocess handler
        @return boolean
        '''
        if not infodict:
            if not mode:
                mode = 'rsync'
            infodict = self.create_infodict(source, destination, mode=mode)
        return self._common(infodict, fatal=fatal)

    def _common(self, infodict, fatal=True):
        '''Internal function. Performs commonly supported
        compression or decompression commands.

        @param infodict: dict as returned by this class's create_infodict()
        @param fatal: boolean, pass through variable
            passed to the command subprocess handler
        @return boolean
        '''
        if not infodict['mode'] or not self.is_supported(infodict['mode']):
            print("ERROR: CompressMap; %s mode: %s not correctly set!"
                  % (self.loaded_type[0], infodict['mode']))
            return False

        # Avoid modifying the source dictionary
        cmdinfo = infodict.copy()

        # obtain the definition namedtuple for the mode in use
        cmdlist = self._map[cmdinfo['mode']]

        # for compression, add the file extension if enabled
        # (.get: a hand-built infodict may not carry the key at all)
        if cmdinfo.get('auto-ext'):
            cmdinfo['filename'] += self.extension_separator + \
                self.extension(cmdinfo["mode"])

        # Do the string substitution
        # NOTE(review): the command is assembled as one space-joined
        # string; presumably paths containing whitespace are not
        # supported -- confirm against support.cmd.
        opts = ' '.join(cmdlist.args) % cmdinfo
        args = ' '.join([cmdlist.cmd, opts])

        # now run the (de)compressor command in a subprocess
        # return its success/fail return value
        return cmd(args, cmdlist.id, env=self.env, fatal=fatal)

    def create_infodict(self, source, destination=None, basedir=None,
                        filename='', mode=None, auto_extension=False,
                        arch=None):
        '''Puts the source and destination paths into a dictionary
        for use in string substitution in the definitions
        %(source) and %(destination) fields embedded into the commands

        @param source: string, path to a directory
        @param destination: string, path a directory
        @param basedir: optional string, path a directory
        @param filename: optional string
        @param mode: string, optional mode to use to (de)compress with;
            falls back to the instance default mode
        @param auto_extension: boolean, optional, enables or disables
            adding the normal file extension defined by the mode used.
            defaults to False
        @param arch: optional string, substituted for %(arch)s
        @return dict:
        '''
        return {
            'source': source,
            'destination': destination,
            'basedir': basedir,
            'filename': filename,
            'arch': arch,
            'mode': mode or self.mode,
            'auto-ext': auto_extension,
        }

    def is_supported(self, mode):
        '''Truth function to test the mode desired is supported
        in the definitions loaded

        @param mode: string, mode to use to (de)compress with
        @return boolean
        '''
        return mode in self._map

    @property
    def available_modes(self):
        '''Convenience function to return the available modes'''
        return list(self._map)

    def extension(self, mode, all_extensions=False):
        '''Returns the predetermined extension auto-ext added
        to the filename for compression.

        @param mode: string
        @param all_extensions: boolean, return every extension stored
            for the mode instead of only the first (default) one
        @return string, or the stored extensions value when
            all_extensions is set; '' for an unsupported mode or a
            mode without extensions
        '''
        if not self.is_supported(mode):
            return ''
        extensions = self._map[mode].extensions
        if all_extensions:
            return extensions
        # return the first one (default); a definition may have no
        # extensions at all (None), e.g. rsync
        return extensions[0] if extensions else ''
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579)
2015-01-16 3:00 ` Brian Dolbec
@ 2015-01-16 4:53 ` Zac Medico
2015-01-16 5:18 ` Brian Dolbec
0 siblings, 1 reply; 6+ messages in thread
From: Zac Medico @ 2015-01-16 4:53 UTC (permalink / raw
To: gentoo-portage-dev
On 01/15/2015 07:00 PM, Brian Dolbec wrote:
> On Thu, 15 Jan 2015 17:27:23 -0800
> Zac Medico <zmedico@gentoo.org> wrote:
>> # Add -q to bzip2 opts, in order to avoid "trailing
>> garbage after # EOF ignored" warning messages due to xpak trailer.
>> + if comp == "bzip2":
>> + decomp_cmd =
>> "${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d}"
>> + elif comp == "xz":
>> + decomp_cmd = "xz -d"
>> + elif comp == "gzip":
>> + decomp_cmd = "gzip -d"
>> + else:
>> + raise AssertionError("Unexpected
>
> No offense, but yuk to the if foo .... else bar... else...
It's simple and it works. Why don't we just go with this simple approach
first, and add fancy stuff later on?
> I already have code that does this much better, I decided I was going
> to release it separately from catalyst because it is generally useful
> and in many ways more future proof. I know you were interested in it,
> but I hadn't gotten around to establishing it in a separate repo. I
> also didn't know you were already working on this.
>
> I've attached it to this email for you to look over. It needs a bit of
> work for an independent release, but it is easily extended with
> configuration changes. It can also be easily extended with custom
> commands if needed.
>
> If you are interested, it would not take long to have it release worthy
> and in the tree.
I am interested in your library, but it does much more than is needed in
BinpkgExtractorAsync. The requirements are:
1) Map bzip2, xz, or gzip to an appropriate decompression command. The
command should be guaranteed to support -c and -q options, since
BinpkgExtractorAsync relies on them.
2) Respect the user's PORTAGE_BUNZIP2_COMMAND and PORTAGE_BZIP2_COMMAND
variables.
--
Thanks,
Zac
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579)
2015-01-16 4:53 ` Zac Medico
@ 2015-01-16 5:18 ` Brian Dolbec
2015-01-16 8:30 ` Zac Medico
0 siblings, 1 reply; 6+ messages in thread
From: Brian Dolbec @ 2015-01-16 5:18 UTC (permalink / raw
To: gentoo-portage-dev
On Thu, 15 Jan 2015 20:53:02 -0800
Zac Medico <zmedico@gentoo.org> wrote:
> On 01/15/2015 07:00 PM, Brian Dolbec wrote:
> > On Thu, 15 Jan 2015 17:27:23 -0800
> > Zac Medico <zmedico@gentoo.org> wrote:
> >> # Add -q to bzip2 opts, in order to avoid
> >> "trailing garbage after # EOF ignored" warning messages due to
> >> xpak trailer.
> >> + if comp == "bzip2":
> >> + decomp_cmd =
> >> "${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d}"
> >> + elif comp == "xz":
> >> + decomp_cmd = "xz -d"
> >> + elif comp == "gzip":
> >> + decomp_cmd = "gzip -d"
> >> + else:
> >> + raise AssertionError("Unexpected
> >
> > No offense, but yuk to the if foo .... else bar... else...
>
> It's simple and it works. Why don't we just go with this simple
> approach first, and add fancy stuff later on?
>
# Map each recognized compression type to its decompression command.
PORTAGE_COMPRESSORS = {
    "bzip2": "${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d}",
    "xz": "xz -d",
    "gzip": "gzip -d",
}
try:
    decomp_cmd = PORTAGE_COMPRESSORS[comp]
except KeyError:
    # a failed dict lookup raises KeyError, not AttributeError
    writemsg("You fool we don't support the %s (de)compression type!" % comp)
makes it simple to add new types, saves mile long if, elif, elif, else
code ;) There is also a good chance the code won't need editing to add
a new type, just add it to the dict definition. Second advantage is the
code should be slightly faster. (yes, I realize this isn't in a
multi k iteration loop, so speed-up is minuscule) but portage can't
afford to waste many cycles ;) it all adds up...
P.S. I don't expect you to use the writemsg(...) as is :D
> > I already have code that does this much better, I decided I was
> > going to release it separately from catalyst because it is
> > generally useful and in many ways more future proof. I know you
> > were interested in it, but I hadn't gotten around to establishing
> > it in a separate repo. I also didn't know you were already working
> > on this.
> >
> > I've attached it to this email for you to look over. It needs a
> > bit of work for an independent release, but it is easily extended
> > with configuration changes. It can also be easily extended with
> > custom commands if needed.
> >
> > If you are interested, it would not take long to have it release
> > worthy and in the tree.
>
> I am interested in your library, but it does much more than is needed
> in BinpkgExtractorAsync.The requirements are:
>
> 1) Map bzip2, xz, or gzip to an appropriate decompression command. The
> command should be guaranteed to support -c and -q options, since
> BinpkgExtractorAsync relies on them.
>
> 2) Respect the user's PORTAGE_BUNZIP2_COMMAND and
> PORTAGE_BZIP2_COMMAND variables.
yes, I know there is more there than is needed and a few options need
adding.
--
Brian Dolbec <dolsen>
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579)
2015-01-16 5:18 ` Brian Dolbec
@ 2015-01-16 8:30 ` Zac Medico
2015-01-16 11:25 ` Brian Dolbec
0 siblings, 1 reply; 6+ messages in thread
From: Zac Medico @ 2015-01-16 8:30 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
This adds support for using a binary package's compression header to
determine the compression type, providing forward-compatibility for
xz and gzip decompression. The file name extension is disregarded, so
that it will be possible to use a compression-independent file naming
scheme in the future (see bug #150031 for discussion about proposed
file naming schemes).
Currently, only decompression is supported. It's useful to provide
forward-compatibility now, so that binhost clients will be prepared
to handle future binhost servers that use xz or gzip compression.
X-Gentoo-Bug: 142579
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=142579
---
| 25 ++++++++++---
pym/portage/util/compression_probe.py | 68 +++++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+), 4 deletions(-)
create mode 100644 pym/portage/util/compression_probe.py
--git a/pym/_emerge/BinpkgExtractorAsync.py b/pym/_emerge/BinpkgExtractorAsync.py
index be74c2f..6aaa448 100644
--- a/pym/_emerge/BinpkgExtractorAsync.py
+++ b/pym/_emerge/BinpkgExtractorAsync.py
@@ -1,8 +1,13 @@
# Copyright 1999-2013 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
+import logging
+
from _emerge.SpawnProcess import SpawnProcess
import portage
+from portage.localization import _
+from portage.util.compression_probe import (compression_probe,
+ _decompressors)
import signal
import subprocess
@@ -20,19 +25,31 @@ class BinpkgExtractorAsync(SpawnProcess):
if b"--xattrs" in output:
tar_options = "--xattrs"
- # Add -q to bzip2 opts, in order to avoid "trailing garbage after
- # EOF ignored" warning messages due to xpak trailer.
+ decomp_cmd = _decompressors.get(
+ compression_probe(self.pkg_path))
+ if decomp_cmd is None:
+ self.scheduler.output("!!! %s\n" %
+ _("File compression header unrecognized: %s") %
+ self.pkg_path, log_path=self.logfile,
+ background=self.background, level=logging.ERROR)
+ self.returncode = 1
+ self._async_wait()
+ return
+
+ # Add -q to decomp_cmd opts, in order to avoid "trailing garbage
+ # after EOF ignored" warning messages due to xpak trailer.
# SIGPIPE handling (128 + SIGPIPE) should be compatible with
# assert_sigpipe_ok() that's used by the ebuild unpack() helper.
self.args = [self._shell_binary, "-c",
- ("${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d} -cq -- %s | tar -xp %s -C %s -f - ; " + \
+ ("%s -cq -- %s | tar -xp %s -C %s -f - ; " + \
"p=(${PIPESTATUS[@]}) ; " + \
"if [[ ${p[0]} != 0 && ${p[0]} != %d ]] ; then " % (128 + signal.SIGPIPE) + \
"echo bzip2 failed with status ${p[0]} ; exit ${p[0]} ; fi ; " + \
"if [ ${p[1]} != 0 ] ; then " + \
"echo tar failed with status ${p[1]} ; exit ${p[1]} ; fi ; " + \
"exit 0 ;") % \
- (portage._shell_quote(self.pkg_path),
+ (decomp_cmd,
+ portage._shell_quote(self.pkg_path),
tar_options,
portage._shell_quote(self.image_dir))]
diff --git a/pym/portage/util/compression_probe.py b/pym/portage/util/compression_probe.py
new file mode 100644
index 0000000..1dc3547
--- /dev/null
+++ b/pym/portage/util/compression_probe.py
@@ -0,0 +1,68 @@
+# Copyright 2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import re
+import sys
+
+if sys.hexversion >= 0x3000000:
+ basestring = str
+
+from portage import _encodings, _unicode_encode
+from portage.exception import FileNotFound, PermissionDenied
+
+_decompressors = {
+ "bzip2": "${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d}",
+ "gzip": "gzip -d",
+ "xz": "xz -d",
+}
+
+_compression_re = re.compile(b'^(' +
+ b'(?P<bzip2>\x42\x5a\x68\x39)|' +
+ b'(?P<gzip>\x1f\x8b)|' +
+ b'(?P<xz>\xfd\x37\x7a\x58\x5a\x00))')
+
+def compression_probe(f):
+ """
+ Identify the compression type of a file. Returns one of the
+ following identifier strings:
+
+ bzip2
+ gzip
+ xz
+
+ @param f: a file path, or file-like object
+ @type f: str or file
+ @return: a string identifying the compression type, or None if the
+ compression type is unrecognized
+ @rtype: str or None
+ """
+
+ open_file = isinstance(f, basestring)
+ if open_file:
+ try:
+ f = open(_unicode_encode(f,
+ encoding=_encodings['fs'], errors='strict'), mode='rb')
+ except IOError as e:
+ if e.errno == PermissionDenied.errno:
+ raise PermissionDenied(f)
+ elif e.errno in (errno.ENOENT, errno.ESTALE):
+ raise FileNotFound(f)
+ else:
+ raise
+
+ try:
+ return _compression_probe_file(f)
+ finally:
+ if open_file:
+ f.close()
+
+def _compression_probe_file(f):
+
+ m = _compression_re.match(f.read(6))
+ if m is not None:
+ for k, v in m.groupdict().items():
+ if v is not None:
+ return k
+
+ return None
--
2.0.5
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579)
2015-01-16 8:30 ` Zac Medico
@ 2015-01-16 11:25 ` Brian Dolbec
0 siblings, 0 replies; 6+ messages in thread
From: Brian Dolbec @ 2015-01-16 11:25 UTC (permalink / raw
To: gentoo-portage-dev
On Fri, 16 Jan 2015 00:30:31 -0800
Zac Medico <zmedico@gentoo.org> wrote:
> This adds support for using a binary package's compression header to
> determine the compression type, providing forward-compatibility for
> xz and gzip decompression. The file name extension is disregarded, so
> that it will be possible to use a compression-independent file naming
> scheme in the future (see bug #150031 for discussion about proposed
> file naming schemes).
>
> Currently, only decompression is supported. It's useful to provide
> forward-compatibility now, so that binhost clients will be prepared
> to handle future binhost servers that use xz or gzip compression.
>
> X-Gentoo-Bug: 142579
> X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=142579
> ---
oooh, now that is sooooooo much better :D
Merge approved!
(P.S. too pretty to erase the code below ;)
> pym/_emerge/BinpkgExtractorAsync.py | 25 ++++++++++---
> pym/portage/util/compression_probe.py | 68
> +++++++++++++++++++++++++++++++++++ 2 files changed, 89
> insertions(+), 4 deletions(-) create mode 100644
> pym/portage/util/compression_probe.py
>
> diff --git a/pym/_emerge/BinpkgExtractorAsync.py
> b/pym/_emerge/BinpkgExtractorAsync.py index be74c2f..6aaa448 100644
> --- a/pym/_emerge/BinpkgExtractorAsync.py
> +++ b/pym/_emerge/BinpkgExtractorAsync.py
> @@ -1,8 +1,13 @@
> # Copyright 1999-2013 Gentoo Foundation
> # Distributed under the terms of the GNU General Public License v2
>
> +import logging
> +
> from _emerge.SpawnProcess import SpawnProcess
> import portage
> +from portage.localization import _
> +from portage.util.compression_probe import (compression_probe,
> + _decompressors)
> import signal
> import subprocess
>
> @@ -20,19 +25,31 @@ class BinpkgExtractorAsync(SpawnProcess):
> if b"--xattrs" in output:
> tar_options = "--xattrs"
>
> - # Add -q to bzip2 opts, in order to avoid "trailing
> garbage after
> - # EOF ignored" warning messages due to xpak trailer.
> + decomp_cmd = _decompressors.get(
> + compression_probe(self.pkg_path))
> + if decomp_cmd is None:
> + self.scheduler.output("!!! %s\n" %
> + _("File compression header
> unrecognized: %s") %
> + self.pkg_path, log_path=self.logfile,
> + background=self.background,
> level=logging.ERROR)
> + self.returncode = 1
> + self._async_wait()
> + return
> +
> + # Add -q to decomp_cmd opts, in order to avoid
> "trailing garbage
> + # after EOF ignored" warning messages due to xpak
> trailer. # SIGPIPE handling (128 + SIGPIPE) should be compatible with
> # assert_sigpipe_ok() that's used by the ebuild
> unpack() helper. self.args = [self._shell_binary, "-c",
> -
> ("${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d} -cq -- %s |
> tar -xp %s -C %s -f - ; " + \
> + ("%s -cq -- %s | tar -xp %s -C %s -f - ; " +
> \ "p=(${PIPESTATUS[@]}) ; " + \
> "if [[ ${p[0]} != 0 && ${p[0]} != %d ]] ;
> then " % (128 + signal.SIGPIPE) + \ "echo bzip2 failed with status
> ${p[0]} ; exit ${p[0]} ; fi ; " + \ "if [ ${p[1]} != 0 ] ; then " + \
> "echo tar failed with status ${p[1]} ; exit
> ${p[1]} ; fi ; " + \ "exit 0 ;") % \
> - (portage._shell_quote(self.pkg_path),
> + (decomp_cmd,
> + portage._shell_quote(self.pkg_path),
> tar_options,
> portage._shell_quote(self.image_dir))]
>
> diff --git a/pym/portage/util/compression_probe.py
> b/pym/portage/util/compression_probe.py new file mode 100644
> index 0000000..1dc3547
> --- /dev/null
> +++ b/pym/portage/util/compression_probe.py
> @@ -0,0 +1,68 @@
> +# Copyright 2015 Gentoo Foundation
> +# Distributed under the terms of the GNU General Public License v2
> +
> +import errno
> +import re
> +import sys
> +
> +if sys.hexversion >= 0x3000000:
> + basestring = str
> +
> +from portage import _encodings, _unicode_encode
> +from portage.exception import FileNotFound, PermissionDenied
> +
> +_decompressors = {
> + "bzip2":
> "${PORTAGE_BUNZIP2_COMMAND:-${PORTAGE_BZIP2_COMMAND} -d}",
> + "gzip": "gzip -d",
> + "xz": "xz -d",
> +}
> +
> +_compression_re = re.compile(b'^(' +
> + b'(?P<bzip2>\x42\x5a\x68\x39)|' +
> + b'(?P<gzip>\x1f\x8b)|' +
> + b'(?P<xz>\xfd\x37\x7a\x58\x5a\x00))')
> +
> +def compression_probe(f):
> + """
> + Identify the compression type of a file. Returns one of the
> + following identifier strings:
> +
> + bzip2
> + gzip
> + xz
> +
> + @param f: a file path, or file-like object
> + @type f: str or file
> + @return: a string identifying the compression type, or None
> if the
> + compression type is unrecognized
> + @rtype: str or None
> + """
> +
> + open_file = isinstance(f, basestring)
> + if open_file:
> + try:
> + f = open(_unicode_encode(f,
> + encoding=_encodings['fs'],
> errors='strict'), mode='rb')
> + except IOError as e:
> + if e.errno == PermissionDenied.errno:
> + raise PermissionDenied(f)
> + elif e.errno in (errno.ENOENT, errno.ESTALE):
> + raise FileNotFound(f)
> + else:
> + raise
> +
> + try:
> + return _compression_probe_file(f)
> + finally:
> + if open_file:
> + f.close()
> +
> +def _compression_probe_file(f):
> +
> + m = _compression_re.match(f.read(6))
> + if m is not None:
> + for k, v in m.groupdict().items():
> + if v is not None:
> + return k
> +
> + return None
--
Brian Dolbec <dolsen>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2015-01-16 11:25 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-16 1:27 [gentoo-portage-dev] [PATCH] BinpkgExtractorAsync: xz and gzip decompression (142579) Zac Medico
2015-01-16 3:00 ` Brian Dolbec
2015-01-16 4:53 ` Zac Medico
2015-01-16 5:18 ` Brian Dolbec
2015-01-16 8:30 ` Zac Medico
2015-01-16 11:25 ` Brian Dolbec
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox