From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gentoo-commits+bounces-687445-garchives=archives.gentoo.org@lists.gentoo.org>
Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80])
	by finch.gentoo.org (Postfix) with ESMTP id E4364138A1F
	for <garchives@archives.gentoo.org>; Fri, 18 Apr 2014 18:14:44 +0000 (UTC)
Received: from pigeon.gentoo.org (localhost [127.0.0.1])
	by pigeon.gentoo.org (Postfix) with SMTP id DEE64E09FC;
	Fri, 18 Apr 2014 18:14:43 +0000 (UTC)
Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183])
	(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by pigeon.gentoo.org (Postfix) with ESMTPS id 53F9FE09F1
	for <gentoo-commits@lists.gentoo.org>; Fri, 18 Apr 2014 18:14:42 +0000 (UTC)
Received: from spoonbill.gentoo.org (spoonbill.gentoo.org [81.93.255.5])
	(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by smtp.gentoo.org (Postfix) with ESMTPS id 9EA3C340360
	for <gentoo-commits@lists.gentoo.org>; Fri, 18 Apr 2014 18:14:41 +0000 (UTC)
Received: from localhost.localdomain (localhost [127.0.0.1])
	by spoonbill.gentoo.org (Postfix) with ESMTP id 3C2AD18167
	for <gentoo-commits@lists.gentoo.org>; Fri, 18 Apr 2014 18:14:40 +0000 (UTC)
From: "Martin Mokrejs" <mmokrejs@fold.natur.cuni.cz>
To: gentoo-commits@lists.gentoo.org
Content-Transfer-Encoding: 8bit
Content-type: text/plain; charset=UTF-8
Reply-To: gentoo-dev@lists.gentoo.org, "Martin Mokrejs" <mmokrejs@fold.natur.cuni.cz>
Message-ID: <1397844719.20c8e6ec36a2cdd5af944030720b059fb8d10891.mmokrejs@gentoo>
Subject: [gentoo-commits] proj/sci:master commit in: sci-biology/biopython/, sci-biology/biopython/files/
X-VCS-Repository: proj/sci
X-VCS-Files: sci-biology/biopython/ChangeLog sci-biology/biopython/biopython-1.62-r3.ebuild sci-biology/biopython/biopython-1.62-r4.ebuild sci-biology/biopython/biopython-1.63-r1.ebuild sci-biology/biopython/files/SeqRecord.py.patch sci-biology/biopython/files/SffIO_broken_padding.patch sci-biology/biopython/files/adjust-trimpoints.patch
X-VCS-Directories: sci-biology/biopython/ sci-biology/biopython/files/
X-VCS-Committer: mmokrejs
X-VCS-Committer-Name: Martin Mokrejs
X-VCS-Revision: 20c8e6ec36a2cdd5af944030720b059fb8d10891
X-VCS-Branch: master
Date: Fri, 18 Apr 2014 18:14:40 +0000 (UTC)
Precedence: bulk
List-Post: <mailto:gentoo-commits@lists.gentoo.org>
List-Help: <mailto:gentoo-commits+help@lists.gentoo.org>
List-Unsubscribe: <mailto:gentoo-commits+unsubscribe@lists.gentoo.org>
List-Subscribe: <mailto:gentoo-commits+subscribe@lists.gentoo.org>
List-Id: Gentoo Linux mail <gentoo-commits.gentoo.org>
X-BeenThere: gentoo-commits@lists.gentoo.org
X-Archives-Salt: 6c4b36ec-5f58-48e3-b8a1-d21d47e212da
X-Archives-Hash: 30592a433e5525ec7f0b08ea15b929e8

commit:     20c8e6ec36a2cdd5af944030720b059fb8d10891
Author:     Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Fri Apr 18 18:11:59 2014 +0000
Commit:     Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Fri Apr 18 18:11:59 2014 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=20c8e6ec

sci-biology/biopython: upstream patch to downgrade an assert to a warning

Package-Manager: portage-2.2.7

---
 sci-biology/biopython/ChangeLog                    |  11 ++
 ...hon-1.63-r1.ebuild => biopython-1.62-r3.ebuild} |   5 +-
 ...hon-1.63-r1.ebuild => biopython-1.62-r4.ebuild} |   5 +-
 sci-biology/biopython/biopython-1.63-r1.ebuild     |   3 +-
 sci-biology/biopython/files/SeqRecord.py.patch     | 148 +++++++++++++++++++++
 .../biopython/files/SffIO_broken_padding.patch     |  27 ++++
 .../biopython/files/adjust-trimpoints.patch        |  76 +++++++++++
 7 files changed, 270 insertions(+), 5 deletions(-)
diff --git a/sci-biology/biopython/ChangeLog b/sci-biology/biopython/ChangeLog
index 037227d..c326c4a 100644
--- a/sci-biology/biopython/ChangeLog
+++ b/sci-biology/biopython/ChangeLog
@@ -2,6 +2,17 @@
 # Copyright 1999-2014 Gentoo Foundation; Distributed under the GPL v2
 # $Header: $
 
+*biopython-1.62-r3 (18 Apr 2014)
+*biopython-1.62-r4 (18 Apr 2014)
+
+  18 Apr 2014; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
+  +biopython-1.62-r3.ebuild, +biopython-1.62-r4.ebuild,
+  +files/SeqRecord.py.patch, +files/SffIO_broken_padding.patch,
+  +files/adjust-trimpoints.patch, biopython-1.63-r1.ebuild,
+  files/SffIO_error_in_check_eof.patch, files/biopython-1.51-flex.patch,
+  files/biopython-1.62-SffIO.patch:
+  sci-biology/biopython: upstream patch to downgrade an assert to a warning
+
   23 Mar 2014; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
   -biopython-1.62-r3.ebuild, -biopython-1.62-r4.ebuild, -biopython-1.63.ebuild,
   -files/SeqRecord.py.patch, -files/adjust-trimpoints.patch,

diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.62-r3.ebuild
similarity index 90%
copy from sci-biology/biopython/biopython-1.63-r1.ebuild
copy to sci-biology/biopython/biopython-1.62-r3.ebuild
index e99f846..09e6ed7 100644
--- a/sci-biology/biopython/biopython-1.63-r1.ebuild
+++ b/sci-biology/biopython/biopython-1.62-r3.ebuild
@@ -23,7 +23,6 @@ RDEPEND="${PYTHON_DEPS}
 	dev-python/matplotlib[${PYTHON_USEDEP}]
 	dev-python/networkx[${PYTHON_USEDEP}]
 	dev-python/numpy[${PYTHON_USEDEP}]
-	dev-python/rdflib[${PYTHON_USEDEP}]
 	dev-python/pygraphviz[${PYTHON_USEDEP}]
 	dev-python/reportlab[${PYTHON_USEDEP}]
 	media-gfx/pydot[${PYTHON_USEDEP}]
@@ -35,8 +34,10 @@ DEPEND="${RDEPEND}
 DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
 
 src_prepare() {
-	epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch
 	distutils-r1_src_prepare
+	epatch "${FILESDIR}/${PN}-1.62-SffIO.patch"
+	epatch "${FILESDIR}/SffIO_error_in_check_eof.patch"
+	epatch "${FILESDIR}/SffIO_broken_padding.patch"
 }
 
 python_test() {

diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.62-r4.ebuild
similarity index 90%
copy from sci-biology/biopython/biopython-1.63-r1.ebuild
copy to sci-biology/biopython/biopython-1.62-r4.ebuild
index e99f846..09e6ed7 100644
--- a/sci-biology/biopython/biopython-1.63-r1.ebuild
+++ b/sci-biology/biopython/biopython-1.62-r4.ebuild
@@ -23,7 +23,6 @@ RDEPEND="${PYTHON_DEPS}
 	dev-python/matplotlib[${PYTHON_USEDEP}]
 	dev-python/networkx[${PYTHON_USEDEP}]
 	dev-python/numpy[${PYTHON_USEDEP}]
-	dev-python/rdflib[${PYTHON_USEDEP}]
 	dev-python/pygraphviz[${PYTHON_USEDEP}]
 	dev-python/reportlab[${PYTHON_USEDEP}]
 	media-gfx/pydot[${PYTHON_USEDEP}]
@@ -35,8 +34,10 @@ DEPEND="${RDEPEND}
 DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
 
 src_prepare() {
-	epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch
 	distutils-r1_src_prepare
+	epatch "${FILESDIR}/${PN}-1.62-SffIO.patch"
+	epatch "${FILESDIR}/SffIO_error_in_check_eof.patch"
+	epatch "${FILESDIR}/SffIO_broken_padding.patch"
 }
 
 python_test() {

diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.63-r1.ebuild
index e99f846..b600748 100644
--- a/sci-biology/biopython/biopython-1.63-r1.ebuild
+++ b/sci-biology/biopython/biopython-1.63-r1.ebuild
@@ -35,7 +35,8 @@ DEPEND="${RDEPEND}
 DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
 
 src_prepare() {
-	epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch
+	epatch "${FILESDIR}/SffIO_error_in_check_eof.patch"
+	epatch "${FILESDIR}/SffIO_broken_padding.patch"
 	distutils-r1_src_prepare
 }
 

diff --git a/sci-biology/biopython/files/SeqRecord.py.patch b/sci-biology/biopython/files/SeqRecord.py.patch
new file mode 100644
index 0000000..ac3785f
--- /dev/null
+++ b/sci-biology/biopython/files/SeqRecord.py.patch
@@ -0,0 +1,148 @@
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
+index 1971dba..43b38fd 100644
+--- a/Bio/SeqIO/SffIO.py
++++ b/Bio/SeqIO/SffIO.py
+@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
+ 
+ 
+ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+-                         key_sequence, alphabet, trim=False):
+-    """Parse the next read in the file, return data as a SeqRecord (PRIVATE)."""
++                         key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
++    """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
++    Allow user to specify which type of clipping values should be applied
++    while reading the SFF stream. To be backwards compatible, we interpret
++    only the quality-based trim points by default. That results in lower-cased
++    sequences in the low-qual region, regardless what adapter-based clip points
++    say. This should be the desired behavior. More discussion at
++    https://redmine.open-bio.org/issues/3437
++    """
+     #Now on to the reads...
+     #the read header format (fixed part):
+     #read_header_length     H
+@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+             warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken"
+                              % padding)
+     #Follow Roche and apply most aggressive of qual and adapter clipping.
+-    #Note Roche seems to ignore adapter clip fields when writing SFF,
+-    #and uses just the quality clipping values for any clipping.
+-    clip_left = max(clip_qual_left, clip_adapter_left)
+-    #Right clipping of zero means no clipping
+-    if clip_qual_right:
+-        if clip_adapter_right:
+-            clip_right = min(clip_qual_right, clip_adapter_right)
++    #Note Roche does not use adapter clip fields when writing SFF files
++    #but instead combines the adapter clipping information with quality-based
++    #values and writes the most aggressive combination into clip fields (as
++    #allowed by SFF specs).
++
++    if interpret_qual_trims:
++        if interpret_adapter_trims:
++            clip_left = max(clip_qual_left, clip_adapter_left)
++            #Right clipping of zero means no clipping
++            if clip_qual_right:
++                if clip_adapter_right:
++                    clip_right = min(clip_qual_right, clip_adapter_right)
++                else:
++                    #Typical case with Roche SFF files
++                    clip_right = clip_qual_right
++            elif clip_adapter_right:
++                clip_right = clip_adapter_right
++            else:
++                clip_right = seq_len
+         else:
+-            #Typical case with Roche SFF files
+-            clip_right = clip_qual_right
+-    elif clip_adapter_right:
+-        clip_right = clip_adapter_right
++	    clip_left = clip_qual_left
++	    if clip_qual_right:
++	        clip_right = clip_qual_right
++            else:
++	        clip_right = seq_len
++    elif interpret_adapter_trims:
++        clip_left = clip_adapter_left
++	if clip_adapter_right:
++	    clip_right = clip_adapter_right
++	else:
++	    clip_right = seq_len
+     else:
+-        clip_right = seq_len
++        clip_left = 0
++	clip_right = seq_len
++
+     #Now build a SeqRecord
+     if trim:
+         seq = seq[clip_left:clip_right].upper()
+diff --git a/Bio/SeqRecord.py b/Bio/SeqRecord.py
+index c90e13b..66bdea0 100644
+--- a/Bio/SeqRecord.py
++++ b/Bio/SeqRecord.py
+@@ -14,6 +14,8 @@ __docformat__ = "epytext en"  # Simple markup to show doctests nicely
+ # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class)
+ 
+ 
++from Bio.Seq import Seq
++
+ class _RestrictedDict(dict):
+     """Dict which only allows sequences of given length as values (PRIVATE).
+ 
+@@ -76,7 +78,7 @@ class _RestrictedDict(dict):
+         if not hasattr(value, "__len__") or not hasattr(value, "__getitem__") \
+         or (hasattr(self, "_length") and len(value) != self._length):
+             raise TypeError("We only allow python sequences (lists, tuples or "
+-                            "strings) of length %i." % self._length)
++                            "strings) of length %i whereas you passed an object of length %s." % (self._length, str(len(value))))
+         dict.__setitem__(self, key, value)
+ 
+     def update(self, new_dict):
+@@ -290,10 +292,11 @@ class SeqRecord(object):
+         """)
+ 
+     def _set_seq(self, value):
+-        #TODO - Add a deprecation warning that the seq should be write only?
+-        if self._per_letter_annotations:
+-            #TODO - Make this a warning? Silently empty the dictionary?
+-            raise ValueError("You must empty the letter annotations first!")
++        # we should be much more user friendly and accept even a plain sequence string
++	# and make the Seq or MutableSeq object ourselves
++        if not isinstance(value, Seq):
++            raise ValueError("You must pass a Seq object containing the new sequence instead of just plain string.")
++        else:
+         self._seq = value
+         try:
+             self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
+@@ -696,7 +699,7 @@ class SeqRecord(object):
+         SeqIO.write(self, handle, format_spec)
+         return handle.getvalue()
+ 
+-    def __len__(self):
++    def __len__(self, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
+         """Returns the length of the sequence.
+ 
+         For example, using Bio.SeqIO to read in a FASTA nucleotide file:
+@@ -707,6 +710,10 @@ class SeqRecord(object):
+         309
+         >>> len(record.seq)
+         309
++
++	It should be possible to get length of a raw object, of trimmed
++	object by quality or adapter criteria or both, whenever user wants
++	to, not only when data is parsed from input.
+         """
+         return len(self.seq)
+ 
+@@ -725,6 +732,13 @@ class SeqRecord(object):
+         """
+         return True
+ 
++    def apply_trimpoints(self, trim=False, interpret_qual_trims=False, interpret_adapter_trims=False):
++        """We should apply either of the quality-based or adapter-based annotated
++	trim points and return a new, sliced object.
++	"""
++	pass
++
++
+     def __add__(self, other):
+         """Add another sequence or string to this sequence.
+ 

diff --git a/sci-biology/biopython/files/SffIO_broken_padding.patch b/sci-biology/biopython/files/SffIO_broken_padding.patch
new file mode 100644
index 0000000..a009c58
--- /dev/null
+++ b/sci-biology/biopython/files/SffIO_broken_padding.patch
@@ -0,0 +1,27 @@
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
+index 735d55b..b89cf41 100644
+--- a/Bio/SeqIO/SffIO.py
++++ b/Bio/SeqIO/SffIO.py
+@@ -933,12 +933,20 @@ def _check_eof(handle, index_offset, index_length):
+                          "null padding region ended '.sff' which could "
+                          "be the start of a concatenated SFF file? "
+                          "See offset %i" % (padding, offset))
++    if padding and not extra:
++        #TODO - Is this error harmless enough to just ignore?
++        import warnings
++        from Bio import BiopythonParserWarning
++        warnings.warn("Your SFF file is technically invalid as it is missing "
++                      "a terminal %i byte null padding region." % padding,
++                      BiopythonParserWarning)
++        return
+     if extra.count(_null) != padding:
+         import warnings
+         from Bio import BiopythonParserWarning
+         warnings.warn("Your SFF file is invalid, post index %i byte "
+-                      "null padding region contained data." % padding,
+-                      BiopythonParserWarning)
++                      "null padding region contained data: %r"
++                      % (padding, extra), BiopythonParserWarning)
+ 
+     offset = handle.tell()
+     assert offset % 8 == 0, \

diff --git a/sci-biology/biopython/files/adjust-trimpoints.patch b/sci-biology/biopython/files/adjust-trimpoints.patch
new file mode 100644
index 0000000..dd6d548
--- /dev/null
+++ b/sci-biology/biopython/files/adjust-trimpoints.patch
@@ -0,0 +1,76 @@
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
+index 1971dba..43b38fd 100644
+--- a/Bio/SeqIO/SffIO.py
++++ b/Bio/SeqIO/SffIO.py
+@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
+ 
+ 
+ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+-                         key_sequence, alphabet, trim=False):
+-    """Parse the next read in the file, return data as a SeqRecord (PRIVATE)."""
++                         key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
++    """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
++    Allow user to specify which type of clipping values should be applied
++    while reading the SFF stream. To be backwards compatible, we interpret
++    only the quality-based trim points by default. That results in lower-cased
++    sequences in the low-qual region, regardless what adapter-based clip points
++    say. This should be the desired behavior. More discussion at
++    https://redmine.open-bio.org/issues/3437
++    """
+     #Now on to the reads...
+     #the read header format (fixed part):
+     #read_header_length     H
+@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+             warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken"
+                              % padding)
+     #Follow Roche and apply most aggressive of qual and adapter clipping.
+-    #Note Roche seems to ignore adapter clip fields when writing SFF,
+-    #and uses just the quality clipping values for any clipping.
+-    clip_left = max(clip_qual_left, clip_adapter_left)
+-    #Right clipping of zero means no clipping
+-    if clip_qual_right:
+-        if clip_adapter_right:
+-            clip_right = min(clip_qual_right, clip_adapter_right)
++    #Note Roche does not use adapter clip fields when writing SFF files
++    #but instead combines the adapter clipping information with quality-based
++    #values and writes the most aggressive combination into clip fields (as
++    #allowed by SFF specs).
++
++    if interpret_qual_trims:
++        if interpret_adapter_trims:
++            clip_left = max(clip_qual_left, clip_adapter_left)
++            #Right clipping of zero means no clipping
++            if clip_qual_right:
++                if clip_adapter_right:
++                    clip_right = min(clip_qual_right, clip_adapter_right)
++                else:
++                    #Typical case with Roche SFF files
++                    clip_right = clip_qual_right
++            elif clip_adapter_right:
++                clip_right = clip_adapter_right
++            else:
++                clip_right = seq_len
+         else:
+-            #Typical case with Roche SFF files
+-            clip_right = clip_qual_right
+-    elif clip_adapter_right:
+-        clip_right = clip_adapter_right
++	    clip_left = clip_qual_left
++	    if clip_qual_right:
++	        clip_right = clip_qual_right
++            else:
++	        clip_right = seq_len
++    elif interpret_adapter_trims:
++        clip_left = clip_adapter_left
++	if clip_adapter_right:
++	    clip_right = clip_adapter_right
++	else:
++	    clip_right = seq_len
+     else:
+-        clip_right = seq_len
++        clip_left = 0
++	clip_right = seq_len
++
+     #Now build a SeqRecord
+     if trim:
+         seq = seq[clip_left:clip_right].upper()