public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/
@ 2017-11-21 16:11 Martin Mokrejs
  0 siblings, 0 replies; 2+ messages in thread
From: Martin Mokrejs @ 2017-11-21 16:11 UTC (permalink / raw)
  To: gentoo-commits

commit:     aab87747b89b64107677056a3d4874d8f5ee7bbf
Author:     Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Tue Nov 21 16:11:28 2017 +0000
Commit:     Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Tue Nov 21 16:11:28 2017 +0000
URL:        https://gitweb.gentoo.org/proj/sci.git/commit/?id=aab87747

sci-biology/SEECER: execute jellyfish1 instead of jellyfish

Also I wrote a few cleanup patches to expose the THREADS
variable and clean up the code.

Package-Manager: Portage-2.3.14, Repoman-2.3.6

 sci-biology/SEECER/SEECER-0.1.3-r2.ebuild          |  7 ++-
 .../SEECER/files/rename_jellyfish_binary.patch     | 11 ++++
 sci-biology/SEECER/files/run_jellyfish.sh.patch    | 72 ++++++++++++++++++++++
 sci-biology/SEECER/files/run_seecer.sh.patch       | 42 +++++++++++++
 4 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
index 60862d8c8..0b7ec3bfa 100644
--- a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
+++ b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
@@ -5,7 +5,7 @@ EAPI=6
 
 inherit eutils
 
-DESCRIPTION="SEquencing Error Corrector for RNA-Seq reads"
+DESCRIPTION="SEquence Error Corrector for RNA-Seq reads"
 HOMEPAGE="http://sb.cs.cmu.edu/seecer/"
 SRC_URI="
 	http://sb.cs.cmu.edu/seecer/downloads/"${P}".tar.gz
@@ -22,12 +22,15 @@ DEPEND="
 	sci-libs/gsl:0=
 	sci-biology/seqan:0="
 RDEPEND="${DEPEND}
-	=sci-biology/jellyfish-1.1.11"
+	=sci-biology/jellyfish-1.1.11-r1"
 
 S="${S}"/SEECER
 
 PATCHES=(
 	"${FILESDIR}"/remove-hardcoded-paths.patch
+	"${FILESDIR}"/run_seecer.sh.patch
+	"${FILESDIR}"/run_jellyfish.sh.patch
+	"${FILESDIR}"/rename_jellyfish_binary.patch
 )
 
 src_prepare(){

diff --git a/sci-biology/SEECER/files/rename_jellyfish_binary.patch b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
new file mode 100644
index 000000000..c6548cee1
--- /dev/null
+++ b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
@@ -0,0 +1,11 @@
+--- SEECER/bin/run_seecer.sh.ori	2017-11-21 16:56:28.808767468 +0100
++++ SEECER/bin/run_seecer.sh	2017-11-21 16:57:07.469835728 +0100
+@@ -26,7 +26,7 @@
+ 
+ 
+ BINDIR='' #this can be hardcoded to /absolute/path/to/SEECER/bin/
+-JF="jellyfish"    #this may be hardcoded to /absolute/path/to/jellyfish/bin/
++JF="jellyfish1"    #this may be hardcoded to /absolute/path/to/jellyfish/bin/jellyfish
+ 
+ K=17
+ SEECER_PARAMS=""

diff --git a/sci-biology/SEECER/files/run_jellyfish.sh.patch b/sci-biology/SEECER/files/run_jellyfish.sh.patch
new file mode 100644
index 000000000..7631f5a4c
--- /dev/null
+++ b/sci-biology/SEECER/files/run_jellyfish.sh.patch
@@ -0,0 +1,72 @@
+--- SEECER-0.1.3/bin/run_jellyfish.sh.ori	2017-11-21 16:41:54.164599838 +0100
++++ SEECER-0.1.3/bin/run_jellyfish.sh	2017-11-21 16:46:28.022166903 +0100
+@@ -1,18 +1,45 @@
+ #!/bin/bash
++
++# Usage: run_jellyfish.sh jellyfish_binpath tempfile_prefix kmersize mincount tmpdir infile1 [infile2] threads
+ JF=$1
+ LCOUNT=$4
+ TMPDIR=$5
++THREADS=${8:-32}
+ 
+ if [ -z "$JF" ]; then
+     echo "No path to jellyfish binary provided, exiting.";
+     exit 255;
+ fi
+ 
++# Usage: jellyfish count [options] file:path+
++#
++# Count k-mers or qmers in fasta or fastq files
++# 
++# Options (default value in (), *required):
++#  -m, --mer-len=uint32                    *Length of mer
++#  -s, --size=uint64                       *Hash size
++#  -t, --threads=uint32                     Number of threads (1)
++#  -o, --output=string                      Output prefix (mer_counts)
++#  -c, --counter-len=Length in bits         Length of counting field (7)
++#      --out-counter-len=Length in bytes    Length of counter field in output (4)
++#  -C, --both-strands                       Count both strand, canonical representation (false)
++#  -p, --reprobes=uint32                    Maximum number of reprobes (62)
++#  -r, --raw                                Write raw database (false)
++#  -q, --quake                              Quake compatibility mode (false)
++#      --quality-start=uint32               Starting ASCII for quality values (64)
++#      --min-quality=uint32                 Minimum quality. A base with lesser quality becomes an N (0)
++#  -L, --lower-count=uint64                 Don't output k-mer with count < lower-count
++#  -U, --upper-count=uint64                 Don't output k-mer with count > upper-count
++#      --invalid-char=warn|ignore|error     How to treat invalid characters. The char is changed to a N. (warn)
++#      --matrix=Matrix file                 Hash function binary matrix
++#      --timing=Timing file                 Print timing information
++#      --stats=Stats file                   Print stats
++#
+ if [ "$#" -eq "4" ];
+ then
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 || exit 255
+ else
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 $7 || exit 255
+ fi;
+ 
+ # merge
+@@ -25,5 +52,21 @@
+     rm $TMPDIR/jf_tmp_*
+ fi
+ 
++#
++# Usage: jellyfish dump [options] db:path
++# 
++# Dump k-mer counts
++# 
++# By default, dump in a fasta format where the header is the count and
++# the sequence is the sequence of the k-mer. The column format is a 2
++# column output: k-mer count.
++# 
++# Options (default value in (), *required):
++#  -c, --column                             Column format (false)
++#  -t, --tab                                Tab separator (false)
++#  -L, --lower-count=uint64                 Don't output k-mer with count < lower-count
++#  -U, --upper-count=uint64                 Don't output k-mer with count > upper-count
++#  -o, --output=string                      Output file
++#
+ $JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3 || exit 255
+ rm $TMPDIR/jf_merged_$3

diff --git a/sci-biology/SEECER/files/run_seecer.sh.patch b/sci-biology/SEECER/files/run_seecer.sh.patch
new file mode 100644
index 000000000..a20c7917f
--- /dev/null
+++ b/sci-biology/SEECER/files/run_seecer.sh.patch
@@ -0,0 +1,42 @@
+--- SEECER/bin/run_seecer.sh.old	2013-10-02 18:55:24.000000000 +0200
++++ SEECER/bin/run_seecer.sh	2017-11-21 16:24:24.065584149 +0100
+@@ -33,6 +33,7 @@
+ SeecerStep=1
+ LCOUNT=3
+ TMPDIR=''
++THREADS=32
+ 
+ usage=$(cat << EOF
+    # This script runs the SEECER pipeline of 4 steps:
+@@ -54,11 +55,12 @@
+       -j <v> : specify the location of JELLYFISH binary (default = $JF).
+       -p <v> : specify extra SEECER parameters (default = '').
+       -s <v> : specify the starting step ( default = 1). Values = 1,2,3,4.
++      -c <v> : number of threads (default = 32).
+       -h : help message
+ EOF
+ );
+ 
+-while getopts ":j:p:k:s:t:h" opt; do
++while getopts ":j:p:k:s:t:c:h" opt; do
+   case $opt in
+     t)
+       TMPDIR=$OPTARG
+@@ -75,6 +77,8 @@
+     s)
+       SeecerStep=$OPTARG
+       ;;
++    c)
++      THREADS=$OPTARG
+     \?)
+       echo "Invalid option: -$OPTARG" >&2
+       echo "$usage"
+@@ -170,7 +177,7 @@
+ then
+     echo "++ Step 2: Running JELLYFISH to count kmers ..."
+     echo
+-    bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N || exit 255
++    bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N $THREADS || exit 255
+ fi;
+ 
+ if [ ! -r $TMPDIR/counts_${K}_${LCOUNT} ];


^ permalink raw reply related	[flat|nested] 2+ messages in thread
* [gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/
@ 2015-11-13 19:04 Martin Mokrejs
  0 siblings, 0 replies; 2+ messages in thread
From: Martin Mokrejs @ 2015-11-13 19:04 UTC (permalink / raw)
  To: gentoo-commits

commit:     6af16cbc76de1d4d8a1726e81880cfccb28adbb6
Author:     Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Fri Nov 13 19:03:02 2015 +0000
Commit:     Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Fri Nov 13 19:03:02 2015 +0000
URL:        https://gitweb.gentoo.org/proj/sci.git/commit/?id=6af16cbc

sci-biology/SEECER: patch crappy shellscripts to at least die on error; require jellyfish-1.1.11

Package-Manager: portage-2.2.18

 sci-biology/SEECER/ChangeLog                       |  5 +++
 sci-biology/SEECER/SEECER-0.1.3-r2.ebuild          |  3 +-
 .../SEECER/files/remove-hardcoded-paths.patch      | 47 +++++++++++++++++++---
 3 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/sci-biology/SEECER/ChangeLog b/sci-biology/SEECER/ChangeLog
index 2872b27..b72cc46 100644
--- a/sci-biology/SEECER/ChangeLog
+++ b/sci-biology/SEECER/ChangeLog
@@ -2,6 +2,11 @@
 # Copyright 1999-2015 Gentoo Foundation; Distributed under the GPL v2
 # $Id$
 
+  13 Nov 2015; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
+  SEECER-0.1.3-r2.ebuild, files/remove-hardcoded-paths.patch:
+  sci-biology/SEECER: patch crappy shellscripts to at leats die on error;
+  require jellyfish-1.1.11
+
 *SEECER-0.1.3-r2 (13 Nov 2015)
 
   13 Nov 2015; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>

diff --git a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
index 730c429..3a9138c 100644
--- a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
+++ b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
@@ -18,11 +18,12 @@ KEYWORDS="~amd64"
 IUSE=""
 
 # although has bundled jellyfish-1.1.11 copy it just calls the executable during runtime
+# seems jellyfish-2 does not accept same commandline arguments
 DEPEND="
 	sci-libs/gsl
 	sci-biology/seqan"
 RDEPEND="${DEPEND}
-	sci-biology/jellyfish"
+	=sci-biology/jellyfish-1.1.11"
 
 S="${S}"/SEECER
 

diff --git a/sci-biology/SEECER/files/remove-hardcoded-paths.patch b/sci-biology/SEECER/files/remove-hardcoded-paths.patch
index 9258e50..4e317e6 100644
--- a/sci-biology/SEECER/files/remove-hardcoded-paths.patch
+++ b/sci-biology/SEECER/files/remove-hardcoded-paths.patch
@@ -1,5 +1,40 @@
---- SEECER-0.1.3/SEECER/bin/run_seecer.sh.old	2015-11-13 18:17:53.985784977 +0100
-+++ SEECER-0.1.3/SEECER/bin/run_seecer.sh	2015-11-13 18:20:19.995787411 +0100
+--- SEECER-0.1.3/SEECER/bin/run_jellyfish.sh.ori	2015-11-13 18:40:01.595807104 +0100
++++ SEECER-0.1.3/SEECER/bin/run_jellyfish.sh	2015-11-13 18:51:45.655818838 +0100
+@@ -3,22 +3,27 @@
+ LCOUNT=$4
+ TMPDIR=$5
+ 
++if [ -z "$JF" ]; then
++    echo "No path to jellyfish binary provided, exiting.";
++    exit 255;
++fi
++
+ if [ "$#" -eq "4" ];
+ then
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 || exit 255
+ else
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7 || exit 255
+ fi;
+ 
+ # merge
+ N_TMP=`ls -1 $TMPDIR/jf_tmp_* | wc -l`
+ if [ $N_TMP -eq 1 ]
+ then
+-    mv $TMPDIR/jf_tmp_0 $TMPDIR/jf_merged_$3
++    mv $TMPDIR/jf_tmp_0 $TMPDIR/jf_merged_$3 || exit 255
+ else
+-    $JF merge $TMPDIR/jf_tmp_* -o $TMPDIR/jf_merged_$3
++    $JF merge $TMPDIR/jf_tmp_* -o $TMPDIR/jf_merged_$3 || exit 255
+     rm $TMPDIR/jf_tmp_*
+ fi
+ 
+-$JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3
++$JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3 || exit 255
+ rm $TMPDIR/jf_merged_$3
+--- SEECER-0.1.3/SEECER/bin/run_seecer.sh.ori	2015-11-13 18:40:16.215807347 +0100
++++ SEECER-0.1.3/SEECER/bin/run_seecer.sh	2015-11-13 18:53:03.695820138 +0100
 @@ -25,8 +25,8 @@
  #
  
@@ -16,7 +51,7 @@
      echo "++ Step 1: Replacing Ns ... and stripping off read IDs"
      echo
 -    ${BINDIR}/random_sub_N $RS_ARGS
-+    "${BINDIR}"random_sub_N $RS_ARGS
++    "${BINDIR}"random_sub_N $RS_ARGS || exit 255
  fi;
  
  if [ ! -r $Read1_N ];
@@ -25,7 +60,7 @@
      echo "++ Step 2: Running JELLYFISH to count kmers ..."
      echo
 -    bash ${BINDIR}/run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N
-+    bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N
++    bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N || exit 255
  fi;
  
  if [ ! -r $TMPDIR/counts_${K}_${LCOUNT} ];
@@ -34,7 +69,7 @@
      echo " *** Start time: " `date`;
  
 -    ${BINDIR}/seecer $Read1_N $Read2_N $SEECER_PARAMS --kmer $K -k $TMPDIR/counts_${K}_${LCOUNT} -o $TMPDIR/corrected.fasta
-+    "${BINDIR}"seecer $Read1_N $Read2_N $SEECER_PARAMS --kmer $K -k $TMPDIR/counts_${K}_${LCOUNT} -o $TMPDIR/corrected.fasta
++    "${BINDIR}"seecer $Read1_N $Read2_N $SEECER_PARAMS --kmer $K -k $TMPDIR/counts_${K}_${LCOUNT} -o $TMPDIR/corrected.fasta || exit 255
      echo " *** End time: " `date`;
      echo "-----------------------------------------------------------------------"
      echo
@@ -43,7 +78,7 @@
  then
      echo "++ Step 4: Cleaning and putting back original read IDs ... We finish soon!"
 -    ${BINDIR}/replace_ids $TMPDIR/corrected.fasta $Reads $Reads_N $Reads_O
-+    "${BINDIR}"replace_ids $TMPDIR/corrected.fasta $Reads $Reads_N $Reads_O
++    "${BINDIR}"replace_ids $TMPDIR/corrected.fasta $Reads $Reads_N $Reads_O || exit 255
  #    rm $TMPDIR/corrected.fasta
  fi;
   


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-11-21 16:11 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2017-11-21 16:11 [gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/ Martin Mokrejs
  -- strict thread matches above, loose matches on Subject: below --
2015-11-13 19:04 Martin Mokrejs

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox