* [gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/
@ 2015-11-13 19:04 Martin Mokrejs
0 siblings, 0 replies; 2+ messages in thread
From: Martin Mokrejs @ 2015-11-13 19:04 UTC (permalink / raw)
To: gentoo-commits
commit: 6af16cbc76de1d4d8a1726e81880cfccb28adbb6
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Fri Nov 13 19:03:02 2015 +0000
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Fri Nov 13 19:03:02 2015 +0000
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=6af16cbc
sci-biology/SEECER: patch crappy shellscripts to at least die on error; require jellyfish-1.1.11
Package-Manager: portage-2.2.18
sci-biology/SEECER/ChangeLog | 5 +++
sci-biology/SEECER/SEECER-0.1.3-r2.ebuild | 3 +-
.../SEECER/files/remove-hardcoded-paths.patch | 47 +++++++++++++++++++---
3 files changed, 48 insertions(+), 7 deletions(-)
diff --git a/sci-biology/SEECER/ChangeLog b/sci-biology/SEECER/ChangeLog
index 2872b27..b72cc46 100644
--- a/sci-biology/SEECER/ChangeLog
+++ b/sci-biology/SEECER/ChangeLog
@@ -2,6 +2,11 @@
# Copyright 1999-2015 Gentoo Foundation; Distributed under the GPL v2
# $Id$
+ 13 Nov 2015; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
+ SEECER-0.1.3-r2.ebuild, files/remove-hardcoded-paths.patch:
+ sci-biology/SEECER: patch crappy shellscripts to at least die on error;
+ require jellyfish-1.1.11
+
*SEECER-0.1.3-r2 (13 Nov 2015)
13 Nov 2015; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
diff --git a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
index 730c429..3a9138c 100644
--- a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
+++ b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
@@ -18,11 +18,12 @@ KEYWORDS="~amd64"
IUSE=""
# although has bundled jellyfish-1.1.11 copy it just calls the executable during runtime
+# it seems jellyfish-2 does not accept the same command-line arguments
DEPEND="
sci-libs/gsl
sci-biology/seqan"
RDEPEND="${DEPEND}
- sci-biology/jellyfish"
+ =sci-biology/jellyfish-1.1.11"
S="${S}"/SEECER
diff --git a/sci-biology/SEECER/files/remove-hardcoded-paths.patch b/sci-biology/SEECER/files/remove-hardcoded-paths.patch
index 9258e50..4e317e6 100644
--- a/sci-biology/SEECER/files/remove-hardcoded-paths.patch
+++ b/sci-biology/SEECER/files/remove-hardcoded-paths.patch
@@ -1,5 +1,40 @@
---- SEECER-0.1.3/SEECER/bin/run_seecer.sh.old 2015-11-13 18:17:53.985784977 +0100
-+++ SEECER-0.1.3/SEECER/bin/run_seecer.sh 2015-11-13 18:20:19.995787411 +0100
+--- SEECER-0.1.3/SEECER/bin/run_jellyfish.sh.ori 2015-11-13 18:40:01.595807104 +0100
++++ SEECER-0.1.3/SEECER/bin/run_jellyfish.sh 2015-11-13 18:51:45.655818838 +0100
+@@ -3,22 +3,27 @@
+ LCOUNT=$4
+ TMPDIR=$5
+
++if [ -z "$JF" ]; then
++ echo "No path to jellyfish binary provided, exiting.";
++ exit 255;
++fi
++
+ if [ "$#" -eq "4" ];
+ then
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 || exit 255
+ else
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7 || exit 255
+ fi;
+
+ # merge
+ N_TMP=`ls -1 $TMPDIR/jf_tmp_* | wc -l`
+ if [ $N_TMP -eq 1 ]
+ then
+- mv $TMPDIR/jf_tmp_0 $TMPDIR/jf_merged_$3
++ mv $TMPDIR/jf_tmp_0 $TMPDIR/jf_merged_$3 || exit 255
+ else
+- $JF merge $TMPDIR/jf_tmp_* -o $TMPDIR/jf_merged_$3
++ $JF merge $TMPDIR/jf_tmp_* -o $TMPDIR/jf_merged_$3 || exit 255
+ rm $TMPDIR/jf_tmp_*
+ fi
+
+-$JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3
++$JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3 || exit 255
+ rm $TMPDIR/jf_merged_$3
+--- SEECER-0.1.3/SEECER/bin/run_seecer.sh.ori 2015-11-13 18:40:16.215807347 +0100
++++ SEECER-0.1.3/SEECER/bin/run_seecer.sh 2015-11-13 18:53:03.695820138 +0100
@@ -25,8 +25,8 @@
#
@@ -16,7 +51,7 @@
echo "++ Step 1: Replacing Ns ... and stripping off read IDs"
echo
- ${BINDIR}/random_sub_N $RS_ARGS
-+ "${BINDIR}"random_sub_N $RS_ARGS
++ "${BINDIR}"random_sub_N $RS_ARGS || exit 255
fi;
if [ ! -r $Read1_N ];
@@ -25,7 +60,7 @@
echo "++ Step 2: Running JELLYFISH to count kmers ..."
echo
- bash ${BINDIR}/run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N
-+ bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N
++ bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N || exit 255
fi;
if [ ! -r $TMPDIR/counts_${K}_${LCOUNT} ];
@@ -34,7 +69,7 @@
echo " *** Start time: " `date`;
- ${BINDIR}/seecer $Read1_N $Read2_N $SEECER_PARAMS --kmer $K -k $TMPDIR/counts_${K}_${LCOUNT} -o $TMPDIR/corrected.fasta
-+ "${BINDIR}"seecer $Read1_N $Read2_N $SEECER_PARAMS --kmer $K -k $TMPDIR/counts_${K}_${LCOUNT} -o $TMPDIR/corrected.fasta
++ "${BINDIR}"seecer $Read1_N $Read2_N $SEECER_PARAMS --kmer $K -k $TMPDIR/counts_${K}_${LCOUNT} -o $TMPDIR/corrected.fasta || exit 255
echo " *** End time: " `date`;
echo "-----------------------------------------------------------------------"
echo
@@ -43,7 +78,7 @@
then
echo "++ Step 4: Cleaning and putting back original read IDs ... We finish soon!"
- ${BINDIR}/replace_ids $TMPDIR/corrected.fasta $Reads $Reads_N $Reads_O
-+ "${BINDIR}"replace_ids $TMPDIR/corrected.fasta $Reads $Reads_N $Reads_O
++ "${BINDIR}"replace_ids $TMPDIR/corrected.fasta $Reads $Reads_N $Reads_O || exit 255
# rm $TMPDIR/corrected.fasta
fi;
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/
@ 2017-11-21 16:11 Martin Mokrejs
0 siblings, 0 replies; 2+ messages in thread
From: Martin Mokrejs @ 2017-11-21 16:11 UTC (permalink / raw)
To: gentoo-commits
commit: aab87747b89b64107677056a3d4874d8f5ee7bbf
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Tue Nov 21 16:11:28 2017 +0000
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Tue Nov 21 16:11:28 2017 +0000
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=aab87747
sci-biology/SEECER: execute jellyfish1 instead of jellyfish
Also, I wrote a few cleanup patches to expose the THREADS
variable and clean up the code.
Package-Manager: Portage-2.3.14, Repoman-2.3.6
sci-biology/SEECER/SEECER-0.1.3-r2.ebuild | 7 ++-
.../SEECER/files/rename_jellyfish_binary.patch | 11 ++++
sci-biology/SEECER/files/run_jellyfish.sh.patch | 72 ++++++++++++++++++++++
sci-biology/SEECER/files/run_seecer.sh.patch | 42 +++++++++++++
4 files changed, 130 insertions(+), 2 deletions(-)
diff --git a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
index 60862d8c8..0b7ec3bfa 100644
--- a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
+++ b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
@@ -5,7 +5,7 @@ EAPI=6
inherit eutils
-DESCRIPTION="SEquencing Error Corrector for RNA-Seq reads"
+DESCRIPTION="SEquence Error Corrector for RNA-Seq reads"
HOMEPAGE="http://sb.cs.cmu.edu/seecer/"
SRC_URI="
http://sb.cs.cmu.edu/seecer/downloads/"${P}".tar.gz
@@ -22,12 +22,15 @@ DEPEND="
sci-libs/gsl:0=
sci-biology/seqan:0="
RDEPEND="${DEPEND}
- =sci-biology/jellyfish-1.1.11"
+ =sci-biology/jellyfish-1.1.11-r1"
S="${S}"/SEECER
PATCHES=(
"${FILESDIR}"/remove-hardcoded-paths.patch
+ "${FILESDIR}"/run_seecer.sh.patch
+ "${FILESDIR}"/run_jellyfish.sh.patch
+ "${FILESDIR}"/rename_jellyfish_binary.patch
)
src_prepare(){
diff --git a/sci-biology/SEECER/files/rename_jellyfish_binary.patch b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
new file mode 100644
index 000000000..c6548cee1
--- /dev/null
+++ b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
@@ -0,0 +1,11 @@
+--- SEECER/bin/run_seecer.sh.ori 2017-11-21 16:56:28.808767468 +0100
++++ SEECER/bin/run_seecer.sh 2017-11-21 16:57:07.469835728 +0100
+@@ -26,7 +26,7 @@
+
+
+ BINDIR='' #this can be hardcoded to /absolute/path/to/SEECER/bin/
+-JF="jellyfish" #this may be hardcoded to /absolute/path/to/jellyfish/bin/
++JF="jellyfish1" #this may be hardcoded to /absolute/path/to/jellyfish/bin/jellyfish
+
+ K=17
+ SEECER_PARAMS=""
diff --git a/sci-biology/SEECER/files/run_jellyfish.sh.patch b/sci-biology/SEECER/files/run_jellyfish.sh.patch
new file mode 100644
index 000000000..7631f5a4c
--- /dev/null
+++ b/sci-biology/SEECER/files/run_jellyfish.sh.patch
@@ -0,0 +1,72 @@
+--- SEECER-0.1.3/bin/run_jellyfish.sh.ori 2017-11-21 16:41:54.164599838 +0100
++++ SEECER-0.1.3/bin/run_jellyfish.sh 2017-11-21 16:46:28.022166903 +0100
+@@ -1,18 +1,45 @@
+ #!/bin/bash
++
++# Usage: run_jellyfish.sh jellyfish_binpath tempfile_prefix kmersize mincount tmpdir infile1 [infile2] threads
+ JF=$1
+ LCOUNT=$4
+ TMPDIR=$5
++THREADS=${8:-32}
+
+ if [ -z "$JF" ]; then
+ echo "No path to jellyfish binary provided, exiting.";
+ exit 255;
+ fi
+
++# Usage: jellyfish count [options] file:path+
++#
++# Count k-mers or qmers in fasta or fastq files
++#
++# Options (default value in (), *required):
++# -m, --mer-len=uint32 *Length of mer
++# -s, --size=uint64 *Hash size
++# -t, --threads=uint32 Number of threads (1)
++# -o, --output=string Output prefix (mer_counts)
++# -c, --counter-len=Length in bits Length of counting field (7)
++# --out-counter-len=Length in bytes Length of counter field in output (4)
++# -C, --both-strands Count both strand, canonical representation (false)
++# -p, --reprobes=uint32 Maximum number of reprobes (62)
++# -r, --raw Write raw database (false)
++# -q, --quake Quake compatibility mode (false)
++# --quality-start=uint32 Starting ASCII for quality values (64)
++# --min-quality=uint32 Minimum quality. A base with lesser quality becomes an N (0)
++# -L, --lower-count=uint64 Don't output k-mer with count < lower-count
++# -U, --upper-count=uint64 Don't output k-mer with count > upper-count
++# --invalid-char=warn|ignore|error How to treat invalid characters. The char is changed to a N. (warn)
++# --matrix=Matrix file Hash function binary matrix
++# --timing=Timing file Print timing information
++# --stats=Stats file Print stats
++#
+ if [ "$#" -eq "4" ];
+ then
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 || exit 255
+ else
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 $7 || exit 255
+ fi;
+
+ # merge
+@@ -25,5 +52,21 @@
+ rm $TMPDIR/jf_tmp_*
+ fi
+
++#
++# Usage: jellyfish dump [options] db:path
++#
++# Dump k-mer counts
++#
++# By default, dump in a fasta format where the header is the count and
++# the sequence is the sequence of the k-mer. The column format is a 2
++# column output: k-mer count.
++#
++# Options (default value in (), *required):
++# -c, --column Column format (false)
++# -t, --tab Tab separator (false)
++# -L, --lower-count=uint64 Don't output k-mer with count < lower-count
++# -U, --upper-count=uint64 Don't output k-mer with count > upper-count
++# -o, --output=string Output file
++#
+ $JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3 || exit 255
+ rm $TMPDIR/jf_merged_$3
diff --git a/sci-biology/SEECER/files/run_seecer.sh.patch b/sci-biology/SEECER/files/run_seecer.sh.patch
new file mode 100644
index 000000000..a20c7917f
--- /dev/null
+++ b/sci-biology/SEECER/files/run_seecer.sh.patch
@@ -0,0 +1,42 @@
+--- SEECER/bin/run_seecer.sh.old 2013-10-02 18:55:24.000000000 +0200
++++ SEECER/bin/run_seecer.sh 2017-11-21 16:24:24.065584149 +0100
+@@ -33,6 +33,7 @@
+ SeecerStep=1
+ LCOUNT=3
+ TMPDIR=''
++THREADS=32
+
+ usage=$(cat << EOF
+ # This script runs the SEECER pipeline of 4 steps:
+@@ -54,11 +55,12 @@
+ -j <v> : specify the location of JELLYFISH binary (default = $JF).
+ -p <v> : specify extra SEECER parameters (default = '').
+ -s <v> : specify the starting step ( default = 1). Values = 1,2,3,4.
++ -c <v> : number of threads (default = 32).
+ -h : help message
+ EOF
+ );
+
+-while getopts ":j:p:k:s:t:h" opt; do
++while getopts ":j:p:k:s:t:c:h" opt; do
+ case $opt in
+ t)
+ TMPDIR=$OPTARG
+@@ -75,6 +77,8 @@
+ s)
+ SeecerStep=$OPTARG
+ ;;
++ c)
++ THREADS=$OPTARG
+ \?)
+ echo "Invalid option: -$OPTARG" >&2
+ echo "$usage"
+@@ -170,7 +177,7 @@
+ then
+ echo "++ Step 2: Running JELLYFISH to count kmers ..."
+ echo
+- bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N || exit 255
++ bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N $THREADS || exit 255
+ fi;
+
+ if [ ! -r $TMPDIR/counts_${K}_${LCOUNT} ];
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-11-21 16:11 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-13 19:04 [gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/ Martin Mokrejs
-- strict thread matches above, loose matches on Subject: below --
2017-11-21 16:11 Martin Mokrejs
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox