* [gentoo-commits] proj/sci:master commit in: sci-biology/transdecoder/, sci-biology/transdecoder/files/
@ 2015-01-09 15:35 Martin Mokrejs
0 siblings, 0 replies; 2+ messages in thread
From: Martin Mokrejs @ 2015-01-09 15:35 UTC (permalink / raw
To: gentoo-commits
commit: cf02cac01f101bdcabbd2b3ed95145e97584b925
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Fri Jan 9 15:34:03 2015 +0000
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Fri Jan 9 15:34:03 2015 +0000
URL: http://sources.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=cf02cac0
sci-biology/transdecoder: added patches so that we use PATH to locate binaries and not in a local subdirectory named 'util', drop sys-cluster/openmpi requirement, it does not link against it at all, this is a bunch of perl and shell scripts
Package-Manager: portage-2.2.7
---
sci-biology/transdecoder/ChangeLog | 8 ++
sci-biology/transdecoder/files/TransDecoder.patch | 136 +++++++++++++++++++++
.../transdecoder/files/pfam_runner.pl.patch | 20 +++
.../transdecoder/transdecoder-20140704.ebuild | 9 +-
4 files changed, 171 insertions(+), 2 deletions(-)
diff --git a/sci-biology/transdecoder/ChangeLog b/sci-biology/transdecoder/ChangeLog
index 252ef53..c564760 100644
--- a/sci-biology/transdecoder/ChangeLog
+++ b/sci-biology/transdecoder/ChangeLog
@@ -2,6 +2,14 @@
# Copyright 1999-2015 Gentoo Foundation; Distributed under the GPL v2
# $Header: $
+ 09 Jan 2015; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
+ +files/TransDecoder.patch, +files/pfam_runner.pl.patch,
+ transdecoder-20140704.ebuild:
+ sci-biology/transdecoder: added patches so that we use PATH to locate
+ binaries and not in a local subdirectory named 'util', drop sys-
+ cluster/openmpi requirement, it does not link against it at all, this is a
+ bunch of perl and shell scripts
+
*transdecoder-20140704 (08 Jan 2015)
08 Jan 2015; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz> +metadata.xml,
diff --git a/sci-biology/transdecoder/files/TransDecoder.patch b/sci-biology/transdecoder/files/TransDecoder.patch
new file mode 100644
index 0000000..c0cff94
--- /dev/null
+++ b/sci-biology/transdecoder/files/TransDecoder.patch
@@ -0,0 +1,136 @@
+--- /usr/bin/TransDecoder 2015-01-09 11:22:55.000000000 +0100
++++ TransDecoder 2015-01-09 14:31:44.095839522 +0100
+@@ -48,7 +48,7 @@
+ --prepare_pfam Prepare data for PFAM search and then quit (for running PFAM on HPC/computing cluster
+ with or without MPI )
+
+- --CPU <int> number of threads to use; (default: 2)
++ --CPU <int> number of threads to use; (default: 1)
+
+ --MPI use MPI w/ execution of hmmscan
+
+@@ -76,7 +76,7 @@
+
+ =head1 PFAM
+
+-You will need hmmer installed. Use hmmpress to prepare the database for hmmer.
++You will need hmmer installed. Use hmmpress from >=hmmer-3.0 to prepare the database for hmmer.
+ L<See|https://sourceforge.net/projects/transdecoder/files/Pfam-AB.hmm.bin> for downloading the database.
+
+ =head1 CD-HIT
+@@ -105,7 +105,6 @@
+ use Longest_orf;
+
+ my $UTIL_DIR = "$FindBin::RealBin/util";
+-$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
+ $ENV{LD_LIBRARY_PATH} .= ":$FindBin::RealBin/util/lib64";
+
+ my ($cd_hit_est_exec) = &check_program('cd-hit-est');
+@@ -124,7 +123,7 @@
+ my $verbose;
+ my $search_pfam = "";
+ my ($reuse,$pfam_out);
+-my $CPU = 2;
++my $CPU = 1;
+ my $RETAIN_LONG_ORFS = 900;
+ my $MPI = 0;
+
+@@ -330,15 +329,15 @@
+ my $top_cds_file = $train_file && -s $train_file ? $train_file : "$cds_file.top_${top_ORFs_train}_longest";
+ if (!-s $top_cds_file) {
+ # get longest entries
+- my $cmd = "$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file";
++ my $cmd = "get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file";
+
+ unless ($reuse && -s $top_cds_file){
+ if ($cd_hit_est_exec){
+ # to speed things up only check for redundancy up to 4x the number of entries we want
+ my $red_num = $top_ORFs_train * 4 ;
+- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top");
++ &process_cmd("get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top");
+ &process_cmd("$cd_hit_est_exec -r 1 -i $workdir/redundant_top -o $workdir/redundant_top.nr90 -M 0 -T $CPU >/dev/null 2>/dev/null");
+- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file");
++ &process_cmd("get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file");
+ unlink("$workdir/redundant_top");
+ unlink("$workdir/redundant_top.nr90");
+ unlink("$workdir/redundant_top.nr90.bak.clstr");
+@@ -349,20 +348,20 @@
+ }
+ }
+
+-$cmd = "$UTIL_DIR/compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat";
++$cmd = "compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat";
+ &process_cmd($cmd) unless $reuse && -s "$workdir/base_freqs.dat";
+
+
+ # get hexamer scores
+-#$cmd = "$UTIL_DIR/seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores";
++#$cmd = "seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores";
+ #&process_cmd($cmd) unless ($reuse && -s "hexamer.scores");
+
+-$cmd = "$UTIL_DIR/seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores";
++$cmd = "seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores";
+ &process_cmd($cmd) unless $reuse && -s "$workdir/hexamer.scores";
+
+
+ # score all cds entries
+-$cmd = "$UTIL_DIR/score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores";
++$cmd = "score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores";
+ &process_cmd($cmd) unless ($reuse && -s "$cds_file.scores");
+
+
+@@ -440,18 +439,18 @@
+ }
+
+ # index the current gff file:
+-$cmd = "$UTIL_DIR/index_gff3_files_by_isoform.pl $gff3_file";
++$cmd = "index_gff3_files_by_isoform.pl $gff3_file";
+ &process_cmd($cmd);
+
+ # retrieve the best entries:
+-$cmd = "$UTIL_DIR/gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3";
++$cmd = "gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3";
+ &process_cmd($cmd);
+
+ {
+ my $final_output_prefix = basename($transcripts_file) . ".transdecoder";
+
+ # exclude shadow orfs (smaller orfs in different reading frame that are eclipsed by longer orfs)
+- $cmd = "$UTIL_DIR/remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3";
++ $cmd = "remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3";
+ &process_cmd($cmd);
+
+
+@@ -462,14 +461,14 @@
+ my $gff3_file = "$final_output_prefix.gff3";
+ my $bed_file = $gff3_file;
+ $bed_file =~ s/\.gff3$/\.bed/;
+- $cmd = "$UTIL_DIR/gff3_file_to_bed.pl $gff3_file > $bed_file";
++ $cmd = "gff3_file_to_bed.pl $gff3_file > $bed_file";
+ &process_cmd($cmd);
+
+
+ # make a peptide file:
+ my $best_pep_file = $gff3_file;
+ $best_pep_file =~ s/\.gff3$/\.pep/;
+- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
+ &process_cmd($cmd);
+
+
+@@ -477,13 +476,13 @@
+ # make a CDS file:
+ my $best_cds_file = $best_pep_file;
+ $best_cds_file =~ s/\.pep$/\.cds/;
+- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
+ &process_cmd($cmd);
+
+ # make a CDS file:
+ my $best_cdna_file = $best_pep_file;
+ $best_cdna_file =~ s/\.pep$/\.mRNA/;
+- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file";
++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file";
+ &process_cmd($cmd);
+
+ }
diff --git a/sci-biology/transdecoder/files/pfam_runner.pl.patch b/sci-biology/transdecoder/files/pfam_runner.pl.patch
new file mode 100644
index 0000000..7809b1a
--- /dev/null
+++ b/sci-biology/transdecoder/files/pfam_runner.pl.patch
@@ -0,0 +1,20 @@
+--- /usr/bin/pfam_runner.pl 2015-01-09 11:22:55.000000000 +0100
++++ pfam_runner.pl 2015-01-09 14:25:43.385838579 +0100
+@@ -24,7 +24,7 @@
+ my $workdir;
+ my $verbose;
+ my ($reuse,$pfam_out);
+-my $CPU = 2;
++my $CPU = 1;
+
+ my $usage = <<_EOH_;
+
+@@ -59,7 +59,7 @@
+ # -h print this option menu and quit
+ # -v verbose
+ #
+-# --CPU <int> number of threads to use; (default: 2)
++# --CPU <int> number of threads to use; (default: 1)
+ #
+ # --MPI use MPI (via ffindex_apply_mpi)
+ #
diff --git a/sci-biology/transdecoder/transdecoder-20140704.ebuild b/sci-biology/transdecoder/transdecoder-20140704.ebuild
index c539231..af5bb4f 100644
--- a/sci-biology/transdecoder/transdecoder-20140704.ebuild
+++ b/sci-biology/transdecoder/transdecoder-20140704.ebuild
@@ -15,8 +15,7 @@ SLOT="0"
KEYWORDS="~amd64"
IUSE=""
-DEPEND="sys-cluster/openmpi
- sci-biology/hmmer
+DEPEND=">=sci-biology/hmmer-3.0
sci-biology/cd-hit
sci-biology/parafly
sci-biology/ffindex"
@@ -27,6 +26,8 @@ S="${WORKDIR}"/TransDecoder_r20140704
src_prepare(){
rm -rf 3rd_party
mv Makefile Makefile.old
+ epatch "${FILESDIR}"/TransDecoder.patch
+ epatch "${FILESDIR}"/pfam_runner.pl.patch
}
# avoid fetching 1.5TB "${S}"/pfam/Pfam-AB.hmm.bin, see
@@ -41,4 +42,8 @@ src_install(){
dodir ${vendor_lib_install_dir}
insinto ${vendor_lib_install_dir}
doins PerlLib/*.pm
+
+ einfo "Fetch on your own:"
+ einfo "wget --mirror -nH -nd http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin"
+ einfo "hmmpress Pfam-AB.hmm.bin"
}
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [gentoo-commits] proj/sci:master commit in: sci-biology/TransDecoder/, sci-biology/TransDecoder/files/
@ 2016-03-30 14:50 Martin Mokrejs
0 siblings, 0 replies; 2+ messages in thread
From: Martin Mokrejs @ 2016-03-30 14:50 UTC (permalink / raw
To: gentoo-commits
commit: 1a200d0a079c95ff7357a3581d1f43be63af802d
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Wed Mar 30 14:47:35 2016 +0000
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Wed Mar 30 14:47:35 2016 +0000
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=1a200d0a
sci-biology/TransDecoder: version bump
Package-Manager: portage-2.2.28
sci-biology/TransDecoder/TransDecoder-2.1.0.ebuild | 83 ++++++++++++++++++++++
.../files/TransDecoder-2.1.0__fix_paths.patch | 22 ++++++
2 files changed, 105 insertions(+)
diff --git a/sci-biology/TransDecoder/TransDecoder-2.1.0.ebuild b/sci-biology/TransDecoder/TransDecoder-2.1.0.ebuild
new file mode 100644
index 0000000..9fbcea5
--- /dev/null
+++ b/sci-biology/TransDecoder/TransDecoder-2.1.0.ebuild
@@ -0,0 +1,83 @@
+# Copyright 1999-2016 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+
+[ "$PV" == "9999" ] && inherit git-r3
+
+PERL_EXPORT_PHASE_FUNCTIONS=no
+inherit perl-module eutils toolchain-funcs
+
+DESCRIPTION="Extract ORF/CDS regions from FASTA sequences"
+HOMEPAGE="http://transdecoder.github.io"
+if [ "$PV" == "9999" ]; then
+ EGIT_REPO_URI="https://github.com/TransDecoder/TransDecoder.git"
+ KEYWORDS=""
+else
+ SRC_URI="https://github.com/TransDecoder/TransDecoder/archive/v"${PV}".tar.gz -> ${P}.tar.gz"
+ KEYWORDS="~amd64"
+ S="${WORKDIR}"/TransDecoder-"${PV}"
+fi
+
+LICENSE="BSD-BroadInstitute"
+SLOT="0"
+IUSE=""
+
+DEPEND=""
+RDEPEND="${DEPEND}
+ sci-biology/cd-hit
+ sci-biology/parafly
+ sci-biology/ffindex"
+
+src_prepare(){
+ rm -rf transdecoder_plugins/cd-hit
+ for f in PerlLib/*.pm; do
+ p=`basename $f .pm`;
+ sed -e "s#use $p;#use TransDecoder::$p;#" -i PerlLib/*.pm util/*.pl TransDecoder.LongOrfs TransDecoder.Predict || die;
+ done
+ epatch "${FILESDIR}"/"${P}"__fix_paths.patch
+ epatch "${FILESDIR}"/pfam_runner.pl.patch
+}
+
+src_compile(){
+ einfo "Skipping compilation of bundled cd-hit code, nothing else to do"
+}
+
+# avoid fetching 1.5TB "${S}"/pfam/Pfam-AB.hmm.bin, see
+# "Re: [Transdecoder-users] Announcement: Transdecoder release r20140704"
+# thread in archives. You can get it from
+# http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin
+
+src_install(){
+ dobin TransDecoder.Predict TransDecoder.LongOrfs
+ insinto /usr/share/${PN}/util
+ doins util/*.pl
+ chmod -R a+rx "${D}"/usr/share/${PN}/util
+ # zap the bundled cdhit binaries copied from transdecoder_plugins/cdhit/ to util/bin
+ rm -rf util/bin
+ #
+ # * sci-biology/trinityrnaseq-20140413:0::science
+ # * /usr/bin/Fasta_reader.pm
+ # * /usr/bin/GFF3_utils.pm
+ # * /usr/bin/Gene_obj.pm
+ # * /usr/bin/Gene_obj_indexer.pm
+ # * /usr/bin/Longest_orf.pm
+ # * /usr/bin/Nuc_translator.pm
+ # * /usr/bin/TiedHash.pm
+ #
+ perl_set_version
+ insinto ${VENDOR_LIB}/${PN}
+ doins PerlLib/*.pm
+ # dodoc Release.Notes
+ einfo "Fetch your own Pfam-A.hmm (Pfam-AB.hmm is discontinued since 05/2015):"
+ einfo "wget --mirror -nH -nd ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.gz"
+ einfo "hmmpress Pfam-A.hmm.bin"
+}
+
+pkg_postinst(){
+ einfo "It is recommended to use TransDecoder with sci-biology/hmmer-3 or"
+ einfo "at least with NCBI blast from either:"
+ einfo " sci-biology/ncbi-blast+ (released more often) or from"
+ einfo " sci-biology/ncbi-toolkit++ (a huge bundle with releases and less frequent bugfixes)"
+}
diff --git a/sci-biology/TransDecoder/files/TransDecoder-2.1.0__fix_paths.patch b/sci-biology/TransDecoder/files/TransDecoder-2.1.0__fix_paths.patch
new file mode 100644
index 0000000..0a6fca0
--- /dev/null
+++ b/sci-biology/TransDecoder/files/TransDecoder-2.1.0__fix_paths.patch
@@ -0,0 +1,22 @@
+--- TransDecoder-2.0.1/TransDecoder.LongOrfs.ori 2015-11-19 21:05:53.340219051 +0100
++++ TransDecoder-2.0.1/TransDecoder.LongOrfs 2015-11-19 21:20:44.870221380 +0100
+@@ -64,7 +64,7 @@
+ use TransDecoder::Fasta_reader;
+ use TransDecoder::Longest_orf;
+
+-my $UTIL_DIR = "$FindBin::RealBin/util";
++my $UTIL_DIR = "/usr/share/TransDecoder/util/";
+ $ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
+
+
+--- TransDecoder-2.0.1/TransDecoder.Predict.ori 2015-11-19 21:06:04.280219080 +0100
++++ TransDecoder-2.0.1/TransDecoder.Predict 2015-11-19 21:21:22.560221479 +0100
+@@ -52,7 +52,7 @@
+ use TransDecoder::Fasta_reader;
+ use TransDecoder::Longest_orf;
+
+-my $UTIL_DIR = "$FindBin::RealBin/util";
++my $UTIL_DIR = "/usr/share/TransDecoder/util/";
+ $ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
+
+
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2016-03-30 14:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-09 15:35 [gentoo-commits] proj/sci:master commit in: sci-biology/transdecoder/, sci-biology/transdecoder/files/ Martin Mokrejs
-- strict thread matches above, loose matches on Subject: below --
2016-03-30 14:50 [gentoo-commits] proj/sci:master commit in: sci-biology/TransDecoder/, sci-biology/TransDecoder/files/ Martin Mokrejs
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox