* [gentoo-commits] proj/sci:master commit in: sci-biology/cd-hit/files/, sci-biology/cd-hit/
@ 2014-06-01 18:20 Martin Mokrejs
0 siblings, 0 replies; 3+ messages in thread
From: Martin Mokrejs @ 2014-06-01 18:20 UTC (permalink / raw
To: gentoo-commits
commit: 4004d49530eb309e24220d3998ed32edc592f381
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Sun Jun 1 18:19:23 2014 +0000
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Sun Jun 1 18:19:23 2014 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=4004d495
sci-biology/cd-hit-4.6.1: version bump
Package-Manager: portage-2.2.7
---
sci-biology/cd-hit/ChangeLog | 9 +++
sci-biology/cd-hit/cd-hit-4.6.1.ebuild | 44 +++++++++++
sci-biology/cd-hit/files/4.6.1-gentoo.patch | 118 ++++++++++++++++++++++++++++
sci-biology/cd-hit/metadata.xml | 23 ++++++
4 files changed, 194 insertions(+)
diff --git a/sci-biology/cd-hit/ChangeLog b/sci-biology/cd-hit/ChangeLog
new file mode 100644
index 0000000..2ef3e8f
--- /dev/null
+++ b/sci-biology/cd-hit/ChangeLog
@@ -0,0 +1,9 @@
+# ChangeLog for sci-biology/cd-hit
+# Copyright 1999-2014 Gentoo Foundation; Distributed under the GPL v2
+# $Header: $
+
+*cd-hit-4.6.1 (01 Jun 2014)
+
+ 01 Jun 2014; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
+ +cd-hit-4.6.1.ebuild, +files/4.6.1-gentoo.patch, +metadata.xml:
+ sci-biology/cd-hit-4.6.1: version bump
diff --git a/sci-biology/cd-hit/cd-hit-4.6.1.ebuild b/sci-biology/cd-hit/cd-hit-4.6.1.ebuild
new file mode 100644
index 0000000..1c97d86
--- /dev/null
+++ b/sci-biology/cd-hit/cd-hit-4.6.1.ebuild
@@ -0,0 +1,44 @@
+# Copyright 1999-2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-x86/sci-biology/cd-hit/cd-hit-4.6.ebuild,v 1.1 2012/06/20 18:39:24 jlec Exp $
+
+EAPI=4
+
+inherit eutils flag-o-matic toolchain-funcs
+
+RELDATE="2012-08-27"
+RELEASE="${PN}-v${PV}-${RELDATE}"
+
+DESCRIPTION="Clustering Database at High Identity with Tolerance"
+HOMEPAGE="http://weizhong-lab.ucsd.edu/cd-hit/"
+SRC_URI="http://cdhit.googlecode.com/files/${RELEASE}.tgz"
+
+SLOT="0"
+KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux"
+LICENSE="GPL-2"
+IUSE="doc openmp"
+
+S="${WORKDIR}"/${RELEASE}
+
+pkg_setup() {
+ use openmp && ! tc-has-openmp && die "Please switch to an openmp compatible compiler"
+}
+
+src_prepare() {
+ tc-export CXX
+ use openmp || append-flags -DNO_OPENMP
+ #epatch "${FILESDIR}"/${PV}-gentoo.patch
+}
+
+src_compile() {
+ local myconf=
+ use openmp && myconf="openmp=yes"
+ emake ${myconf}
+}
+
+src_install() {
+ dodir /usr/bin
+ emake PREFIX="${ED}/usr/bin" install
+ dodoc ChangeLog
+ use doc && dodoc doc/*
+}
diff --git a/sci-biology/cd-hit/files/4.6.1-gentoo.patch b/sci-biology/cd-hit/files/4.6.1-gentoo.patch
new file mode 100644
index 0000000..d9db28c
--- /dev/null
+++ b/sci-biology/cd-hit/files/4.6.1-gentoo.patch
@@ -0,0 +1,118 @@
+--- Makefile.ori 2014-06-01 20:12:44.000000000 +0200
++++ Makefile 2014-06-01 20:16:34.000000000 +0200
+@@ -1,16 +1,13 @@
+-
+-CC = g++ -Wall -ggdb
+-CC = g++ -pg
+-CC = g++
++CXX ?= g++
+
+ # without OpenMP
+-CCFLAGS = -DNO_OPENMP
++#CXXFLAGS = -DNO_OPENMP
+
+ # with OpenMP
+ # in command line:
+ # make openmp=yes
+ ifeq ($(openmp),yes)
+-CCFLAGS = -fopenmp
++CXXFLAGS += -fopenmp
+ endif
+
+ # support debugging
+@@ -18,17 +15,17 @@
+ # make debug=yes
+ # make openmp=yes debug=yes
+ ifeq ($(debug),yes)
+-CCFLAGS += -ggdb
++CXXFLAGS +=
+ else
+-CCFLAGS += -O2
++CXXFLAGS +=
+ endif
+
+ ifdef MAX_SEQ
+-CCFLAGS += -DMAX_SEQ=$(MAX_SEQ)
++CXXFLAGS += -DMAX_SEQ=$(MAX_SEQ)
+ endif
+
+ #LDFLAGS = -static -o
+-LDFLAGS += -o
++#LDFLAGS += -o
+
+ PROGS = cd-hit cd-hit-est cd-hit-2d cd-hit-est-2d cd-hit-div cd-hit-454
+
+@@ -36,7 +33,7 @@
+ CCFLAGS := $(CPPFLAGS) $(CCFLAGS) $(CXXFLAGS)
+
+ .c++.o:
+- $(CC) $(CCFLAGS) -c $<
++ $(CXX) $(CXXFLAGS) -c $<
+
+ all: $(PROGS)
+
+@@ -46,49 +43,49 @@
+ # programs
+
+ cd-hit: cdhit-common.o cdhit-utility.o cdhit.o
+- $(CC) $(CCFLAGS) cdhit.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit
++ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit.o cdhit-common.o cdhit-utility.o -o cd-hit
+
+ cd-hit-2d: cdhit-common.o cdhit-utility.o cdhit-2d.o
+- $(CC) $(CCFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-2d
++ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-2d
+
+ cd-hit-est: cdhit-common.o cdhit-utility.o cdhit-est.o
+- $(CC) $(CCFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est
++ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o -o cd-hit-est
+
+ cd-hit-est-2d: cdhit-common.o cdhit-utility.o cdhit-est-2d.o
+- $(CC) $(CCFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est-2d
++ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-est-2d
+
+ cd-hit-div: cdhit-common.o cdhit-utility.o cdhit-div.o
+- $(CC) $(CCFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-div
++ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o -o cd-hit-div
+
+ cd-hit-454: cdhit-common.o cdhit-utility.o cdhit-454.o
+- $(CC) $(CCFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-454
++ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o -o cd-hit-454
+
+ # objects
+ cdhit-common.o: cdhit-common.c++ cdhit-common.h
+- $(CC) $(CCFLAGS) cdhit-common.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit-common.c++ -c
+
+ cdhit-utility.o: cdhit-utility.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-utility.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit-utility.c++ -c
+
+ cdhit.o: cdhit.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit.c++ -c
+
+ cdhit-2d.o: cdhit-2d.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-2d.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit-2d.c++ -c
+
+ cdhit-est.o: cdhit-est.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-est.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit-est.c++ -c
+
+ cdhit-est-2d.o: cdhit-est-2d.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-est-2d.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit-est-2d.c++ -c
+
+ cdhit-div.o: cdhit-div.c++ cdhit-common.h
+- $(CC) $(CCFLAGS) cdhit-div.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit-div.c++ -c
+
+ cdhit-454.o: cdhit-454.c++ cdhit-common.h
+- $(CC) $(CCFLAGS) cdhit-454.c++ -c
++ $(CXX) $(CXXFLAGS) cdhit-454.c++ -c
+
+-PREFIX ?= /usr/local/bin
++PREFIX ?= $(DESTDIR)/usr/bin
+
+ install:
+ for prog in $(PROGS); do \
diff --git a/sci-biology/cd-hit/metadata.xml b/sci-biology/cd-hit/metadata.xml
new file mode 100644
index 0000000..bd5607a
--- /dev/null
+++ b/sci-biology/cd-hit/metadata.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+ <herd>sci-biology</herd>
+ <longdescription>
+CD-HIT is a very widely used program for clustering and comparing large sets
+of protein or nucleotide sequences. CD-HIT is very fast and can handle
+extremely large databases. CD-HIT helps to significantly reduce the
+computational and manual efforts in many sequence analysis tasks and aids in
+understanding the data structure and correct the bias within a dataset.
+The CD-HIT package has CD-HIT, CD-HIT-2D, CD-HIT-EST, CD-HIT-EST-2D,
+CD-HIT-454, CD-HIT-PARA, PSI-CD-HIT and over a dozen scripts. CD-HIT
+(CD-HIT-EST) clusters similar proteins (DNAs) into clusters that meet a
+user-defined similarity threshold. CD-HIT-2D (CD-HIT-EST-2D) compares 2
+datasets and identifies the sequences in db2 that are similar to db1 above
+a threshold. CD-HIT-454 is a program to identify natural and artificial
+duplicates from pyrosequencing reads. The usage of other programs and
+scripts can be found in CD-HIT user's guide.
+</longdescription>
+ <upstream>
+ <remote-id type="google-code">cdhit</remote-id>
+ </upstream>
+</pkgmetadata>
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [gentoo-commits] proj/sci:master commit in: sci-biology/cd-hit/files/, sci-biology/cd-hit/
@ 2016-03-15 17:16 Martin Mokrejs
0 siblings, 0 replies; 3+ messages in thread
From: Martin Mokrejs @ 2016-03-15 17:16 UTC (permalink / raw
To: gentoo-commits
commit: 98458165e817abd0ecc0fac1b3e2db74dc4dc570
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Tue Mar 15 17:12:53 2016 +0000
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Tue Mar 15 17:12:53 2016 +0000
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=98458165
sci-biology/cd-hit: version bump; new SRC_URI; merge with sci-biology/cd-hit-auxtools
Package-Manager: portage-2.2.26
sci-biology/cd-hit/cd-hit-4.6.1.ebuild | 44 ----------
sci-biology/cd-hit/cd-hit-4.6.5.ebuild | 61 ++++++++++++++
.../{4.6.1-gentoo.patch => 4.6.5-gentoo.patch} | 95 ++++++++++++++++------
3 files changed, 131 insertions(+), 69 deletions(-)
diff --git a/sci-biology/cd-hit/cd-hit-4.6.1.ebuild b/sci-biology/cd-hit/cd-hit-4.6.1.ebuild
deleted file mode 100644
index aa51801..0000000
--- a/sci-biology/cd-hit/cd-hit-4.6.1.ebuild
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright 1999-2015 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Id$
-
-EAPI=5
-
-inherit eutils flag-o-matic toolchain-funcs
-
-RELDATE="2012-08-27"
-RELEASE="${PN}-v${PV}-${RELDATE}"
-
-DESCRIPTION="Clustering Database at High Identity with Tolerance"
-HOMEPAGE="http://weizhong-lab.ucsd.edu/cd-hit/"
-SRC_URI="http://cdhit.googlecode.com/files/${RELEASE}.tgz"
-
-SLOT="0"
-KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux"
-LICENSE="GPL-2"
-IUSE="doc openmp"
-
-S="${WORKDIR}"/${RELEASE}
-
-pkg_setup() {
- use openmp && ! tc-has-openmp && die "Please switch to an openmp compatible compiler"
-}
-
-src_prepare() {
- tc-export CXX
- use openmp || append-flags -DNO_OPENMP
- epatch "${FILESDIR}"/${PV}-gentoo.patch
-}
-
-src_compile() {
- local myconf=
- use openmp && myconf="openmp=yes"
- emake ${myconf}
-}
-
-src_install() {
- dodir /usr/bin
- emake PREFIX="${ED}/usr/bin" install
- dodoc ChangeLog
- use doc && dodoc doc/*
-}
diff --git a/sci-biology/cd-hit/cd-hit-4.6.5.ebuild b/sci-biology/cd-hit/cd-hit-4.6.5.ebuild
new file mode 100644
index 0000000..c90dee0
--- /dev/null
+++ b/sci-biology/cd-hit/cd-hit-4.6.5.ebuild
@@ -0,0 +1,61 @@
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+
+inherit eutils flag-o-matic toolchain-funcs
+
+RELDATE="2016-03-04"
+RELEASE="${PN}-v${PV}-${RELDATE}"
+
+DESCRIPTION="Clustering Database at High Identity with Tolerance"
+HOMEPAGE="http://weizhong-lab.ucsd.edu/cd-hit
+ http://weizhongli-lab.org/cd-hit"
+SRC_URI="https://github.com/weizhongli/cdhit/archive/V${PV}.tar.gz -> ${P}.tar.gz
+ http://weizhong-lab.ucsd.edu/cd-hit/wiki/doku.php?id=cd-hit-auxtools-manual -> cd-hit-auxtools-manual.html"
+
+SLOT="0"
+KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux"
+LICENSE="GPL-2"
+IUSE="doc openmp"
+
+DEPEND="!sci-biology/cd-hit-auxtools"
+
+S="${WORKDIR}"/cdhit-"${PV}"
+
+pkg_setup() {
+ use openmp && ! tc-has-openmp && die "Please switch to an openmp compatible compiler"
+}
+
+src_prepare() {
+ tc-export CXX
+ use openmp || append-flags -DNO_OPENMP
+ epatch "${FILESDIR}"/${PV}-gentoo.patch
+}
+
+src_compile() {
+ local myconf=
+ use openmp && myconf="openmp=yes"
+ emake ${myconf}
+ cd cd-hit-auxtools || die
+ emake ${myconf}
+}
+
+src_install() {
+ dodir /usr/bin
+ emake PREFIX="${ED}/usr/bin" install
+ dobin psi-cd-hit/*.pl cd-hit-auxtools/*.pl cd-hit-auxtools/{cd-hit-lap,read-linker,cd-hit-dup}
+ dodoc ChangeLog psi-cd-hit/README.psi-cd-hit
+ use doc && dodoc doc/* psi-cd-hit/qsub-template "${DISTDIR}"/cd-hit-auxtools-manual.html
+}
+
+pkg_postinst(){
+ einfo "From the original http://weizhong-lab.ucsd.edu/software/cdhit-454 package"
+ einfo "we still lack cdhit-cluster-consensus part. You may want to install yourself"
+ einfo "http://weizhong-lab.ucsd.edu/softwares/cd-hit-454/cdhit-cluster-consensus-2013-03-27.tgz"
+ einfo ""
+ einfo "The cd-hit-auxtools are no longer a separate package and belong to cd-hit since"
+ einfo "version 4.6.5. However, there is no manual for that in current cd-hit tree. Therefore"
+ einfo "see http://weizhong-lab.ucsd.edu/cd-hit/wiki/doku.php?id=cd-hit-auxtools-manual"
+}
diff --git a/sci-biology/cd-hit/files/4.6.1-gentoo.patch b/sci-biology/cd-hit/files/4.6.5-gentoo.patch
similarity index 53%
rename from sci-biology/cd-hit/files/4.6.1-gentoo.patch
rename to sci-biology/cd-hit/files/4.6.5-gentoo.patch
index d9db28c..5e989da 100644
--- a/sci-biology/cd-hit/files/4.6.1-gentoo.patch
+++ b/sci-biology/cd-hit/files/4.6.5-gentoo.patch
@@ -1,6 +1,6 @@
---- Makefile.ori 2014-06-01 20:12:44.000000000 +0200
-+++ Makefile 2014-06-01 20:16:34.000000000 +0200
-@@ -1,16 +1,13 @@
+--- Makefile.ori 2016-02-12 01:58:32.000000000 +0100
++++ Makefile 2016-03-15 16:55:53.726069350 +0100
+@@ -1,7 +1,4 @@
-
-CC = g++ -Wall -ggdb
-CC = g++ -pg
@@ -8,27 +8,28 @@
+CXX ?= g++
# without OpenMP
--CCFLAGS = -DNO_OPENMP
-+#CXXFLAGS = -DNO_OPENMP
- # with OpenMP
+@@ -9,9 +6,9 @@
# in command line:
# make openmp=yes
- ifeq ($(openmp),yes)
--CCFLAGS = -fopenmp
-+CXXFLAGS += -fopenmp
+ ifeq ($(openmp),no)
+- CCFLAGS = -DNO_OPENMP
++ CXXFLAGS += -DNO_OPENMP
+ else
+- CCFLAGS = -fopenmp
++ CXXFLAGS += -fopenmp
endif
# support debugging
-@@ -18,17 +15,17 @@
+@@ -19,13 +16,13 @@
# make debug=yes
# make openmp=yes debug=yes
ifeq ($(debug),yes)
-CCFLAGS += -ggdb
-+CXXFLAGS +=
++CXXFLAGS += -ggdb
else
-CCFLAGS += -O2
-+CXXFLAGS +=
++CXXFLAGS ?= -O2
endif
ifdef MAX_SEQ
@@ -37,13 +38,12 @@
endif
#LDFLAGS = -static -o
--LDFLAGS += -o
-+#LDFLAGS += -o
-
+@@ -34,10 +31,10 @@
PROGS = cd-hit cd-hit-est cd-hit-2d cd-hit-est-2d cd-hit-div cd-hit-454
-@@ -36,7 +33,7 @@
- CCFLAGS := $(CPPFLAGS) $(CCFLAGS) $(CXXFLAGS)
+ # Propagate hardening flags
+-CCFLAGS := $(CPPFLAGS) $(CCFLAGS) $(CXXFLAGS)
++CXXFLAGS := $(CPPFLAGS) $(CXXFLAGS)
.c++.o:
- $(CC) $(CCFLAGS) -c $<
@@ -51,32 +51,32 @@
all: $(PROGS)
-@@ -46,49 +43,49 @@
+@@ -47,49 +44,49 @@
# programs
cd-hit: cdhit-common.o cdhit-utility.o cdhit.o
- $(CC) $(CCFLAGS) cdhit.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit
-+ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit.o cdhit-common.o cdhit-utility.o -o cd-hit
++ $(CXX) $(CXXFLAGS) cdhit.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit
cd-hit-2d: cdhit-common.o cdhit-utility.o cdhit-2d.o
- $(CC) $(CCFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-2d
-+ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-2d
++ $(CXX) $(CXXFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-2d
cd-hit-est: cdhit-common.o cdhit-utility.o cdhit-est.o
- $(CC) $(CCFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est
-+ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o -o cd-hit-est
++ $(CXX) $(CXXFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est
cd-hit-est-2d: cdhit-common.o cdhit-utility.o cdhit-est-2d.o
- $(CC) $(CCFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est-2d
-+ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-est-2d
++ $(CXX) $(CXXFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est-2d
cd-hit-div: cdhit-common.o cdhit-utility.o cdhit-div.o
- $(CC) $(CCFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-div
-+ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o -o cd-hit-div
++ $(CXX) $(CXXFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-div
cd-hit-454: cdhit-common.o cdhit-utility.o cdhit-454.o
- $(CC) $(CCFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-454
-+ $(CXX) $(CXXFLAGS) $(LDFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o -o cd-hit-454
++ $(CXX) $(CXXFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-454
# objects
cdhit-common.o: cdhit-common.c++ cdhit-common.h
@@ -112,7 +112,52 @@
+ $(CXX) $(CXXFLAGS) cdhit-454.c++ -c
-PREFIX ?= /usr/local/bin
-+PREFIX ?= $(DESTDIR)/usr/bin
++PREFIX ?= $(DESTDIR)/usr/local/bin
install:
for prog in $(PROGS); do \
+--- cd-hit-auxtools/Makefile.ori 2016-03-15 17:02:39.986070411 +0100
++++ cd-hit-auxtools/Makefile 2016-03-15 17:05:00.556070779 +0100
+@@ -1,9 +1,8 @@
++CXX ?= g++
+
+-CC = g++
+-
+-CFLAGS = -Wall -Wno-unused -I. -Imintlib
++CXXFLAGS ?= -Wall -Wno-unused
+ LFLAGS = -fPIC
+-
++CPPFLAGS = -I. -Imintlib
+
+ UNAME = $(shell uname)
+
+@@ -18,7 +17,7 @@
+ ifeq ($(debug),yes)
+ CXXFLAGS += -ggdb
+ else
+-CFLAGS += -O2
++CXXFLAGS ?= -O2
+ endif
+
+
+@@ -32,16 +31,16 @@
+ .SUFFIXES: .c .obj .cpp .cc .cxx .C
+
+ .cxx.o:
+- $(CC) -c $(CFLAGS) -o $@ $<
++ $(CXX) -c $(CXXFLAGS) $(CPPFLAGS) -o $@ $<
+
+ cd-hit-dup: $(OBJECTS) cdhit-dup.o
+- $(CC) $(LFLAGS) $(OBJECTS) cdhit-dup.o -o cd-hit-dup
++ $(CXX) $(CXXFLAGS) $(LFLAGS) $(OBJECTS) cdhit-dup.o -o cd-hit-dup
+
+ cd-hit-lap: $(OBJECTS) cdhit-lap.o
+- $(CC) $(LFLAGS) $(OBJECTS) cdhit-lap.o -o cd-hit-lap
++ $(CXX) $(CXXFLAGS) $(LFLAGS) $(OBJECTS) cdhit-lap.o -o cd-hit-lap
+
+ read-linker: $(OBJECTS) read-linker.o
+- $(CC) $(LFLAGS) $(OBJECTS) read-linker.o -o read-linker
++ $(CXX) $(CXXFLAGS) $(LFLAGS) $(OBJECTS) read-linker.o -o read-linker
+
+ clean:
+ rm $(OBJECTS) cdhit-dup.o cdhit-lap.o read-linker.o
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [gentoo-commits] proj/sci:master commit in: sci-biology/cd-hit/files/, sci-biology/cd-hit/
@ 2020-09-22 11:12 Aisha Tammy
0 siblings, 0 replies; 3+ messages in thread
From: Aisha Tammy @ 2020-09-22 11:12 UTC (permalink / raw
To: gentoo-commits
commit: 2d09355c7f0d5d1b1e49022b0b85285b85a347f9
Author: Aisha Tammy <gentoo <AT> aisha <DOT> cc>
AuthorDate: Tue Sep 22 11:05:47 2020 +0000
Commit: Aisha Tammy <gentoo <AT> aisha <DOT> cc>
CommitDate: Tue Sep 22 11:05:47 2020 +0000
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=2d09355c
sci-biology/cd-hit: drop package
present in ::gentoo
Package-Manager: Portage-3.0.7, Repoman-3.0.1
Signed-off-by: Aisha Tammy <gentoo <AT> aisha.cc>
sci-biology/cd-hit/cd-hit-4.6.5.ebuild | 60 ------
sci-biology/cd-hit/files/4.6.5-gentoo.patch | 163 ----------------
.../cd-hit/files/cd-hit-auxtools-manual.txt | 212 ---------------------
sci-biology/cd-hit/metadata.xml | 26 ---
4 files changed, 461 deletions(-)
diff --git a/sci-biology/cd-hit/cd-hit-4.6.5.ebuild b/sci-biology/cd-hit/cd-hit-4.6.5.ebuild
deleted file mode 100644
index c7ba45f91..000000000
--- a/sci-biology/cd-hit/cd-hit-4.6.5.ebuild
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright 1999-2015 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=5
-
-inherit eutils flag-o-matic toolchain-funcs
-
-RELDATE="2016-03-04"
-RELEASE="${PN}-v${PV}-${RELDATE}"
-
-DESCRIPTION="Clustering Database at High Identity with Tolerance"
-HOMEPAGE="http://weizhong-lab.ucsd.edu/cd-hit
- http://weizhongli-lab.org/cd-hit"
-SRC_URI="https://github.com/weizhongli/cdhit/archive/V${PV}.tar.gz -> ${P}.tar.gz"
-
-SLOT="0"
-KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux"
-LICENSE="GPL-2"
-IUSE="doc openmp"
-
-DEPEND="!sci-biology/cd-hit-auxtools"
-
-S="${WORKDIR}"/cdhit-"${PV}"
-
-pkg_setup() {
- use openmp && ! tc-has-openmp && die "Please switch to an openmp compatible compiler"
-}
-
-src_prepare() {
- tc-export CXX
- use openmp || append-flags -DNO_OPENMP
- epatch "${FILESDIR}"/${PV}-gentoo.patch
-}
-
-src_compile() {
- local myconf=
- use openmp && myconf="openmp=yes"
- emake ${myconf}
- cd cd-hit-auxtools || die
- emake ${myconf}
-}
-
-src_install() {
- dodir /usr/bin
- emake PREFIX="${ED}/usr/bin" install
- dobin psi-cd-hit/*.pl cd-hit-auxtools/*.pl cd-hit-auxtools/{cd-hit-lap,read-linker,cd-hit-dup}
- dodoc ChangeLog psi-cd-hit/README.psi-cd-hit
- use doc && dodoc doc/* psi-cd-hit/qsub-template "${FILESDIR}"/cd-hit-auxtools-manual.txt
-}
-
-pkg_postinst(){
- einfo "From the original http://weizhong-lab.ucsd.edu/software/cdhit-454 package"
- einfo "we still lack cdhit-cluster-consensus part. You may want to install yourself"
- einfo "http://weizhong-lab.ucsd.edu/softwares/cd-hit-454/cdhit-cluster-consensus-2013-03-27.tgz"
- einfo ""
- einfo "The cd-hit-auxtools are no longer a separate package and belong to cd-hit since"
- einfo "version 4.6.5. However, there is no manual for that in current cd-hit tree. Therefore"
- einfo "see http://weizhong-lab.ucsd.edu/cd-hit/wiki/doku.php?id=cd-hit-auxtools-manual"
- einfo "A local copy is is in /usr/share/doc/${PN}/cd-hit-auxtools-manual.txt"
-}
diff --git a/sci-biology/cd-hit/files/4.6.5-gentoo.patch b/sci-biology/cd-hit/files/4.6.5-gentoo.patch
deleted file mode 100644
index 5e989da40..000000000
--- a/sci-biology/cd-hit/files/4.6.5-gentoo.patch
+++ /dev/null
@@ -1,163 +0,0 @@
---- Makefile.ori 2016-02-12 01:58:32.000000000 +0100
-+++ Makefile 2016-03-15 16:55:53.726069350 +0100
-@@ -1,7 +1,4 @@
--
--CC = g++ -Wall -ggdb
--CC = g++ -pg
--CC = g++
-+CXX ?= g++
-
- # without OpenMP
-
-@@ -9,9 +6,9 @@
- # in command line:
- # make openmp=yes
- ifeq ($(openmp),no)
-- CCFLAGS = -DNO_OPENMP
-+ CXXFLAGS += -DNO_OPENMP
- else
-- CCFLAGS = -fopenmp
-+ CXXFLAGS += -fopenmp
- endif
-
- # support debugging
-@@ -19,13 +16,13 @@
- # make debug=yes
- # make openmp=yes debug=yes
- ifeq ($(debug),yes)
--CCFLAGS += -ggdb
-+CXXFLAGS += -ggdb
- else
--CCFLAGS += -O2
-+CXXFLAGS ?= -O2
- endif
-
- ifdef MAX_SEQ
--CCFLAGS += -DMAX_SEQ=$(MAX_SEQ)
-+CXXFLAGS += -DMAX_SEQ=$(MAX_SEQ)
- endif
-
- #LDFLAGS = -static -o
-@@ -34,10 +31,10 @@
- PROGS = cd-hit cd-hit-est cd-hit-2d cd-hit-est-2d cd-hit-div cd-hit-454
-
- # Propagate hardening flags
--CCFLAGS := $(CPPFLAGS) $(CCFLAGS) $(CXXFLAGS)
-+CXXFLAGS := $(CPPFLAGS) $(CXXFLAGS)
-
- .c++.o:
-- $(CC) $(CCFLAGS) -c $<
-+ $(CXX) $(CXXFLAGS) -c $<
-
- all: $(PROGS)
-
-@@ -47,49 +44,49 @@
- # programs
-
- cd-hit: cdhit-common.o cdhit-utility.o cdhit.o
-- $(CC) $(CCFLAGS) cdhit.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit
-+ $(CXX) $(CXXFLAGS) cdhit.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit
-
- cd-hit-2d: cdhit-common.o cdhit-utility.o cdhit-2d.o
-- $(CC) $(CCFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-2d
-+ $(CXX) $(CXXFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-2d
-
- cd-hit-est: cdhit-common.o cdhit-utility.o cdhit-est.o
-- $(CC) $(CCFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est
-+ $(CXX) $(CXXFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est
-
- cd-hit-est-2d: cdhit-common.o cdhit-utility.o cdhit-est-2d.o
-- $(CC) $(CCFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est-2d
-+ $(CXX) $(CXXFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est-2d
-
- cd-hit-div: cdhit-common.o cdhit-utility.o cdhit-div.o
-- $(CC) $(CCFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-div
-+ $(CXX) $(CXXFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-div
-
- cd-hit-454: cdhit-common.o cdhit-utility.o cdhit-454.o
-- $(CC) $(CCFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-454
-+ $(CXX) $(CXXFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-454
-
- # objects
- cdhit-common.o: cdhit-common.c++ cdhit-common.h
-- $(CC) $(CCFLAGS) cdhit-common.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit-common.c++ -c
-
- cdhit-utility.o: cdhit-utility.c++ cdhit-utility.h
-- $(CC) $(CCFLAGS) cdhit-utility.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit-utility.c++ -c
-
- cdhit.o: cdhit.c++ cdhit-utility.h
-- $(CC) $(CCFLAGS) cdhit.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit.c++ -c
-
- cdhit-2d.o: cdhit-2d.c++ cdhit-utility.h
-- $(CC) $(CCFLAGS) cdhit-2d.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit-2d.c++ -c
-
- cdhit-est.o: cdhit-est.c++ cdhit-utility.h
-- $(CC) $(CCFLAGS) cdhit-est.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit-est.c++ -c
-
- cdhit-est-2d.o: cdhit-est-2d.c++ cdhit-utility.h
-- $(CC) $(CCFLAGS) cdhit-est-2d.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit-est-2d.c++ -c
-
- cdhit-div.o: cdhit-div.c++ cdhit-common.h
-- $(CC) $(CCFLAGS) cdhit-div.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit-div.c++ -c
-
- cdhit-454.o: cdhit-454.c++ cdhit-common.h
-- $(CC) $(CCFLAGS) cdhit-454.c++ -c
-+ $(CXX) $(CXXFLAGS) cdhit-454.c++ -c
-
--PREFIX ?= /usr/local/bin
-+PREFIX ?= $(DESTDIR)/usr/local/bin
-
- install:
- for prog in $(PROGS); do \
---- cd-hit-auxtools/Makefile.ori 2016-03-15 17:02:39.986070411 +0100
-+++ cd-hit-auxtools/Makefile 2016-03-15 17:05:00.556070779 +0100
-@@ -1,9 +1,8 @@
-+CXX ?= g++
-
--CC = g++
--
--CFLAGS = -Wall -Wno-unused -I. -Imintlib
-+CXXFLAGS ?= -Wall -Wno-unused
- LFLAGS = -fPIC
--
-+CPPFLAGS = -I. -Imintlib
-
- UNAME = $(shell uname)
-
-@@ -18,7 +17,7 @@
- ifeq ($(debug),yes)
- CXXFLAGS += -ggdb
- else
--CFLAGS += -O2
-+CXXFLAGS ?= -O2
- endif
-
-
-@@ -32,16 +31,16 @@
- .SUFFIXES: .c .obj .cpp .cc .cxx .C
-
- .cxx.o:
-- $(CC) -c $(CFLAGS) -o $@ $<
-+ $(CXX) -c $(CXXFLAGS) $(CPPFLAGS) -o $@ $<
-
- cd-hit-dup: $(OBJECTS) cdhit-dup.o
-- $(CC) $(LFLAGS) $(OBJECTS) cdhit-dup.o -o cd-hit-dup
-+ $(CXX) $(CXXFLAGS) $(LFLAGS) $(OBJECTS) cdhit-dup.o -o cd-hit-dup
-
- cd-hit-lap: $(OBJECTS) cdhit-lap.o
-- $(CC) $(LFLAGS) $(OBJECTS) cdhit-lap.o -o cd-hit-lap
-+ $(CXX) $(CXXFLAGS) $(LFLAGS) $(OBJECTS) cdhit-lap.o -o cd-hit-lap
-
- read-linker: $(OBJECTS) read-linker.o
-- $(CC) $(LFLAGS) $(OBJECTS) read-linker.o -o read-linker
-+ $(CXX) $(CXXFLAGS) $(LFLAGS) $(OBJECTS) read-linker.o -o read-linker
-
- clean:
- rm $(OBJECTS) cdhit-dup.o cdhit-lap.o read-linker.o
diff --git a/sci-biology/cd-hit/files/cd-hit-auxtools-manual.txt b/sci-biology/cd-hit/files/cd-hit-auxtools-manual.txt
deleted file mode 100644
index 6e0ca68a9..000000000
--- a/sci-biology/cd-hit/files/cd-hit-auxtools-manual.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-http://weizhong-lab.ucsd.edu/cd-hit/wiki/doku.php?id=cd-hit-auxtools-manual
-
-CD-HIT AuxTools: Manual
-
-Last updated: 2012/03/08 00:59
-
-http://cd-hit.org
-
-http://bioinformatics.org/cd-hit/
-
-Program developed by Weizhong Li's lab at UCSD http://weizhong-lab.ucsd.edu liwz@sdsc.edu
-Introduction
-
-CD-HIT AuxTools is a set of auxiliary programs that can be used to assist the analysis of the next generation sequencing data. It currently includes programs for removing read duplicates, finding pairs of overlapping reads or joining pair-end reads etc.
-cd-hit-dup
-
-cd-hit-dup is a simple tool for removing duplicates from sequencing reads, with an optional step to detect and remove chimeric reads. A number of options are provided to tune how the duplicates are removed. Running the program without arguments should print out the list of available options, as follows:
-
-Options:
- -i Input file;
- -i2 Second input file;
- -o Output file;
- -d Description length (default 0, truncate at the first whitespace character)
- -u Length of prefix to be used in the analysis (default 0, for full/maximum length);
- -m Match length (true/false, default true);
- -e Maximum number/percent of mismatches allowed;
- -f Filter out chimeric clusters (true/false, default false);
- -s Minimum length of common sequence shared between a chimeric read
- and each of its parents (default 30, minimum 20);
- -a Abundance cutoff (default 1 without chimeric filtering, 2 with chimeric filtering);
- -b Abundance ratio between a parent read and a chimeric read (default 1);
- -p Dissimilarity control for chimeric filtering (default 1);
-
-Option details
-Common options
-
-Here are more detailed descriptions of the options.
-
- -i Input file;
-
-Input file that must be in fasta or fastq format.
-
- -i2 Second input file;
-
-cd-hit-dup can take a pair of files as inputs, assuming they contain sequences of pair-end reads. ”-i” can be used to specify the file for the first end; and ”-i2” can be used to specify the file for the second end.
-
-When two files of pair-end reads are used as inputs, each pair of reads will be concatenated into a single one, which is then passed to the following steps of duplicate and chimeric detection and removal.
-
- -o Output file;
-
-Output file which contains a list of reads without duplicates.
-
- -d Description length (default 0, truncate at the first whitespace character)
-
-The length of description line that should be written to the output.
-
- -u Length of prefix to be used in the analysis (default 0, for full/maximum length);
-
-For pair-end inputs, the program will take part (whole or prefix) of the first end and part (whole or prefix) of the second read, and join them together to form a single read to do the analysis. A positive value of this option specifies the length of the prefix to be taken from each read. If a read is shorter than this length, letter 'N's will be appended to the read to make up for the length. When this option is not used or is used with a non-positive value, the program will use the length of the longest read as the value of this option.
-
-For single input analysis, only a positive value of this option will be effective. It also allows the program to use only the prefix up to the specified length of each read to do the analysis. In case that a read is shorter than this length, no 'N' is appended to the read since it is not necessary.
-Options for duplicate detection
-
- -m Match length (true/false, default true);
-
-”-m” specifies whether the lengths of two reads should be exactly the same to be considered as duplicates.
-
- -e Maximum number/percent of mismatches allowed;
-
-A maximum number/percent of mismatches can be specified to control the similarity between two reads for duplicate and chimeric detection. For duplicate detection, any two reads with a number of mismatches no greater than the specified value are considered to be duplicates. For chimeric detection, this option controls how similar a read should be to either of its parents.
-Options for chimeric filtering
-
- -f Filter out chimeric clusters (true/false, default false);
-
-This option specifies whether or not to carry out an additional step to filter out chimeric clusters.
-
- -s Minimum length of common sequence shared between a chimeric read
- and each of its parents (default 30, minimum 20);
-
-A read or cluster representative is considered as a potential chimeric only if it shares at least the number of bases specified by this option with either of its parents. This option is effective only if the option is set to true for filtering chimeric clusters.
-
- -a Abundance cutoff (default 1 without chimeric filtering, 2 with chimeric filtering);
-
-Each read is associated with an abundance number, which is the number of duplicates for the read. cd-hit-dup always assumes the input contains duplicates and performs the duplicate detection step. If no duplicate is found, the input is assumed to have had its duplicates removed in advance; the program will then try to obtain the abundance information from the descriptions of the reads, interpreting the number following “_abundance_” as the abundance number.
-
-The abundance cutoff is mainly used for chimeric filtering to skip chimeric checking on reads with abundance below this cutoff.
-
- -b Abundance ratio between a parent read and a chimeric read (default 1);
-
-This option specifies the abundance ratio between a parent read and a chimeric read. So for a read to be chimeric, either of its parents must have abundance at least as high as the ratio times the abundance of the chimeric read.
-
- -p Dissimilarity control for chimeric filtering (default 1);
-
-Internally dissimilarity is measured by percent of mismatches with ungapped alignments. By default the percentage cutoff is set to 0.01 (one percent). This option specifies a multiplier to this percentage cutoff. A higher value will increase the dissimilarity thresholds in chimeric filtering.
-Output files
-
-cd-hit-dup will output three files. Two of them are the same as the output files of CD-HIT: one (named exactly the same as the file name specified by the ”-o” option) is the cluster (or duplicate) representatives, the other is the clustering file (xxx.clstr) relating each duplicate to its representative. The third file (xxx2.clstr) contains the chimeric clusters. In this file, the description for each chimeric cluster contains cluster ids of its parent clusters from the clustering file xxx.clstr.
-Examples
-Duplicate Detection
-
-Remove duplicates using default parameters:
-
-cd-hit-dup -i input.fa -o output
-
-By default, only reads that are identical are considered as duplicates. If ”-m” is set to false, duplicates will be allowed to have different length, but the longer ones must have a prefix that is identical to the shorter ones.
-
-Remove duplicates with a few mismatches:
-
-cd-hit-dup -i input.fa -o output -e 2
-cd-hit-dup -i input.fa -o output -e 0.01
-
-The former will allow each duplicate read to have up to 2 mismatches when aligned to its representative; the latter will allow up to one percent mismatches.
-
-Remove duplicates from pair-end reads:
-
-cd-hit-dup -i pair-end1.fa -i2 pair-end2.fa -o output
-
-Each read from “pair-end1.fa” and “pair-end2.fa” will be joined to form a single read to detect duplicates. If they are all of the same length, the full length of each end will be used in forming the single read; otherwise, the default value of option ”-u” will be used to determine how the single read is created.
-
-Remove duplicates from pair-end reads with control over how the pair-ends are joined:
-
-cd-hit-dup -i pair-end1.fa -i2 pair-end2.fa -o output -u 100
-
-With an explicit ”-u” option, any reads shorter than 100 bases will be padded with 'N's, and the longer ones will be cut down to 100 bases. Then each pair of 100-base reads will be joined to form a single 200-base read.
-Chimeric Filtering
-
-cd-hit-dup offers a very efficient way to detect chimeric reads. The basic idea is to find two parent reads whose cross-over is sufficiently similar to the chimeric read, while each single parent is sufficiently dissimilar to it.
-
-Such dissimilarity is measured by the percent of mismatches for ungapped alignments. For a given percentage “p” (from option ”-p”), a chimeric read must share at least “p” percent mismatches with any other single read; that is, it must be sufficiently dissimilar to any single read.
-
-For more robust detection of chimeric reads, a background percentage “p_bg” is calculated as the mismatch percentage shared between the candidate chimeric read and the single read that is most similar to the candidate. If “p_bg” is greater than “1.5*p”, “1.5*p” will be used as “p_bg” instead.
-
-For a read to be classified as chimeric read, there must exist two reads/parents such that, the leading part of the read is sufficiently similar to one parent, and the rest is sufficiently similar to the other parent, with at most “p+p_bg” percent of mismatches in each part. And the crossover between the two parents must share at most “p_bg” mismatches with the chimeric read.
-
-Chimeric filtering with default parameters:
-
-cd-hit-dup -i input.fa -o output -f true
-
-Chimeric filtering with specified similarity level:
-
-cd-hit-dup -i input.fa -o output -f true -p 1.5
-
-Chimeric filtering with specified abundance difference:
-
-cd-hit-dup -i input.fa -o output -f true -a 2
-
-which means each parent of a chimeric read must be at least twice as abundant as the chimeric read.
-
-Chimeric filtering will produce a cluster file named like “xxx2.clstr”, in which each cluster entry is a chimeric read/cluster. For example,
-
-......
->Cluster 4 chimeric_parent1=2,chimeric_parent2=8
-0 256nt, >FV9NWLF01CRIR3_abundance_23... *
->Cluster 5 chimeric_parent1=2,chimeric_parent2=0
-0 250nt, >FV9NWLF01B4TBX_abundance_21... *
-......
-
-here “Cluster 5” contains a chimeric read “FV9NWLF01B4TBX”, whose parents are identified by cluster numbers “2” and “0” from the associated “xxx.clstr” file,
-
->Cluster 0
-0 252nt, >FV9NWLF01ANLX2_abundance_2239... *
->Cluster 1
-0 246nt, >FV9NWLF01C3KOB_abundance_1465... *
->Cluster 2
-0 260nt, >FV9NWLF01AQOWA_abundance_1284... *
-......
-
-So the parent reads of the chimeric read “FV9NWLF01B4TBX” are “FV9NWLF01AQOWA” and “FV9NWLF01ANLX2”.
-cd-hit-lap
-
-cd-hit-lap is a program for extracting pairs of overlapping reads by clustering based on tail-head overlaps (with perfect matching). The basic clustering strategy is the same as that in standard CD-HIT programs. In this program, each read is clustered as either a “representative” or a “redundant” read. Each “redundant” read must have a prefix that is identical to a suffix of its representative read.
-
-The options of this program can be obtained by running it without any arguments:
-
-[compute-0-0 cdhit-dup]$ ./cd-hit-lap
-Options:
- -i Input file;
- -o Output file;
- -m Minimum length of overlapping part (default 20);
- -p Minimum percentage of overlapping part (default 0, any percentage);
- -d Description length (default 0, truncate at the first whitespace character)
- -s Random number seed for shuffling (default 0, no shuffling; shuffled before sorting by length);
- -stdout Standard output type (default "log", other options "rep", "clstr");
-
-The two options ”-m” and ”-p” can be used to control the minimum overlap that is required to classify them as overlapping reads. Each pair of overlapping reads must have overlap length no less than the threshold specified by ”-m”, and must also not be less than the length threshold computed from the ”-p” option.
-
-Since the overlapping reads are searched using a greedy strategy, different sortings of reads may lead to different results. It is therefore advisable to run the program multiple times with read shuffling by different random number seeds, and then collect and merge the results.
-
-Sometimes it may be more convenient to pipe the results of this program as stdout directly to the stdin of other programs. To do this, the option ”-stdout” can be used to choose which type of results (“log” for program console information, “rep” for representative reads in FASTA or FASTQ format, “clstr” for the clustering output in CD-HIT format) is written to stdout.
-
-The output format of this program is the same as the standard CD-HIT. In the .clstr file, the alignment positions indicate how the reads are overlapped. For example,
-
->Cluster 0
-0 75nt, >1_lane2_624... *
-1 75nt, >1_lane2_7169... at 1:65:11:75/+/100.00%
-2 75nt, >1_lane2_36713... at 69:1:1:69/-/100.00%
-3 75nt, >1_lane2_141482... at 1:56:20:75/+/100.00%
-
-The cluster member #0 in cluster #0 is the representative of the cluster, and it overlaps with each of the other members in the cluster. For cluster member #1, “1:65:11:75/+” tells that the first 65 bases of member #1 overlaps with the last 65 bases of member #0; “69:1:1:69/-” indicates that the last 69 bases of member #2 overlaps with the first 69 bases of member #0.
-read-linker
-
-read-linker is a very simple program to concatenate pair-end reads into single ones. It supports the following options:
-
-[compute-0-0 cdhit-dup]$ ./read-linker
-Options:
- -1 file Input file, first end;
- -2 file Input file, second end;
- -o file Output file;
- -l number Minimum overlapping length (default 10);
- -e number Maximum number of errors (mismatches, default 1);
-
-Only the pairs of reads that share at least the minimum overlapping length, with no more mismatches than the maximum number of errors, are joined to form a single read.
diff --git a/sci-biology/cd-hit/metadata.xml b/sci-biology/cd-hit/metadata.xml
deleted file mode 100644
index 4112e9551..000000000
--- a/sci-biology/cd-hit/metadata.xml
+++ /dev/null
@@ -1,26 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
-<pkgmetadata>
- <maintainer type="project">
- <email>sci-biology@gentoo.org</email>
- <name>Gentoo Biology Project</name>
- </maintainer>
- <longdescription>
-CD-HIT is a very widely used program for clustering and comparing large sets
-of protein or nucleotide sequences. CD-HIT is very fast and can handle
-extremely large databases. CD-HIT helps to significantly reduce the
-computational and manual efforts in many sequence analysis tasks and aids in
-understanding the data structure and correct the bias within a dataset.
-The CD-HIT package has CD-HIT, CD-HIT-2D, CD-HIT-EST, CD-HIT-EST-2D,
-CD-HIT-454, CD-HIT-PARA, PSI-CD-HIT and over a dozen scripts. CD-HIT
-(CD-HIT-EST) clusters similar proteins (DNAs) into clusters that meet a
-user-defined similarity threshold. CD-HIT-2D (CD-HIT-EST-2D) compares 2
-datasets and identifies the sequences in db2 that are similar to db1 above
-a threshold. CD-HIT-454 is a program to identify natural and artificial
-duplicates from pyrosequencing reads. The usage of other programs and
-scripts can be found in CD-HIT user's guide.
-</longdescription>
- <upstream>
- <remote-id type="github">weizhongli/cdhit</remote-id>
- </upstream>
-</pkgmetadata>
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-09-22 11:12 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-03-15 17:16 [gentoo-commits] proj/sci:master commit in: sci-biology/cd-hit/files/, sci-biology/cd-hit/ Martin Mokrejs
-- strict thread matches above, loose matches on Subject: below --
2020-09-22 11:12 Aisha Tammy
2014-06-01 18:20 Martin Mokrejs
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox