680 |
11 Oct 06 |
jari |
1 |
#ifndef _theplu_yat_utility_alignment_ |
680 |
11 Oct 06 |
jari |
2 |
#define _theplu_yat_utility_alignment_ |
183 |
05 Oct 04 |
jari |
3 |
|
616 |
31 Aug 06 |
jari |
// $Id$ |
616 |
31 Aug 06 |
jari |
5 |
|
675 |
10 Oct 06 |
jari |
6 |
/* |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2004 Jari Häkkinen, Peter Johansson |
831 |
27 Mar 07 |
peter |
Copyright (C) 2005 Peter Johansson |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2006 Jari Häkkinen |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
4207 |
26 Aug 22 |
peter |
Copyright (C) 2012, 2014, 2022 Peter Johansson |
675 |
10 Oct 06 |
jari |
13 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
675 |
10 Oct 06 |
jari |
15 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
675 |
10 Oct 06 |
jari |
20 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
675 |
10 Oct 06 |
jari |
General Public License for more details. |
675 |
10 Oct 06 |
jari |
25 |
|
675 |
10 Oct 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
675 |
10 Oct 06 |
jari |
28 |
*/ |
675 |
10 Oct 06 |
jari |
29 |
|
2815 |
28 Aug 12 |
peter |
30 |
#include "deprecate.h" |
2815 |
28 Aug 12 |
peter |
31 |
|
869 |
14 Sep 07 |
peter |
32 |
#include <string> |
256 |
03 Mar 05 |
peter |
33 |
#include <utility> |
256 |
03 Mar 05 |
peter |
34 |
#include <vector> |
256 |
03 Mar 05 |
peter |
35 |
|
183 |
05 Oct 04 |
jari |
36 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
37 |
namespace yat { |
680 |
11 Oct 06 |
jari |
38 |
namespace utility { |
183 |
05 Oct 04 |
jari |
39 |
|
1121 |
22 Feb 08 |
peter |
40 |
class Matrix; |
4125 |
14 Jan 22 |
peter |
41 |
class MatrixBase; |
183 |
05 Oct 04 |
jari |
42 |
|
184 |
06 Oct 04 |
peter |
43 |
/// |
265 |
11 Apr 05 |
peter |
/// Function performing alignment following the Needleman-Wunsch |
265 |
11 Apr 05 |
peter |
/// algorithm. The algorithm starts from the dot-matrix, \a s, where |
265 |
11 Apr 05 |
peter |
/// \f$ s_{ij} \f$ describes how well element \f$ i \f$ in the first |
265 |
11 Apr 05 |
peter |
/// sequence matches element \f$ j \f$ in the second sequence. A |
265 |
11 Apr 05 |
peter |
/// path through this matrix corresponds to an alignment of the two |
265 |
11 Apr 05 |
peter |
/// sequences, in which a diagonal step over a matrix element |
265 |
11 Apr 05 |
peter |
/// corresponds to a match between the corresponding sequence |
265 |
11 Apr 05 |
peter |
/// elements and a vertical or a horizontal step corresponds to |
265 |
11 Apr 05 |
peter |
/// inserting a gap in one of the sequences. The function maximizes |
265 |
11 Apr 05 |
peter |
/// \f$ \sum s_{ij}-n \f$ \a gap, where the first sum goes over all |
2716 |
05 Apr 12 |
peter |
/// matches and the second term is a penalty term for inserting |
2716 |
05 Apr 12 |
peter |
/// \f$ n \f$ gaps. Default is to have the \a gap cost set to zero. The |
265 |
11 Apr 05 |
peter |
/// vector \a path contains information about which elements |
265 |
11 Apr 05 |
peter |
/// were matched. If the first element in the first sequence was |
265 |
11 Apr 05 |
peter |
/// matched to the first element in the second sequence, the last |
265 |
11 Apr 05 |
peter |
/// element in \a path is pair(0,0). |
184 |
06 Oct 04 |
peter |
60 |
/// |
265 |
11 Apr 05 |
peter |
/// @return the global maximum alignment score. |
256 |
03 Mar 05 |
peter |
62 |
/// |
2815 |
28 Aug 12 |
peter |
/// \see Aligner |
2815 |
28 Aug 12 |
peter |
64 |
/// |
4125 |
14 Jan 22 |
peter |
65 |
double NeedlemanWunsch(const MatrixBase& s, |
256 |
03 Mar 05 |
peter |
66 |
std::vector<std::pair<size_t, size_t> >& path, |
184 |
06 Oct 04 |
peter |
67 |
const double gap); |
183 |
05 Oct 04 |
jari |
68 |
|
867 |
13 Sep 07 |
peter |
69 |
/** |
869 |
14 Sep 07 |
peter |
\brief Local alignment following the Smith-Waterman |
2717 |
05 Apr 12 |
peter |
algorithm. |
867 |
13 Sep 07 |
peter |
72 |
|
869 |
14 Sep 07 |
peter |
Instead of looking at each sequence in its entirety the S-W algorithm |
1514 |
19 Sep 08 |
peter |
compares segments of all possible lengths (LOCAL alignment) and chooses |
869 |
14 Sep 07 |
peter |
whichever maximises the similarity measure. |
869 |
14 Sep 07 |
peter |
76 |
|
2738 |
30 May 12 |
peter |
\return the similarity score that maximizes \f$ \sum_{i,j} |
2738 |
30 May 12 |
peter |
A_{ij}s_{ij} - L*gap - N*open\textunderscore gap\f$, where \f$ |
2738 |
30 May 12 |
peter |
A_{ij} \f$ is unity if element \a i in first sequence is aligned |
2738 |
30 May 12 |
peter |
against element \a j in second sequence (zero otherwise); \a L is total |
2738 |
30 May 12 |
peter |
length of gaps; and \a N is number of gaps. |
2738 |
30 May 12 |
peter |
82 |
|
873 |
18 Sep 07 |
peter |
\param s score matrix in which element \f$(i,j)\f$ is the score between |
2738 |
30 May 12 |
peter |
element \f$i\f$ in first sequence and element \f$j\f$ in second sequence. |
869 |
14 Sep 07 |
peter |
\param gap cost for having a gap (insertion or deletion) |
869 |
14 Sep 07 |
peter |
\param open_gap cost for opening up a gap in a sequence, in other words, for a |
2738 |
30 May 12 |
peter |
gap of length L the total cost is open_gap + L*gap. |
2815 |
28 Aug 12 |
peter |
88 |
|
2815 |
28 Aug 12 |
peter |
\deprecated Provided for backward compatibility with 0.8 API. Use |
2815 |
28 Aug 12 |
peter |
Aligner::smith_waterman() instead. |
867 |
13 Sep 07 |
peter |
91 |
*/ |
1121 |
22 Feb 08 |
peter |
92 |
double SmithWaterman(const utility::Matrix& s, |
2815 |
28 Aug 12 |
peter |
93 |
double gap, double open_gap) YAT_DEPRECATE; |
867 |
13 Sep 07 |
peter |
94 |
|
869 |
14 Sep 07 |
peter |
95 |
/** |
2717 |
05 Apr 12 |
peter |
SSearch does a rigorous Smith-Waterman search for similarity between |
3192 |
22 Apr 14 |
peter |
sequences. For long sequences this may be very expensive (both in time and |
869 |
14 Sep 07 |
peter |
space) and BLAST or FASTA is preferable. |
869 |
14 Sep 07 |
peter |
99 |
|
2792 |
27 Jul 12 |
peter |
Function calculates a score matrix with elements \f$ s_{ij} \f$ |
2792 |
27 Jul 12 |
peter |
equal to 1.0 when first[i] == second[j] and minus \a mismatch |
2792 |
27 Jul 12 |
peter |
otherwise. |
2792 |
27 Jul 12 |
peter |
103 |
|
2792 |
27 Jul 12 |
peter |
@return SmithWaterman score |
2792 |
27 Jul 12 |
peter |
105 |
|
2792 |
27 Jul 12 |
peter |
\since parameter mismatch available since yat 0.9 |
2815 |
28 Aug 12 |
peter |
107 |
|
2815 |
28 Aug 12 |
peter |
\see Aligner |
869 |
14 Sep 07 |
peter |
109 |
*/ |
2717 |
05 Apr 12 |
peter |
110 |
double ssearch(std::string first, std::string second, double gap, |
2792 |
27 Jul 12 |
peter |
111 |
double open_gap, double mismatch=0.0); |
869 |
14 Sep 07 |
peter |
112 |
|
687 |
16 Oct 06 |
jari |
113 |
}}} // of namespace utility, yat, and theplu |
183 |
05 Oct 04 |
jari |
114 |
|
183 |
05 Oct 04 |
jari |
115 |
#endif |