2815 |
28 Aug 12 |
peter |
1 |
#ifndef _theplu_yat_utility_aligner_ |
2815 |
28 Aug 12 |
peter |
2 |
#define _theplu_yat_utility_aligner_ |
2815 |
28 Aug 12 |
peter |
3 |
|
2815 |
28 Aug 12 |
peter |
// $Id$ |
2815 |
28 Aug 12 |
peter |
5 |
|
2815 |
28 Aug 12 |
peter |
6 |
/* |
2815 |
28 Aug 12 |
peter |
Copyright (C) 2012 Peter Johansson |
3114 |
10 Nov 13 |
peter |
Copyright (C) 2013 Jari Häkkinen, Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2014, 2016, 2019, 2020, 2022, 2023 Peter Johansson |
2815 |
28 Aug 12 |
peter |
10 |
|
2815 |
28 Aug 12 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2815 |
28 Aug 12 |
peter |
12 |
|
2815 |
28 Aug 12 |
peter |
The yat library is free software; you can redistribute it and/or |
2815 |
28 Aug 12 |
peter |
modify it under the terms of the GNU General Public License as |
2815 |
28 Aug 12 |
peter |
published by the Free Software Foundation; either version 3 of the |
2815 |
28 Aug 12 |
peter |
License, or (at your option) any later version. |
2815 |
28 Aug 12 |
peter |
17 |
|
2815 |
28 Aug 12 |
peter |
The yat library is distributed in the hope that it will be useful, |
2815 |
28 Aug 12 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2815 |
28 Aug 12 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2815 |
28 Aug 12 |
peter |
General Public License for more details. |
2815 |
28 Aug 12 |
peter |
22 |
|
2815 |
28 Aug 12 |
peter |
You should have received a copy of the GNU General Public License |
2815 |
28 Aug 12 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2815 |
28 Aug 12 |
peter |
25 |
*/ |
2815 |
28 Aug 12 |
peter |
26 |
|
3336 |
24 Oct 14 |
peter |
27 |
#include "CigarIterator.h" |
3336 |
24 Oct 14 |
peter |
28 |
|
3194 |
22 Apr 14 |
peter |
29 |
#include <boost/cstdint.hpp> |
3194 |
22 Apr 14 |
peter |
30 |
|
3200 |
03 May 14 |
peter |
31 |
#include <deque> |
2966 |
23 Jan 13 |
jari |
32 |
#include <cstddef> |
3194 |
22 Apr 14 |
peter |
33 |
#include <iosfwd> |
2815 |
28 Aug 12 |
peter |
34 |
#include <vector> |
2815 |
28 Aug 12 |
peter |
35 |
|
2815 |
28 Aug 12 |
peter |
36 |
namespace theplu { |
2815 |
28 Aug 12 |
peter |
37 |
namespace yat { |
2815 |
28 Aug 12 |
peter |
38 |
namespace utility { |
2815 |
28 Aug 12 |
peter |
39 |
|
2815 |
28 Aug 12 |
peter |
40 |
class Matrix; |
4125 |
14 Jan 22 |
peter |
41 |
class MatrixBase; |
4125 |
14 Jan 22 |
peter |
42 |
class MatrixMutable; |
2815 |
28 Aug 12 |
peter |
43 |
|
2815 |
28 Aug 12 |
peter |
44 |
/** |
2815 |
28 Aug 12 |
peter |
\brief Aligning two sequences |
2815 |
28 Aug 12 |
peter |
46 |
|
2815 |
28 Aug 12 |
peter |
General class aligning two sequences described in a dot-matrix, |
2815 |
28 Aug 12 |
peter |
D, in which \f$ D(i,j) \f$ describes how similar element \c i in |
2815 |
28 Aug 12 |
peter |
the first sequence is with element \c j in the second sequence. A |
2815 |
28 Aug 12 |
peter |
path through this matrix corresponds to an alignment of the two |
2815 |
28 Aug 12 |
peter |
sequences, in which a diagonal step correspoinds to a match |
2815 |
28 Aug 12 |
peter |
between corresponding sequence elements, and a vertical or |
2815 |
28 Aug 12 |
peter |
horizontal step correspond correspond to inserting a gap into one |
2815 |
28 Aug 12 |
peter |
of the sequences. |
2815 |
28 Aug 12 |
peter |
55 |
|
2815 |
28 Aug 12 |
peter |
\since New in yat 0.9 |
2815 |
28 Aug 12 |
peter |
57 |
*/ |
2815 |
28 Aug 12 |
peter |
58 |
class Aligner |
2815 |
28 Aug 12 |
peter |
59 |
{ |
2815 |
28 Aug 12 |
peter |
60 |
public: |
2815 |
28 Aug 12 |
peter |
61 |
/** |
2815 |
28 Aug 12 |
peter |
Enum used to describe alignment. |
2815 |
28 Aug 12 |
peter |
63 |
|
2816 |
28 Aug 12 |
peter |
\see alignment |
2815 |
28 Aug 12 |
peter |
65 |
*/ |
2815 |
28 Aug 12 |
peter |
66 |
enum direction { none, right, down, diagonal }; |
2815 |
28 Aug 12 |
peter |
67 |
|
2815 |
28 Aug 12 |
peter |
68 |
/** |
2815 |
28 Aug 12 |
peter |
\brief Constructor |
2815 |
28 Aug 12 |
peter |
70 |
|
2815 |
28 Aug 12 |
peter |
Same as Aligner(gap, open_gap, gap, open_gap) |
2815 |
28 Aug 12 |
peter |
72 |
*/ |
3207 |
04 May 14 |
peter |
73 |
explicit Aligner(double gap=0, double open_gap=0); |
2815 |
28 Aug 12 |
peter |
74 |
|
2815 |
28 Aug 12 |
peter |
75 |
/** |
2815 |
28 Aug 12 |
peter |
\brief constructor |
2815 |
28 Aug 12 |
peter |
77 |
|
3180 |
23 Mar 14 |
peter |
\param vertical_gap Penalty for extending a vertical gap. A |
3180 |
23 Mar 14 |
peter |
vertical gap means alignment consumes an element in second |
3180 |
23 Mar 14 |
peter |
element and not, in the first element, i.e., an element has |
3180 |
23 Mar 14 |
peter |
been inserted to the second element or equivalently an element |
3180 |
23 Mar 14 |
peter |
has been deleted in first sequence. |
2815 |
28 Aug 12 |
peter |
83 |
|
2815 |
28 Aug 12 |
peter |
\param open_vertical_gap Penalty for open a vertical gap. Total |
2815 |
28 Aug 12 |
peter |
cost for a vertical gap is open_vertical_gap + N * |
2815 |
28 Aug 12 |
peter |
vertical_gap. |
2815 |
28 Aug 12 |
peter |
87 |
|
4334 |
25 Mar 23 |
peter |
\param horizontal_gap Penalty for extending a horizontal gap |
2815 |
28 Aug 12 |
peter |
89 |
|
4334 |
25 Mar 23 |
peter |
\param open_horizontal_gap Penalty for open a horizontal |
2815 |
28 Aug 12 |
peter |
gap. Total penalty for a horizontal gap is open_horizon_gap + N |
2815 |
28 Aug 12 |
peter |
* horizon_gap. |
2815 |
28 Aug 12 |
peter |
93 |
*/ |
2815 |
28 Aug 12 |
peter |
94 |
Aligner(double vertical_gap, double open_vertical_gap, |
4334 |
25 Mar 23 |
peter |
95 |
double horizontal_gap, double open_horizontal_gap); |
2815 |
28 Aug 12 |
peter |
96 |
|
2815 |
28 Aug 12 |
peter |
97 |
/** |
2815 |
28 Aug 12 |
peter |
Initialize a score matrix with \f$ S_{k,0} = - |
2815 |
28 Aug 12 |
peter |
\textrm{open\_vertical\_gap} - k * \textrm{vertical\_gap} \f$ |
2815 |
28 Aug 12 |
peter |
and \f$ S_{0,k} = - \textrm{open\_horizon\_gap} - k * |
2815 |
28 Aug 12 |
peter |
\textrm{horizon\_gap} \f$ and align using operator(). |
2815 |
28 Aug 12 |
peter |
102 |
|
2815 |
28 Aug 12 |
peter |
\return most lower right element of score matrix |
2815 |
28 Aug 12 |
peter |
104 |
*/ |
4125 |
14 Jan 22 |
peter |
105 |
double needleman_wunsch(const MatrixBase& d); |
2815 |
28 Aug 12 |
peter |
106 |
|
2815 |
28 Aug 12 |
peter |
107 |
/** |
2815 |
28 Aug 12 |
peter |
Initialize a score matrix with all elements equal to zero, |
2815 |
28 Aug 12 |
peter |
align using operator(). |
2815 |
28 Aug 12 |
peter |
110 |
|
2815 |
28 Aug 12 |
peter |
\return max element in score matrix. |
3206 |
04 May 14 |
peter |
112 |
|
3206 |
04 May 14 |
peter |
\see SmithWaterman |
2815 |
28 Aug 12 |
peter |
114 |
*/ |
4125 |
14 Jan 22 |
peter |
115 |
double smith_waterman(const MatrixBase& d); |
2815 |
28 Aug 12 |
peter |
116 |
|
2815 |
28 Aug 12 |
peter |
117 |
/** |
2815 |
28 Aug 12 |
peter |
\brief calculate score matrix based on the \a dot matrix |
2815 |
28 Aug 12 |
peter |
119 |
|
2815 |
28 Aug 12 |
peter |
The algorithm calculates a maximum \a score matrix as |
2815 |
28 Aug 12 |
peter |
121 |
|
2815 |
28 Aug 12 |
peter |
\f$ S_{i,j} = \textrm{max} \{ S_{ij}; S_{i,j-1} - F(\textrm{gap}_V); |
2815 |
28 Aug 12 |
peter |
S_{i-1,j-1} + D_{i-1,j-1}; S_{i-1,j} - F(\textrm{gap}_H) \} \f$ |
2815 |
28 Aug 12 |
peter |
124 |
|
2815 |
28 Aug 12 |
peter |
where \f$ F(\textrm{gap}) \f$ is gap if there was already a |
2815 |
28 Aug 12 |
peter |
gap, and gap + open_gap otherwise. |
2815 |
28 Aug 12 |
peter |
127 |
|
3205 |
04 May 14 |
peter |
To get the CIGAR to behave as expected, the each row in \a dot |
3205 |
04 May 14 |
peter |
corresponds to an element in reference sequence and each row in |
3205 |
04 May 14 |
peter |
\a dot corresponds to an element in query reference. If that is |
3205 |
04 May 14 |
peter |
transposed, insertions and deletions are swapped in CIGAR. |
3847 |
22 Sep 19 |
peter |
132 |
|
3847 |
22 Sep 19 |
peter |
The \a score matrix must have one column more than the \a dot |
3847 |
22 Sep 19 |
peter |
matrix and one more row than the \a dot matrix. |
2815 |
28 Aug 12 |
peter |
135 |
*/ |
4125 |
14 Jan 22 |
peter |
136 |
void operator()(const MatrixBase& dot, MatrixMutable& score); |
2815 |
28 Aug 12 |
peter |
137 |
|
2815 |
28 Aug 12 |
peter |
138 |
/** |
3027 |
08 Apr 13 |
peter |
If, for example, alignment(i,j) returns Aligner::diagonal, |
3027 |
08 Apr 13 |
peter |
implies that score(i,j) was maximized as score(i-1,j-1) + |
3027 |
08 Apr 13 |
peter |
dot(i-1, j-1), in other words, element i-1 in first sequence |
2815 |
28 Aug 12 |
peter |
was aligned with element j-1 in second sequence. |
2815 |
28 Aug 12 |
peter |
143 |
*/ |
2816 |
28 Aug 12 |
peter |
144 |
const direction& alignment(size_t i, size_t j) const; |
2815 |
28 Aug 12 |
peter |
145 |
|
3194 |
22 Apr 14 |
peter |
146 |
/** |
3203 |
03 May 14 |
peter |
\brief Compact Idiosyncratic Gapped Alignment Report |
3203 |
03 May 14 |
peter |
148 |
|
3203 |
03 May 14 |
peter |
A CIGAR is a representation of an alignment, an array of |
3203 |
03 May 14 |
peter |
operation/length where the operation and length is packed into |
3203 |
03 May 14 |
peter |
a uint32_t. So a CIGAR of e.g. '3M1D5M' means alignment is |
3203 |
03 May 14 |
peter |
three matches, one deletion, and five matches, which is |
3203 |
03 May 14 |
peter |
represented by an array of length 3. |
3203 |
03 May 14 |
peter |
154 |
|
3211 |
05 May 14 |
peter |
<table> |
3211 |
05 May 14 |
peter |
<tr><td>\c \#define</td><td>value</td><td>char</td><td>type</td> |
3211 |
05 May 14 |
peter |
<td>consume ref</td><td>consume query</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CMATCH</td> <td>0</td><td>M</td><td>3</td> |
3211 |
05 May 14 |
peter |
<td>\c true</td><td>\c true</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CINS</td> <td>1</td><td>I</td><td>1</td> |
3211 |
05 May 14 |
peter |
<td>\c false</td><td>\c true</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CDEL</td> <td>2</td><td>D</td><td>2</td> |
3211 |
05 May 14 |
peter |
<td>\c true</td><td>\c false</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CREF_SKIP</td> <td>3</td><td>N</td><td>2</td> |
3211 |
05 May 14 |
peter |
<td>\c true</td><td>\c false</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CSOFT_CLIP</td><td>4</td><td>S</td><td>1</td> |
3211 |
05 May 14 |
peter |
<td>\c false</td><td>\c true</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CHARD_CLIP</td><td>5</td><td>H</td><td>0</td> |
3211 |
05 May 14 |
peter |
<td>\c false</td><td>\c false</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CPAD</td> <td>6</td><td>P</td><td>0</td> |
3211 |
05 May 14 |
peter |
<td>\c false</td><td>\c false</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CEQUAL</td> <td>7</td><td>=</td><td>3</td> |
3211 |
05 May 14 |
peter |
<td>\c true</td><td>\c true</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CDIFF</td> <td>8</td><td>X</td><td>3</td> |
3211 |
05 May 14 |
peter |
<td>\c true</td><td>\c true</td></tr> |
3211 |
05 May 14 |
peter |
<tr><td>BAM_CBACK</td> <td>9</td><td>B</td><td>0</td> |
3211 |
05 May 14 |
peter |
<td>\c false</td><td>\c false</td></tr> |
3211 |
05 May 14 |
peter |
</table> |
3211 |
05 May 14 |
peter |
179 |
|
3194 |
22 Apr 14 |
peter |
\since new in yat 0.12 |
3212 |
05 May 14 |
peter |
181 |
|
3212 |
05 May 14 |
peter |
\see file yat/utility/Cigar.h for some useful macros |
3194 |
22 Apr 14 |
peter |
183 |
*/ |
3194 |
22 Apr 14 |
peter |
184 |
class Cigar |
3194 |
22 Apr 14 |
peter |
185 |
{ |
3194 |
22 Apr 14 |
peter |
186 |
public: |
3195 |
22 Apr 14 |
peter |
187 |
/** |
3336 |
24 Oct 14 |
peter |
\since New in yat 0.13 |
3336 |
24 Oct 14 |
peter |
189 |
*/ |
3336 |
24 Oct 14 |
peter |
190 |
typedef |
3336 |
24 Oct 14 |
peter |
191 |
CigarIterator<std::deque<uint32_t>::const_iterator > const_iterator; |
3336 |
24 Oct 14 |
peter |
192 |
|
3336 |
24 Oct 14 |
peter |
193 |
/** |
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
195 |
*/ |
4334 |
25 Mar 23 |
peter |
196 |
typedef |
4334 |
25 Mar 23 |
peter |
197 |
CigarIterator<std::deque<uint32_t>::const_reverse_iterator> |
4334 |
25 Mar 23 |
peter |
198 |
const_reverse_iterator; |
4334 |
25 Mar 23 |
peter |
199 |
|
4334 |
25 Mar 23 |
peter |
200 |
/** |
4003 |
15 Oct 20 |
peter |
Default constructor |
4003 |
15 Oct 20 |
peter |
202 |
*/ |
4003 |
15 Oct 20 |
peter |
203 |
Cigar(void) = default; |
4003 |
15 Oct 20 |
peter |
204 |
|
4003 |
15 Oct 20 |
peter |
205 |
/** |
4003 |
15 Oct 20 |
peter |
\param s string in same format as output from operator<<, |
4003 |
15 Oct 20 |
peter |
i.e., an concatenated string of integer followed by one of |
4003 |
15 Oct 20 |
peter |
"MIDNSHP=XB". |
4003 |
15 Oct 20 |
peter |
209 |
|
4003 |
15 Oct 20 |
peter |
\since New in yat 0.19 |
4003 |
15 Oct 20 |
peter |
211 |
*/ |
4003 |
15 Oct 20 |
peter |
212 |
explicit Cigar(const std::string& s); |
4003 |
15 Oct 20 |
peter |
213 |
|
4003 |
15 Oct 20 |
peter |
214 |
/** |
3336 |
24 Oct 14 |
peter |
\return iterator pointing to first element |
3336 |
24 Oct 14 |
peter |
216 |
|
3336 |
24 Oct 14 |
peter |
\since New in yat 0.13 |
3840 |
13 Aug 19 |
peter |
218 |
|
3840 |
13 Aug 19 |
peter |
\note was not implemented prior 0.16.2 |
3336 |
24 Oct 14 |
peter |
220 |
*/ |
3336 |
24 Oct 14 |
peter |
221 |
const_iterator begin(void) const; |
3336 |
24 Oct 14 |
peter |
222 |
|
3336 |
24 Oct 14 |
peter |
223 |
/** |
3195 |
22 Apr 14 |
peter |
Erases all elements. |
3195 |
22 Apr 14 |
peter |
225 |
*/ |
3194 |
22 Apr 14 |
peter |
226 |
void clear(void); |
3195 |
22 Apr 14 |
peter |
227 |
|
3195 |
22 Apr 14 |
peter |
228 |
/** |
3336 |
24 Oct 14 |
peter |
\return iterator pointing to one passed last element |
3336 |
24 Oct 14 |
peter |
230 |
|
3336 |
24 Oct 14 |
peter |
\since New in yat 0.13 |
3840 |
13 Aug 19 |
peter |
232 |
|
3840 |
13 Aug 19 |
peter |
\note was not implemented prior 0.16.2 |
3336 |
24 Oct 14 |
peter |
234 |
*/ |
3336 |
24 Oct 14 |
peter |
235 |
const_iterator end(void) const; |
3336 |
24 Oct 14 |
peter |
236 |
|
3336 |
24 Oct 14 |
peter |
237 |
/** |
3213 |
05 May 14 |
peter |
Total length of operations counting operations that consume |
3213 |
05 May 14 |
peter |
either (or both) query and reference e.g. match, insertion, |
3213 |
05 May 14 |
peter |
or deletion. HardClip or Padding operations are ignored. |
3213 |
05 May 14 |
peter |
241 |
*/ |
3213 |
05 May 14 |
peter |
242 |
uint32_t length(void) const; |
3213 |
05 May 14 |
peter |
243 |
|
3213 |
05 May 14 |
peter |
244 |
/** |
3847 |
22 Sep 19 |
peter |
See table in class documentation. |
3203 |
03 May 14 |
peter |
246 |
|
3203 |
03 May 14 |
peter |
\return operation part of element \a i |
3195 |
22 Apr 14 |
peter |
248 |
*/ |
3200 |
03 May 14 |
peter |
249 |
uint8_t op(size_t i) const; |
3195 |
22 Apr 14 |
peter |
250 |
|
3195 |
22 Apr 14 |
peter |
251 |
/** |
3203 |
03 May 14 |
peter |
Translate the op(i) to a descriptive character of one of the |
3211 |
05 May 14 |
peter |
following "MIDNSHP=XB". See table in class documentation. |
3203 |
03 May 14 |
peter |
254 |
|
3211 |
05 May 14 |
peter |
\return character describing operation i |
3195 |
22 Apr 14 |
peter |
256 |
*/ |
3200 |
03 May 14 |
peter |
257 |
char opchr(size_t i) const; |
3195 |
22 Apr 14 |
peter |
258 |
|
3195 |
22 Apr 14 |
peter |
259 |
/** |
3200 |
03 May 14 |
peter |
\return length of element i |
3195 |
22 Apr 14 |
peter |
261 |
*/ |
3200 |
03 May 14 |
peter |
262 |
uint32_t oplen(size_t i) const; |
3195 |
22 Apr 14 |
peter |
263 |
|
3195 |
22 Apr 14 |
peter |
264 |
/** |
3203 |
03 May 14 |
peter |
Erase last \a len operation. |
3195 |
22 Apr 14 |
peter |
266 |
*/ |
3200 |
03 May 14 |
peter |
267 |
void pop_back(uint32_t len=1); |
3195 |
22 Apr 14 |
peter |
268 |
|
3195 |
22 Apr 14 |
peter |
269 |
/** |
3203 |
03 May 14 |
peter |
Erase first \a len operation. |
3195 |
22 Apr 14 |
peter |
271 |
*/ |
3200 |
03 May 14 |
peter |
272 |
void pop_front(uint32_t len=1); |
3195 |
22 Apr 14 |
peter |
273 |
|
3195 |
22 Apr 14 |
peter |
274 |
/** |
3203 |
03 May 14 |
peter |
Add an operation \a op of length \a len at back of cigar. If |
3203 |
03 May 14 |
peter |
last operation equals \a op, length of that element is |
3203 |
03 May 14 |
peter |
changed and the size() is not modified. |
3195 |
22 Apr 14 |
peter |
278 |
*/ |
3200 |
03 May 14 |
peter |
279 |
void push_back(uint8_t op, uint32_t len=1); |
3195 |
22 Apr 14 |
peter |
280 |
|
3195 |
22 Apr 14 |
peter |
281 |
/** |
3200 |
03 May 14 |
peter |
Add an operation \a op of length \a len at front of cigar. |
3195 |
22 Apr 14 |
peter |
283 |
*/ |
3200 |
03 May 14 |
peter |
284 |
void push_front(uint8_t op, uint32_t len=1); |
3195 |
22 Apr 14 |
peter |
285 |
|
3195 |
22 Apr 14 |
peter |
286 |
/** |
3213 |
05 May 14 |
peter |
\brief Translate CIGAR to a query length. |
3213 |
05 May 14 |
peter |
288 |
|
3213 |
05 May 14 |
peter |
Calculate total length of operations which consume a query |
3213 |
05 May 14 |
peter |
e.g. match or insetion. See table in class documentation. |
3213 |
05 May 14 |
peter |
291 |
|
3213 |
05 May 14 |
peter |
This is the same function as bam_cigar2qlen in samtools C |
3213 |
05 May 14 |
peter |
API. |
3213 |
05 May 14 |
peter |
294 |
*/ |
3213 |
05 May 14 |
peter |
295 |
uint32_t query_length(void) const; |
3213 |
05 May 14 |
peter |
296 |
|
3213 |
05 May 14 |
peter |
297 |
/** |
4334 |
25 Mar 23 |
peter |
\return reverse iterator pointing to the beginning |
4334 |
25 Mar 23 |
peter |
299 |
|
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
301 |
*/ |
4334 |
25 Mar 23 |
peter |
302 |
const_reverse_iterator rbegin(void) const; |
4334 |
25 Mar 23 |
peter |
303 |
|
4334 |
25 Mar 23 |
peter |
304 |
/** |
3213 |
05 May 14 |
peter |
\brief Translate CIGAR to a reference length. |
3213 |
05 May 14 |
peter |
306 |
|
3213 |
05 May 14 |
peter |
Calculate total length of operations which consume a reference |
3213 |
05 May 14 |
peter |
e.g. match or deletion. See table in class documentation. |
3213 |
05 May 14 |
peter |
309 |
|
3213 |
05 May 14 |
peter |
Similar to bam_calend but does not handle \c BAM_CBACK as it has |
3213 |
05 May 14 |
peter |
not been included in SAM specification. |
3213 |
05 May 14 |
peter |
http://sourceforge.net/p/samtools/mailman/message/29373646/ |
3213 |
05 May 14 |
peter |
313 |
*/ |
3213 |
05 May 14 |
peter |
314 |
uint32_t reference_length(void) const; |
3213 |
05 May 14 |
peter |
315 |
|
3213 |
05 May 14 |
peter |
316 |
/** |
4334 |
25 Mar 23 |
peter |
\return reverse iterator pointing to the end |
4334 |
25 Mar 23 |
peter |
318 |
|
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
320 |
*/ |
4334 |
25 Mar 23 |
peter |
321 |
const_reverse_iterator rend(void) const; |
4334 |
25 Mar 23 |
peter |
322 |
|
4334 |
25 Mar 23 |
peter |
323 |
/** |
3462 |
27 Jan 16 |
peter |
\brief reverse the CIGAR |
3462 |
27 Jan 16 |
peter |
325 |
|
3462 |
27 Jan 16 |
peter |
\since New in yat 0.14 |
3462 |
27 Jan 16 |
peter |
327 |
*/ |
3462 |
27 Jan 16 |
peter |
328 |
void reverse(void); |
3462 |
27 Jan 16 |
peter |
329 |
|
3462 |
27 Jan 16 |
peter |
330 |
/** |
3203 |
03 May 14 |
peter |
\return cigar element \a i |
3195 |
22 Apr 14 |
peter |
332 |
*/ |
3203 |
03 May 14 |
peter |
333 |
uint32_t operator[](size_t i) const; |
3195 |
22 Apr 14 |
peter |
334 |
|
3195 |
22 Apr 14 |
peter |
335 |
/** |
3195 |
22 Apr 14 |
peter |
\return number of elements in cigar |
3195 |
22 Apr 14 |
peter |
337 |
*/ |
3194 |
22 Apr 14 |
peter |
338 |
size_t size(void) const; |
4334 |
25 Mar 23 |
peter |
339 |
|
4334 |
25 Mar 23 |
peter |
340 |
/** |
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
342 |
*/ |
4334 |
25 Mar 23 |
peter |
343 |
bool operator==(const Cigar& other) const; |
4334 |
25 Mar 23 |
peter |
344 |
|
4334 |
25 Mar 23 |
peter |
345 |
/** |
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
347 |
*/ |
4334 |
25 Mar 23 |
peter |
348 |
bool operator!=(const Cigar& other) const; |
3194 |
22 Apr 14 |
peter |
349 |
private: |
3200 |
03 May 14 |
peter |
350 |
std::deque<uint32_t> cigar_; |
3200 |
03 May 14 |
peter |
351 |
|
4003 |
15 Oct 20 |
peter |
// parse a string of type 12M and push_back into cigar. Format |
4003 |
15 Oct 20 |
peter |
// is is first an integer followed by a 1-char describing the |
4003 |
15 Oct 20 |
peter |
// operation (see Cigar.h) |
4003 |
15 Oct 20 |
peter |
355 |
bool parse_element(std::istream& is); |
4003 |
15 Oct 20 |
peter |
356 |
|
3213 |
05 May 14 |
peter |
// calculate length only counting operations whose type is set |
3213 |
05 May 14 |
peter |
// in mask, i.e., mask & type returns true |
3213 |
05 May 14 |
peter |
359 |
uint32_t length(uint8_t mask) const; |
3194 |
22 Apr 14 |
peter |
// using compiler generated copy |
3194 |
22 Apr 14 |
peter |
// Cigar(const Cigar& other); |
3194 |
22 Apr 14 |
peter |
// Cigar& operator=(const Cigar&); |
3194 |
22 Apr 14 |
peter |
363 |
}; |
3194 |
22 Apr 14 |
peter |
364 |
|
3194 |
22 Apr 14 |
peter |
365 |
/** |
3194 |
22 Apr 14 |
peter |
\since new in yat 0.12 |
3194 |
22 Apr 14 |
peter |
367 |
*/ |
3194 |
22 Apr 14 |
peter |
368 |
const Cigar cigar(size_t i, size_t j) const; |
3194 |
22 Apr 14 |
peter |
369 |
|
4334 |
25 Mar 23 |
peter |
370 |
/** |
4334 |
25 Mar 23 |
peter |
Same as vertical_gap in Constructor |
4334 |
25 Mar 23 |
peter |
372 |
|
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
374 |
*/ |
4334 |
25 Mar 23 |
peter |
375 |
double insertion_penalty(void) const; |
4334 |
25 Mar 23 |
peter |
376 |
|
4334 |
25 Mar 23 |
peter |
377 |
/** |
4334 |
25 Mar 23 |
peter |
Same as open_vertical_gap in Constructor |
4334 |
25 Mar 23 |
peter |
379 |
|
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
381 |
*/ |
4334 |
25 Mar 23 |
peter |
382 |
double open_insertion_penalty(void) const; |
4334 |
25 Mar 23 |
peter |
383 |
|
4334 |
25 Mar 23 |
peter |
384 |
/** |
4334 |
25 Mar 23 |
peter |
Same as horizontal_gap in Constructor |
4334 |
25 Mar 23 |
peter |
386 |
|
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
388 |
*/ |
4334 |
25 Mar 23 |
peter |
389 |
double deletion_penalty(void) const; |
4334 |
25 Mar 23 |
peter |
390 |
|
4334 |
25 Mar 23 |
peter |
391 |
/** |
4334 |
25 Mar 23 |
peter |
Same as open_horizontal_gap in Constructor |
4334 |
25 Mar 23 |
peter |
393 |
|
4334 |
25 Mar 23 |
peter |
\since New in yat 0.21 |
4334 |
25 Mar 23 |
peter |
395 |
*/ |
4334 |
25 Mar 23 |
peter |
396 |
double open_deletion_penalty(void) const; |
2815 |
28 Aug 12 |
peter |
397 |
private: |
3194 |
22 Apr 14 |
peter |
398 |
direction& directions(size_t i, size_t j); |
2815 |
28 Aug 12 |
peter |
399 |
|
2815 |
28 Aug 12 |
peter |
400 |
size_t columns_; |
2815 |
28 Aug 12 |
peter |
401 |
double vertical_gap_; |
2815 |
28 Aug 12 |
peter |
402 |
double open_vertical_gap_; |
2815 |
28 Aug 12 |
peter |
403 |
double horizon_gap_; |
2815 |
28 Aug 12 |
peter |
404 |
double open_horizon_gap_; |
2816 |
28 Aug 12 |
peter |
405 |
std::vector<direction> alignment_; |
2815 |
28 Aug 12 |
peter |
406 |
}; |
2815 |
28 Aug 12 |
peter |
407 |
|
3194 |
22 Apr 14 |
peter |
408 |
|
3194 |
22 Apr 14 |
peter |
409 |
/** |
3195 |
22 Apr 14 |
peter |
Output e.g. '5S44M5I98M' if the cigar has four elements. Each |
3195 |
22 Apr 14 |
peter |
element is output as the length followed by the character |
3195 |
22 Apr 14 |
peter |
descibing the operation (see opchr). |
3195 |
22 Apr 14 |
peter |
413 |
|
3195 |
22 Apr 14 |
peter |
\relates Aligner::Cigar |
3194 |
22 Apr 14 |
peter |
415 |
*/ |
3194 |
22 Apr 14 |
peter |
416 |
std::ostream& operator<<(std::ostream& os, const Aligner::Cigar& cigar); |
3194 |
22 Apr 14 |
peter |
417 |
|
4353 |
23 Aug 23 |
peter |
418 |
/** |
4353 |
23 Aug 23 |
peter |
\since New in yat 0.21 |
4353 |
23 Aug 23 |
peter |
420 |
*/ |
4353 |
23 Aug 23 |
peter |
421 |
template<typename Iterator1, typename Iterator2, class KernelFunction> |
4353 |
23 Aug 23 |
peter |
422 |
Aligner::Cigar cigar2(Iterator1 ref_begin, Iterator1 ref_end, |
4353 |
23 Aug 23 |
peter |
423 |
Iterator2 query_begin, Iterator2 query_end, |
4353 |
23 Aug 23 |
peter |
424 |
const KernelFunction& kernel, |
4353 |
23 Aug 23 |
peter |
425 |
const Aligner::Cigar& cigar) |
4353 |
23 Aug 23 |
peter |
426 |
{ |
4353 |
23 Aug 23 |
peter |
427 |
Aligner::Cigar result; |
4353 |
23 Aug 23 |
peter |
428 |
Iterator1 ref(ref_begin); |
4353 |
23 Aug 23 |
peter |
429 |
Iterator2 query(query_begin); |
4353 |
23 Aug 23 |
peter |
430 |
for (size_t i=0; i<cigar.size(); ++i) { |
4353 |
23 Aug 23 |
peter |
431 |
switch (cigar.op(i)) { |
4353 |
23 Aug 23 |
peter |
432 |
case (BAM_CMATCH) : |
4353 |
23 Aug 23 |
peter |
433 |
for (size_t j=0; j<cigar.oplen(i); ++j) { |
4353 |
23 Aug 23 |
peter |
434 |
if (kernel(ref, query) > 0.0) |
4353 |
23 Aug 23 |
peter |
435 |
result.push_back(BAM_CEQUAL); |
4353 |
23 Aug 23 |
peter |
436 |
else |
4353 |
23 Aug 23 |
peter |
437 |
result.push_back(BAM_CDIFF); |
4353 |
23 Aug 23 |
peter |
438 |
++ref; |
4353 |
23 Aug 23 |
peter |
439 |
++query; |
4353 |
23 Aug 23 |
peter |
440 |
} |
4353 |
23 Aug 23 |
peter |
441 |
break; |
4353 |
23 Aug 23 |
peter |
442 |
case (BAM_CDEL) : |
4353 |
23 Aug 23 |
peter |
443 |
result.push_back(cigar.op(i), cigar.oplen(i)); |
4353 |
23 Aug 23 |
peter |
444 |
ref += cigar.oplen(i); |
4353 |
23 Aug 23 |
peter |
445 |
break; |
4353 |
23 Aug 23 |
peter |
446 |
case (BAM_CINS) : |
4353 |
23 Aug 23 |
peter |
447 |
case (BAM_CSOFT_CLIP) : |
4353 |
23 Aug 23 |
peter |
448 |
result.push_back(cigar.op(i), cigar.oplen(i)); |
4353 |
23 Aug 23 |
peter |
449 |
query += cigar.oplen(i); |
4353 |
23 Aug 23 |
peter |
450 |
break; |
4353 |
23 Aug 23 |
peter |
451 |
default: |
4353 |
23 Aug 23 |
peter |
452 |
throw std::runtime_error("calculate_cigar2: invalid cigar"); |
4353 |
23 Aug 23 |
peter |
453 |
} |
4353 |
23 Aug 23 |
peter |
454 |
} |
4353 |
23 Aug 23 |
peter |
455 |
return result; |
4353 |
23 Aug 23 |
peter |
456 |
} |
4353 |
23 Aug 23 |
peter |
457 |
|
2815 |
28 Aug 12 |
peter |
458 |
}}} // of namespace utility, yat, and theplu |
2815 |
28 Aug 12 |
peter |
459 |
|
2815 |
28 Aug 12 |
peter |
460 |
#endif |