3881 |
16 Mar 20 |
peter |
1 |
#ifndef theplu_yat_omic_bam_pair_iterator2 |
3881 |
16 Mar 20 |
peter |
2 |
#define theplu_yat_omic_bam_pair_iterator2 |
3878 |
12 Mar 20 |
peter |
3 |
|
3878 |
12 Mar 20 |
peter |
// $Id$ |
3878 |
12 Mar 20 |
peter |
5 |
|
3878 |
12 Mar 20 |
peter |
6 |
/* |
4057 |
31 Mar 21 |
peter |
Copyright (C) 2020, 2021 Peter Johansson |
3878 |
12 Mar 20 |
peter |
8 |
|
3878 |
12 Mar 20 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
3878 |
12 Mar 20 |
peter |
10 |
|
3878 |
12 Mar 20 |
peter |
The yat library is free software; you can redistribute it and/or |
3878 |
12 Mar 20 |
peter |
modify it under the terms of the GNU General Public License as |
3878 |
12 Mar 20 |
peter |
published by the Free Software Foundation; either version 3 of the |
3878 |
12 Mar 20 |
peter |
License, or (at your option) any later version. |
3878 |
12 Mar 20 |
peter |
15 |
|
3878 |
12 Mar 20 |
peter |
The yat library is distributed in the hope that it will be useful, |
3878 |
12 Mar 20 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
3878 |
12 Mar 20 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3878 |
12 Mar 20 |
peter |
General Public License for more details. |
3878 |
12 Mar 20 |
peter |
20 |
|
3878 |
12 Mar 20 |
peter |
You should have received a copy of the GNU General Public License |
3878 |
12 Mar 20 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
3878 |
12 Mar 20 |
peter |
23 |
*/ |
3878 |
12 Mar 20 |
peter |
24 |
|
3878 |
12 Mar 20 |
peter |
25 |
#include "BamPair.h" |
3878 |
12 Mar 20 |
peter |
26 |
#include "BamRead.h" |
3878 |
12 Mar 20 |
peter |
27 |
|
3878 |
12 Mar 20 |
peter |
28 |
#include "yat/utility/config_public.h" |
3878 |
12 Mar 20 |
peter |
29 |
#include "yat/utility/yat_assert.h" |
3878 |
12 Mar 20 |
peter |
30 |
|
3878 |
12 Mar 20 |
peter |
31 |
#include <boost/concept/assert.hpp> |
3878 |
12 Mar 20 |
peter |
32 |
#include <boost/iterator/iterator_concepts.hpp> |
3878 |
12 Mar 20 |
peter |
33 |
#include <boost/iterator/iterator_facade.hpp> |
3878 |
12 Mar 20 |
peter |
34 |
|
3878 |
12 Mar 20 |
peter |
35 |
#include <iterator> |
3878 |
12 Mar 20 |
peter |
36 |
#include <map> |
4019 |
06 Nov 20 |
peter |
37 |
#include <memory> |
3878 |
12 Mar 20 |
peter |
38 |
#include <utility> |
3878 |
12 Mar 20 |
peter |
39 |
|
3878 |
12 Mar 20 |
peter |
40 |
namespace theplu { |
3878 |
12 Mar 20 |
peter |
41 |
namespace yat { |
3878 |
12 Mar 20 |
peter |
42 |
namespace omic { |
3878 |
12 Mar 20 |
peter |
43 |
|
3878 |
12 Mar 20 |
peter |
44 |
/** |
3878 |
12 Mar 20 |
peter |
Type Requirments: |
3878 |
12 Mar 20 |
peter |
- \c Base is a \readable_iterator |
3878 |
12 Mar 20 |
peter |
- \c Base is a \single_pass_iterator |
3878 |
12 Mar 20 |
peter |
- \c value_type is BamRead |
3878 |
12 Mar 20 |
peter |
49 |
|
3878 |
12 Mar 20 |
peter |
This iterator works on a sorted range [begin, end) and provides a |
3878 |
12 Mar 20 |
peter |
covenient way to access the pairs rather than the reads |
3878 |
12 Mar 20 |
peter |
individually. The pairs are iterated such that they appear sorted |
3878 |
12 Mar 20 |
peter |
with respect to \c first(). |
3878 |
12 Mar 20 |
peter |
54 |
|
3878 |
12 Mar 20 |
peter |
If read pairs with large isize (tlen) are included the iterator |
3878 |
12 Mar 20 |
peter |
will consumer a lot of memory because iterator does store |
3878 |
12 Mar 20 |
peter |
virtually all reads in the underlying range between the current |
3878 |
12 Mar 20 |
peter |
\c first() and its mate, \c second(). |
3878 |
12 Mar 20 |
peter |
59 |
|
3878 |
12 Mar 20 |
peter |
Note that BamPairIterator2 is a single-pass iterator, i.e., once it |
3878 |
12 Mar 20 |
peter |
is incremented the behaviour of its copies is undefined. |
3878 |
12 Mar 20 |
peter |
62 |
|
4057 |
31 Mar 21 |
peter |
\see BamPairBuffer |
4057 |
31 Mar 21 |
peter |
64 |
|
3878 |
12 Mar 20 |
peter |
\since New in yat 0.18 |
3878 |
12 Mar 20 |
peter |
66 |
*/ |
3878 |
12 Mar 20 |
peter |
67 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
68 |
class BamPairIterator2 |
3878 |
12 Mar 20 |
peter |
69 |
: public boost::iterator_facade< |
3878 |
12 Mar 20 |
peter |
70 |
BamPairIterator2<Base>, const BamPair, std::input_iterator_tag, |
3878 |
12 Mar 20 |
peter |
71 |
const BamPairProxy |
3878 |
12 Mar 20 |
peter |
72 |
> |
3878 |
12 Mar 20 |
peter |
73 |
{ |
3878 |
12 Mar 20 |
peter |
74 |
public: |
3878 |
12 Mar 20 |
peter |
75 |
/** |
3878 |
12 Mar 20 |
peter |
\brief default constructor |
3878 |
12 Mar 20 |
peter |
77 |
*/ |
3878 |
12 Mar 20 |
peter |
78 |
BamPairIterator2(void); |
3878 |
12 Mar 20 |
peter |
79 |
|
3878 |
12 Mar 20 |
peter |
80 |
/** |
3878 |
12 Mar 20 |
peter |
Creates an iterator that will work on [begin, end). |
3878 |
12 Mar 20 |
peter |
82 |
|
3878 |
12 Mar 20 |
peter |
\note Input range \c [\a begin, \a end \c ) must be sorted or |
3878 |
12 Mar 20 |
peter |
behaviour is undefined. |
3878 |
12 Mar 20 |
peter |
85 |
*/ |
3878 |
12 Mar 20 |
peter |
86 |
explicit BamPairIterator2(Base begin, Base end); |
3878 |
12 Mar 20 |
peter |
87 |
|
3878 |
12 Mar 20 |
peter |
88 |
#ifndef YAT_HAVE_BOOST_ITERATOR_FACADE_PROXY_PTR |
3878 |
12 Mar 20 |
peter |
89 |
/** |
3878 |
12 Mar 20 |
peter |
This is workaround implementation of operator-> when |
3878 |
12 Mar 20 |
peter |
implementation in base class (boost) does not compile. This |
3878 |
12 Mar 20 |
peter |
implementation may be slow, so when using old boost it often |
3878 |
12 Mar 20 |
peter |
prefereble to use operator*. |
3878 |
12 Mar 20 |
peter |
94 |
*/ |
3878 |
12 Mar 20 |
peter |
95 |
typename BamPairIterator2::pointer operator->(void) const |
3878 |
12 Mar 20 |
peter |
96 |
{ |
3878 |
12 Mar 20 |
peter |
// older versions of boost mandates pointer to be |
3878 |
12 Mar 20 |
peter |
// value_type*. To accomplish that we implement this slow thing |
3878 |
12 Mar 20 |
peter |
// which keeps a copy of a value_type as member. |
3878 |
12 Mar 20 |
peter |
// See https://svn.boost.org/trac/boost/ticket/5697 for discussion. |
3878 |
12 Mar 20 |
peter |
101 |
|
3878 |
12 Mar 20 |
peter |
// Possibly stupid to assign each time, but why bother optimize |
3878 |
12 Mar 20 |
peter |
// for old bugs in boost. Users who are eager for speed, should |
3878 |
12 Mar 20 |
peter |
// either upgrade their boost or use (*iterator).function() |
3878 |
12 Mar 20 |
peter |
// rather than iterator->function(). |
3878 |
12 Mar 20 |
peter |
106 |
YAT_ASSERT(first_); |
3878 |
12 Mar 20 |
peter |
107 |
YAT_ASSERT(second_); |
3878 |
12 Mar 20 |
peter |
108 |
dummie_.first() = first_->first; |
3878 |
12 Mar 20 |
peter |
109 |
dummie_.second() = second_->second; |
3878 |
12 Mar 20 |
peter |
110 |
return &dummie_; |
3878 |
12 Mar 20 |
peter |
111 |
} |
3878 |
12 Mar 20 |
peter |
112 |
private: |
3878 |
12 Mar 20 |
peter |
113 |
mutable BamPair dummie_; |
3878 |
12 Mar 20 |
peter |
114 |
#endif |
3878 |
12 Mar 20 |
peter |
115 |
|
3878 |
12 Mar 20 |
peter |
116 |
private: |
3878 |
12 Mar 20 |
peter |
117 |
Base iter_; |
3878 |
12 Mar 20 |
peter |
118 |
Base end_; |
3878 |
12 Mar 20 |
peter |
119 |
typedef std::pair<int32_t, int32_t> Position; |
3878 |
12 Mar 20 |
peter |
// multimap with reads' position as key |
3878 |
12 Mar 20 |
peter |
121 |
typedef std::multimap<Position, BamRead> MultiMap; |
4019 |
06 Nov 20 |
peter |
122 |
std::shared_ptr<MultiMap> reads_; |
3880 |
13 Mar 20 |
peter |
123 |
MultiMap::iterator first_; |
3880 |
13 Mar 20 |
peter |
124 |
MultiMap::iterator second_; |
3878 |
12 Mar 20 |
peter |
125 |
friend class boost::iterator_core_access; |
3878 |
12 Mar 20 |
peter |
126 |
|
3878 |
12 Mar 20 |
peter |
127 |
void assign_pair(void); |
3878 |
12 Mar 20 |
peter |
128 |
const BamPairProxy dereference(void) const; |
3878 |
12 Mar 20 |
peter |
129 |
bool equal(const BamPairIterator2& other) const; |
3878 |
12 Mar 20 |
peter |
130 |
// |
3880 |
13 Mar 20 |
peter |
131 |
std::multimap<Position, BamRead>::iterator |
3878 |
12 Mar 20 |
peter |
132 |
find_mate(const yat::omic::BamRead&); |
3878 |
12 Mar 20 |
peter |
133 |
void increment(void); |
3878 |
12 Mar 20 |
peter |
134 |
void pop(void); |
3878 |
12 Mar 20 |
peter |
135 |
bool push(void); |
3878 |
12 Mar 20 |
peter |
136 |
}; |
3878 |
12 Mar 20 |
peter |
137 |
|
3878 |
12 Mar 20 |
peter |
138 |
|
3878 |
12 Mar 20 |
peter |
139 |
/** |
3878 |
12 Mar 20 |
peter |
Convenient function to create BamPairIterator |
3878 |
12 Mar 20 |
peter |
141 |
|
3878 |
12 Mar 20 |
peter |
\relates BamPairIterator |
3878 |
12 Mar 20 |
peter |
143 |
*/ |
3878 |
12 Mar 20 |
peter |
144 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
145 |
BamPairIterator2<Base> bam_pair_iterator2(Base base, Base end) |
3878 |
12 Mar 20 |
peter |
146 |
{ return BamPairIterator2<Base>(base, end); } |
3878 |
12 Mar 20 |
peter |
147 |
|
3878 |
12 Mar 20 |
peter |
148 |
|
3878 |
12 Mar 20 |
peter |
149 |
/** |
3878 |
12 Mar 20 |
peter |
Convenient function to create BamPairIterator that works on empty |
3878 |
12 Mar 20 |
peter |
range. Equivalent to bam_pair_iterator(end, end). |
3878 |
12 Mar 20 |
peter |
152 |
|
3878 |
12 Mar 20 |
peter |
\relates BamPairIterator |
3878 |
12 Mar 20 |
peter |
154 |
*/ |
3878 |
12 Mar 20 |
peter |
155 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
156 |
BamPairIterator2<Base> bam_pair_iterator2(Base end) |
3878 |
12 Mar 20 |
peter |
157 |
{ return BamPairIterator2<Base>(end, end); } |
3878 |
12 Mar 20 |
peter |
158 |
|
3878 |
12 Mar 20 |
peter |
159 |
|
3878 |
12 Mar 20 |
peter |
// template implementations |
3878 |
12 Mar 20 |
peter |
161 |
/////////////////////////////////////////////////////////////// |
3878 |
12 Mar 20 |
peter |
162 |
|
3878 |
12 Mar 20 |
peter |
163 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
164 |
BamPairIterator2<Base>::BamPairIterator2(Base base, Base end) |
3878 |
12 Mar 20 |
peter |
165 |
: iter_(base), end_(end), |
3878 |
12 Mar 20 |
peter |
166 |
reads_(new std::multimap<Position, BamRead>), |
3878 |
12 Mar 20 |
peter |
167 |
first_(reads_->end()), second_(reads_->end()) |
3878 |
12 Mar 20 |
peter |
168 |
{ |
3878 |
12 Mar 20 |
peter |
169 |
BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Base>)); |
3878 |
12 Mar 20 |
peter |
170 |
BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Base>)); |
3878 |
12 Mar 20 |
peter |
171 |
using boost::Convertible; |
3878 |
12 Mar 20 |
peter |
172 |
typedef typename std::iterator_traits<Base>::value_type value_type; |
3878 |
12 Mar 20 |
peter |
173 |
BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>)); |
3878 |
12 Mar 20 |
peter |
174 |
assign_pair(); |
3878 |
12 Mar 20 |
peter |
175 |
} |
3878 |
12 Mar 20 |
peter |
176 |
|
3878 |
12 Mar 20 |
peter |
177 |
|
3878 |
12 Mar 20 |
peter |
178 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
179 |
BamPairIterator2<Base>::BamPairIterator2(void) |
3878 |
12 Mar 20 |
peter |
180 |
{ |
3878 |
12 Mar 20 |
peter |
181 |
BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>)); |
3878 |
12 Mar 20 |
peter |
182 |
using boost::Convertible; |
3878 |
12 Mar 20 |
peter |
183 |
typedef typename std::iterator_traits<Base>::value_type value_type; |
3878 |
12 Mar 20 |
peter |
184 |
BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>)); |
3878 |
12 Mar 20 |
peter |
185 |
} |
3878 |
12 Mar 20 |
peter |
186 |
|
3878 |
12 Mar 20 |
peter |
187 |
|
3878 |
12 Mar 20 |
peter |
188 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
189 |
void BamPairIterator2<Base>::assign_pair(void) |
3878 |
12 Mar 20 |
peter |
190 |
{ |
3878 |
12 Mar 20 |
peter |
191 |
while (true) { |
3878 |
12 Mar 20 |
peter |
192 |
if (reads_->empty()) |
3878 |
12 Mar 20 |
peter |
193 |
if (!push()) // if no reads, we are done |
3878 |
12 Mar 20 |
peter |
194 |
return; |
3878 |
12 Mar 20 |
peter |
195 |
YAT_ASSERT(reads_->size()); |
3878 |
12 Mar 20 |
peter |
196 |
first_ = reads_->begin(); |
3878 |
12 Mar 20 |
peter |
197 |
second_ = find_mate(first_->second); |
3878 |
12 Mar 20 |
peter |
198 |
if (second_ == reads_->end()) |
3878 |
12 Mar 20 |
peter |
199 |
pop(); |
3878 |
12 Mar 20 |
peter |
200 |
else |
3878 |
12 Mar 20 |
peter |
201 |
break; |
3878 |
12 Mar 20 |
peter |
202 |
} |
3878 |
12 Mar 20 |
peter |
203 |
} |
3878 |
12 Mar 20 |
peter |
204 |
|
3878 |
12 Mar 20 |
peter |
205 |
|
3878 |
12 Mar 20 |
peter |
206 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
207 |
const BamPairProxy |
3878 |
12 Mar 20 |
peter |
208 |
BamPairIterator2<Base>::dereference(void) const |
3878 |
12 Mar 20 |
peter |
209 |
{ |
3878 |
12 Mar 20 |
peter |
210 |
return BamPairProxy(&first_->second, &second_->second); |
3878 |
12 Mar 20 |
peter |
211 |
} |
3878 |
12 Mar 20 |
peter |
212 |
|
3878 |
12 Mar 20 |
peter |
213 |
|
3878 |
12 Mar 20 |
peter |
214 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
215 |
bool BamPairIterator2<Base>::equal(const BamPairIterator2& other) const |
3878 |
12 Mar 20 |
peter |
216 |
{ |
3878 |
12 Mar 20 |
peter |
217 |
return iter_ == other.iter_ && reads_->size() == other.reads_->size(); |
3878 |
12 Mar 20 |
peter |
218 |
} |
3878 |
12 Mar 20 |
peter |
219 |
|
3878 |
12 Mar 20 |
peter |
220 |
|
3878 |
12 Mar 20 |
peter |
221 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
222 |
void BamPairIterator2<Base>::increment(void) |
3878 |
12 Mar 20 |
peter |
223 |
{ |
3878 |
12 Mar 20 |
peter |
224 |
pop(); |
3878 |
12 Mar 20 |
peter |
225 |
assign_pair(); |
3878 |
12 Mar 20 |
peter |
226 |
} |
3878 |
12 Mar 20 |
peter |
227 |
|
3878 |
12 Mar 20 |
peter |
228 |
|
3878 |
12 Mar 20 |
peter |
229 |
template<typename Base> |
3880 |
13 Mar 20 |
peter |
230 |
BamPairIterator2<Base>::MultiMap::iterator |
3878 |
12 Mar 20 |
peter |
231 |
BamPairIterator2<Base>::find_mate(const yat::omic::BamRead& bam) |
3878 |
12 Mar 20 |
peter |
232 |
{ |
3878 |
12 Mar 20 |
peter |
233 |
Position mate_pos(bam.mtid(), bam.mpos()); |
3878 |
12 Mar 20 |
peter |
234 |
YAT_ASSERT(reads_->size()); |
3878 |
12 Mar 20 |
peter |
// push in reads, ensure all reads with pos==mate_pos has been |
3878 |
12 Mar 20 |
peter |
// pushed in by pushing until pos of last one is one-past mate_pos |
3878 |
12 Mar 20 |
peter |
237 |
while (reads_->rbegin()->first <= mate_pos && iter_!=end_) |
3878 |
12 Mar 20 |
peter |
238 |
push(); |
3878 |
12 Mar 20 |
peter |
239 |
|
3880 |
13 Mar 20 |
peter |
240 |
typedef MultiMap::iterator iterator; |
3878 |
12 Mar 20 |
peter |
241 |
std::pair<iterator, iterator> range = reads_->equal_range(mate_pos); |
3878 |
12 Mar 20 |
peter |
242 |
for (; range.first != range.second; ++range.first) { |
3878 |
12 Mar 20 |
peter |
243 |
if (!same_query_name(bam, range.first->second)) |
3878 |
12 Mar 20 |
peter |
244 |
continue; |
3878 |
12 Mar 20 |
peter |
// for siamese pair avoid pairing up with self |
3878 |
12 Mar 20 |
peter |
246 |
if (bam.pos()==bam.mpos() && bam.tid()==bam.mtid() && |
3878 |
12 Mar 20 |
peter |
247 |
bam.flag() == range.first->second.flag()) |
3878 |
12 Mar 20 |
peter |
248 |
continue; |
3878 |
12 Mar 20 |
peter |
249 |
return range.first; |
3878 |
12 Mar 20 |
peter |
250 |
} |
3878 |
12 Mar 20 |
peter |
251 |
return reads_->end(); |
3878 |
12 Mar 20 |
peter |
252 |
} |
3878 |
12 Mar 20 |
peter |
253 |
|
3878 |
12 Mar 20 |
peter |
254 |
|
3878 |
12 Mar 20 |
peter |
255 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
256 |
void BamPairIterator2<Base>::pop(void) |
3878 |
12 Mar 20 |
peter |
257 |
{ |
3878 |
12 Mar 20 |
peter |
258 |
YAT_ASSERT(reads_->size()); |
3878 |
12 Mar 20 |
peter |
259 |
reads_->erase(first_); |
3878 |
12 Mar 20 |
peter |
260 |
} |
3878 |
12 Mar 20 |
peter |
261 |
|
3878 |
12 Mar 20 |
peter |
262 |
|
3878 |
12 Mar 20 |
peter |
263 |
template<typename Base> |
3878 |
12 Mar 20 |
peter |
264 |
bool BamPairIterator2<Base>::push(void) |
3878 |
12 Mar 20 |
peter |
265 |
{ |
3878 |
12 Mar 20 |
peter |
266 |
if (iter_ == end_) |
3878 |
12 Mar 20 |
peter |
267 |
return false; |
3878 |
12 Mar 20 |
peter |
268 |
Position position(iter_->tid(), iter_->pos()); |
3878 |
12 Mar 20 |
peter |
269 |
reads_->insert(reads_->end(), std::make_pair(position, *iter_)); |
3878 |
12 Mar 20 |
peter |
270 |
++iter_; |
3878 |
12 Mar 20 |
peter |
271 |
return true; |
3878 |
12 Mar 20 |
peter |
272 |
} |
3878 |
12 Mar 20 |
peter |
273 |
|
3878 |
12 Mar 20 |
peter |
274 |
}}} |
3878 |
12 Mar 20 |
peter |
275 |
#endif |