3881 |
16 Mar 20 |
peter |
1 |
#ifndef _theplu_yat_omic_bam_pair_iterator_ |
3881 |
16 Mar 20 |
peter |
2 |
#define _theplu_yat_omic_bam_pair_iterator_ |
3173 |
08 Mar 14 |
peter |
3 |
|
3173 |
08 Mar 14 |
peter |
// $Id$ |
3173 |
08 Mar 14 |
peter |
5 |
|
3173 |
08 Mar 14 |
peter |
6 |
/* |
4089 |
07 Sep 21 |
peter |
Copyright (C) 2014, 2016, 2018, 2020, 2021 Peter Johansson |
3173 |
08 Mar 14 |
peter |
8 |
|
3173 |
08 Mar 14 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
3173 |
08 Mar 14 |
peter |
10 |
|
3173 |
08 Mar 14 |
peter |
The yat library is free software; you can redistribute it and/or |
3173 |
08 Mar 14 |
peter |
modify it under the terms of the GNU General Public License as |
3173 |
08 Mar 14 |
peter |
published by the Free Software Foundation; either version 3 of the |
3173 |
08 Mar 14 |
peter |
License, or (at your option) any later version. |
3173 |
08 Mar 14 |
peter |
15 |
|
3173 |
08 Mar 14 |
peter |
The yat library is distributed in the hope that it will be useful, |
3173 |
08 Mar 14 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
3173 |
08 Mar 14 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3173 |
08 Mar 14 |
peter |
General Public License for more details. |
3173 |
08 Mar 14 |
peter |
20 |
|
3173 |
08 Mar 14 |
peter |
You should have received a copy of the GNU General Public License |
3752 |
17 Oct 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
3173 |
08 Mar 14 |
peter |
23 |
*/ |
3173 |
08 Mar 14 |
peter |
24 |
|
3175 |
15 Mar 14 |
peter |
25 |
#include "BamPair.h" |
3173 |
08 Mar 14 |
peter |
26 |
#include "BamRead.h" |
3173 |
08 Mar 14 |
peter |
27 |
|
3198 |
29 Apr 14 |
peter |
28 |
#include <yat/utility/config_public.h> |
3197 |
29 Apr 14 |
peter |
29 |
#include <yat/utility/yat_assert.h> |
3197 |
29 Apr 14 |
peter |
30 |
|
3173 |
08 Mar 14 |
peter |
31 |
#include <boost/concept/assert.hpp> |
3295 |
25 Jul 14 |
peter |
32 |
#include <boost/iterator/iterator_concepts.hpp> |
3173 |
08 Mar 14 |
peter |
33 |
#include <boost/iterator/iterator_facade.hpp> |
3173 |
08 Mar 14 |
peter |
34 |
|
3173 |
08 Mar 14 |
peter |
35 |
#include <iterator> |
3173 |
08 Mar 14 |
peter |
36 |
#include <map> |
4019 |
06 Nov 20 |
peter |
37 |
#include <memory> |
3173 |
08 Mar 14 |
peter |
38 |
#include <utility> |
3173 |
08 Mar 14 |
peter |
39 |
|
3173 |
08 Mar 14 |
peter |
40 |
namespace theplu { |
3173 |
08 Mar 14 |
peter |
41 |
namespace yat { |
3173 |
08 Mar 14 |
peter |
42 |
namespace omic { |
3173 |
08 Mar 14 |
peter |
43 |
|
3173 |
08 Mar 14 |
peter |
44 |
/** |
3173 |
08 Mar 14 |
peter |
\since New in yat 0.12 |
3173 |
08 Mar 14 |
peter |
46 |
|
3517 |
16 Aug 16 |
peter |
Type Requirments: |
3517 |
16 Aug 16 |
peter |
- \c Base is a \readable_iterator |
3517 |
16 Aug 16 |
peter |
- \c Base is a \single_pass_iterator |
3517 |
16 Aug 16 |
peter |
- \c value_type is BamRead |
3173 |
08 Mar 14 |
peter |
51 |
|
3520 |
05 Oct 16 |
peter |
This iterator works on a sorted range [begin, end) and provides a |
4056 |
31 Mar 21 |
peter |
convenient way to access the pairs rather than the reads |
3173 |
08 Mar 14 |
peter |
individually. When BamPairIterator is incremented, rather than |
3173 |
08 Mar 14 |
peter |
just step to next element in [begin, end), BamPairIterator keeps |
3173 |
08 Mar 14 |
peter |
stepping until it finds a read whose mate it has already seen, and |
3173 |
08 Mar 14 |
peter |
operator* return the pair of BamReads. |
3174 |
08 Mar 14 |
peter |
58 |
|
4056 |
31 Mar 21 |
peter |
The resulting iterator is sorted with respect to \c second(). |
4056 |
31 Mar 21 |
peter |
60 |
|
3174 |
08 Mar 14 |
peter |
Note that BamPairIterator is a single-pass iterator, i.e., once it |
3174 |
08 Mar 14 |
peter |
is incremented the behaviour of its copies is undefined. |
3173 |
08 Mar 14 |
peter |
63 |
*/ |
3173 |
08 Mar 14 |
peter |
64 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
65 |
class BamPairIterator |
3173 |
08 Mar 14 |
peter |
66 |
: public boost::iterator_facade< |
3176 |
15 Mar 14 |
peter |
67 |
BamPairIterator<Base>, const BamPair, std::input_iterator_tag, |
3176 |
15 Mar 14 |
peter |
68 |
const BamPairProxy |
3173 |
08 Mar 14 |
peter |
69 |
> |
3173 |
08 Mar 14 |
peter |
70 |
{ |
3173 |
08 Mar 14 |
peter |
71 |
public: |
3173 |
08 Mar 14 |
peter |
72 |
/** |
3173 |
08 Mar 14 |
peter |
\brief default constructor |
3173 |
08 Mar 14 |
peter |
74 |
*/ |
3173 |
08 Mar 14 |
peter |
75 |
BamPairIterator(void); |
3173 |
08 Mar 14 |
peter |
76 |
|
3173 |
08 Mar 14 |
peter |
77 |
/** |
3173 |
08 Mar 14 |
peter |
Creates an iterator that will work on [begin, end). |
3173 |
08 Mar 14 |
peter |
79 |
|
3173 |
08 Mar 14 |
peter |
\see bam_pair_iterator |
3173 |
08 Mar 14 |
peter |
81 |
|
3173 |
08 Mar 14 |
peter |
\note Input range \c [\a first, \a last \c ) must be sorted or |
3173 |
08 Mar 14 |
peter |
behaviour is undefined. |
3173 |
08 Mar 14 |
peter |
84 |
*/ |
3173 |
08 Mar 14 |
peter |
85 |
explicit BamPairIterator(Base begin, Base end); |
3197 |
29 Apr 14 |
peter |
86 |
|
3198 |
29 Apr 14 |
peter |
87 |
#ifndef YAT_HAVE_BOOST_ITERATOR_FACADE_PROXY_PTR |
3197 |
29 Apr 14 |
peter |
88 |
/** |
3197 |
29 Apr 14 |
peter |
This is workaround implementation of operator-> when |
3197 |
29 Apr 14 |
peter |
implementation in base class (boost) does not compile. This |
3197 |
29 Apr 14 |
peter |
implementation may be slow, so when using old boost it often |
3197 |
29 Apr 14 |
peter |
prefereble to use operator*. |
3197 |
29 Apr 14 |
peter |
93 |
*/ |
3197 |
29 Apr 14 |
peter |
94 |
typename BamPairIterator::pointer operator->(void) const |
3197 |
29 Apr 14 |
peter |
95 |
{ |
3197 |
29 Apr 14 |
peter |
// older versions of boost mandates pointer to be |
3197 |
29 Apr 14 |
peter |
// value_type*. To accomplish that we implement this slow thing |
3197 |
29 Apr 14 |
peter |
// which keeps a copy of a value_type as member. |
3197 |
29 Apr 14 |
peter |
// See https://svn.boost.org/trac/boost/ticket/5697 for discussion. |
3197 |
29 Apr 14 |
peter |
100 |
|
3197 |
29 Apr 14 |
peter |
// Possibly stupid to assign each time, but why bother optimize |
3197 |
29 Apr 14 |
peter |
// for old bugs in boost. Users who are eager for speed, should |
3197 |
29 Apr 14 |
peter |
// either upgrade their boost or use (*iterator).function() |
3197 |
29 Apr 14 |
peter |
// rather than iterator->function(). |
3197 |
29 Apr 14 |
peter |
105 |
YAT_ASSERT(mate_); |
3197 |
29 Apr 14 |
peter |
106 |
YAT_ASSERT(iter_ != end_); |
3197 |
29 Apr 14 |
peter |
107 |
dummie_.first() = *mate_; |
3197 |
29 Apr 14 |
peter |
108 |
dummie_.second() = *iter_; |
3197 |
29 Apr 14 |
peter |
109 |
return &dummie_; |
3197 |
29 Apr 14 |
peter |
110 |
} |
3173 |
08 Mar 14 |
peter |
111 |
private: |
3197 |
29 Apr 14 |
peter |
112 |
mutable BamPair dummie_; |
3197 |
29 Apr 14 |
peter |
113 |
#endif |
3197 |
29 Apr 14 |
peter |
114 |
|
3197 |
29 Apr 14 |
peter |
115 |
private: |
3173 |
08 Mar 14 |
peter |
116 |
Base iter_; |
3173 |
08 Mar 14 |
peter |
117 |
Base end_; |
3176 |
15 Mar 14 |
peter |
118 |
const BamRead* mate_; |
4019 |
06 Nov 20 |
peter |
119 |
std::shared_ptr<std::map<std::string, BamRead> > siam_reads_; |
3173 |
08 Mar 14 |
peter |
120 |
typedef std::pair<int32_t, int32_t> Position; |
4019 |
06 Nov 20 |
peter |
121 |
std::shared_ptr<std::multimap<Position, BamRead> > reads_; |
3173 |
08 Mar 14 |
peter |
122 |
friend class boost::iterator_core_access; |
3173 |
08 Mar 14 |
peter |
123 |
|
3176 |
15 Mar 14 |
peter |
124 |
const BamPairProxy dereference(void) const; |
3173 |
08 Mar 14 |
peter |
125 |
bool equal(const BamPairIterator& other) const; |
3173 |
08 Mar 14 |
peter |
126 |
void increment(void); |
3173 |
08 Mar 14 |
peter |
127 |
void find_next(void); |
3173 |
08 Mar 14 |
peter |
128 |
}; |
3173 |
08 Mar 14 |
peter |
129 |
|
3173 |
08 Mar 14 |
peter |
130 |
|
3173 |
08 Mar 14 |
peter |
131 |
/** |
3173 |
08 Mar 14 |
peter |
Convenient function to create BamPairIterator |
3173 |
08 Mar 14 |
peter |
133 |
|
3173 |
08 Mar 14 |
peter |
\relates BamPairIterator |
3173 |
08 Mar 14 |
peter |
135 |
*/ |
3173 |
08 Mar 14 |
peter |
136 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
137 |
BamPairIterator<Base> bam_pair_iterator(Base base, Base end) |
3173 |
08 Mar 14 |
peter |
138 |
{ return BamPairIterator<Base>(base, end); } |
3173 |
08 Mar 14 |
peter |
139 |
|
3173 |
08 Mar 14 |
peter |
140 |
|
3173 |
08 Mar 14 |
peter |
141 |
/** |
3173 |
08 Mar 14 |
peter |
Convenient function to create BamPairIterator that works on empty |
3173 |
08 Mar 14 |
peter |
range. Equivalent to bam_pair_iterator(end, end). |
3173 |
08 Mar 14 |
peter |
144 |
|
3173 |
08 Mar 14 |
peter |
\relates BamPairIterator |
3173 |
08 Mar 14 |
peter |
146 |
*/ |
3173 |
08 Mar 14 |
peter |
147 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
148 |
BamPairIterator<Base> bam_pair_iterator(Base end) |
3173 |
08 Mar 14 |
peter |
149 |
{ return BamPairIterator<Base>(end, end); } |
3173 |
08 Mar 14 |
peter |
150 |
|
3173 |
08 Mar 14 |
peter |
151 |
|
3173 |
08 Mar 14 |
peter |
// template implementations |
3173 |
08 Mar 14 |
peter |
153 |
/////////////////////////////////////////////////////////////// |
3173 |
08 Mar 14 |
peter |
154 |
|
3173 |
08 Mar 14 |
peter |
155 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
156 |
BamPairIterator<Base>::BamPairIterator(Base base, Base end) |
3174 |
08 Mar 14 |
peter |
157 |
: iter_(base), end_(end), |
3174 |
08 Mar 14 |
peter |
158 |
siam_reads_(new std::map<std::string, BamRead>), |
3174 |
08 Mar 14 |
peter |
159 |
reads_(new std::multimap<Position, BamRead>) |
3173 |
08 Mar 14 |
peter |
160 |
{ |
3517 |
16 Aug 16 |
peter |
161 |
BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Base>)); |
3517 |
16 Aug 16 |
peter |
162 |
BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Base>)); |
3173 |
08 Mar 14 |
peter |
163 |
using boost::Convertible; |
3295 |
25 Jul 14 |
peter |
164 |
typedef typename std::iterator_traits<Base>::value_type value_type; |
3295 |
25 Jul 14 |
peter |
165 |
BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>)); |
3173 |
08 Mar 14 |
peter |
166 |
find_next(); |
3173 |
08 Mar 14 |
peter |
167 |
} |
3173 |
08 Mar 14 |
peter |
168 |
|
3173 |
08 Mar 14 |
peter |
169 |
|
3173 |
08 Mar 14 |
peter |
170 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
171 |
BamPairIterator<Base>::BamPairIterator(void) |
3173 |
08 Mar 14 |
peter |
172 |
{ |
3173 |
08 Mar 14 |
peter |
173 |
BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>)); |
3173 |
08 Mar 14 |
peter |
174 |
using boost::Convertible; |
3295 |
25 Jul 14 |
peter |
175 |
typedef typename std::iterator_traits<Base>::value_type value_type; |
3295 |
25 Jul 14 |
peter |
176 |
BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>)); |
3173 |
08 Mar 14 |
peter |
177 |
} |
3173 |
08 Mar 14 |
peter |
178 |
|
3173 |
08 Mar 14 |
peter |
179 |
|
3173 |
08 Mar 14 |
peter |
180 |
template<typename Base> |
3176 |
15 Mar 14 |
peter |
181 |
const BamPairProxy |
3173 |
08 Mar 14 |
peter |
182 |
BamPairIterator<Base>::dereference(void) const |
3173 |
08 Mar 14 |
peter |
183 |
{ |
3176 |
15 Mar 14 |
peter |
184 |
return BamPairProxy(mate_, &*iter_); |
3173 |
08 Mar 14 |
peter |
185 |
} |
3173 |
08 Mar 14 |
peter |
186 |
|
3173 |
08 Mar 14 |
peter |
187 |
|
3173 |
08 Mar 14 |
peter |
188 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
189 |
bool BamPairIterator<Base>::equal(const BamPairIterator& other) const |
3173 |
08 Mar 14 |
peter |
190 |
{ |
3173 |
08 Mar 14 |
peter |
191 |
return iter_ == other.iter_; |
3173 |
08 Mar 14 |
peter |
192 |
} |
3173 |
08 Mar 14 |
peter |
193 |
|
3173 |
08 Mar 14 |
peter |
194 |
|
3173 |
08 Mar 14 |
peter |
195 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
196 |
void BamPairIterator<Base>::increment(void) |
3173 |
08 Mar 14 |
peter |
197 |
{ |
3173 |
08 Mar 14 |
peter |
198 |
++iter_; |
3173 |
08 Mar 14 |
peter |
199 |
find_next(); |
3173 |
08 Mar 14 |
peter |
200 |
} |
3173 |
08 Mar 14 |
peter |
201 |
|
3173 |
08 Mar 14 |
peter |
202 |
|
3173 |
08 Mar 14 |
peter |
203 |
template<typename Base> |
3173 |
08 Mar 14 |
peter |
204 |
void BamPairIterator<Base>::find_next(void) |
3173 |
08 Mar 14 |
peter |
205 |
{ |
3173 |
08 Mar 14 |
peter |
206 |
BamLessPos less; |
3173 |
08 Mar 14 |
peter |
207 |
for (; iter_!=end_; ++iter_) { |
3173 |
08 Mar 14 |
peter |
208 |
Position position(iter_->tid(), iter_->pos()); |
3173 |
08 Mar 14 |
peter |
209 |
Position mate_position(iter_->mtid(), iter_->mpos()); |
3173 |
08 Mar 14 |
peter |
// clear siam reads if iter is more advanced than siam reads |
3174 |
08 Mar 14 |
peter |
211 |
if (siam_reads_->size() && less(siam_reads_->begin()->second, *iter_)) |
3174 |
08 Mar 14 |
peter |
212 |
siam_reads_->clear(); |
3173 |
08 Mar 14 |
peter |
213 |
|
3173 |
08 Mar 14 |
peter |
// have not seen mate yet |
3173 |
08 Mar 14 |
peter |
215 |
if (mate_position > position) |
3174 |
08 Mar 14 |
peter |
216 |
reads_->insert(std::make_pair(mate_position, *iter_)); |
3173 |
08 Mar 14 |
peter |
217 |
else if (position > mate_position) { |
3173 |
08 Mar 14 |
peter |
218 |
std::multimap<Position, BamRead>::iterator |
3174 |
08 Mar 14 |
peter |
219 |
lower = reads_->lower_bound(position); |
3173 |
08 Mar 14 |
peter |
// erase all reads with mate position less than current |
3173 |
08 Mar 14 |
peter |
// position (assuming input range is sorted) |
3174 |
08 Mar 14 |
peter |
222 |
reads_->erase(reads_->begin(), lower); |
3173 |
08 Mar 14 |
peter |
223 |
|
3173 |
08 Mar 14 |
peter |
// find mate and assign pair |
3174 |
08 Mar 14 |
peter |
225 |
for (; lower!=reads_->end() && lower->first == position; ++lower) |
3173 |
08 Mar 14 |
peter |
226 |
if (same_query_name(lower->second, *iter_)) { |
3176 |
15 Mar 14 |
peter |
227 |
mate_ = &lower->second; |
3173 |
08 Mar 14 |
peter |
228 |
return; |
3173 |
08 Mar 14 |
peter |
229 |
} |
3173 |
08 Mar 14 |
peter |
230 |
} |
3173 |
08 Mar 14 |
peter |
231 |
else { // siam read, i.e., iter_ and mate have same position |
3173 |
08 Mar 14 |
peter |
// check if we have already seen mate and stored it in map |
3173 |
08 Mar 14 |
peter |
233 |
std::map<std::string, BamRead>::iterator |
3174 |
08 Mar 14 |
peter |
234 |
mate = siam_reads_->lower_bound(iter_->name()); |
3174 |
08 Mar 14 |
peter |
235 |
if (mate!=siam_reads_->end() && same_query_name(mate->second, *iter_)) { |
3176 |
15 Mar 14 |
peter |
236 |
mate_ = &mate->second; |
3173 |
08 Mar 14 |
peter |
237 |
return; |
3173 |
08 Mar 14 |
peter |
238 |
} |
3173 |
08 Mar 14 |
peter |
239 |
else |
3173 |
08 Mar 14 |
peter |
// insert with hint |
3174 |
08 Mar 14 |
peter |
241 |
siam_reads_->insert(mate, std::make_pair(iter_->name(), *iter_)); |
3173 |
08 Mar 14 |
peter |
242 |
} |
3173 |
08 Mar 14 |
peter |
243 |
} |
3173 |
08 Mar 14 |
peter |
244 |
} |
3173 |
08 Mar 14 |
peter |
245 |
|
3173 |
08 Mar 14 |
peter |
246 |
}}} |
3173 |
08 Mar 14 |
peter |
247 |
#endif |