2883 |
03 Dec 12 |
peter |
1 |
#ifndef theplu_yat_omic_bam_read_iterator |
2883 |
03 Dec 12 |
peter |
2 |
#define theplu_yat_omic_bam_read_iterator |
2883 |
03 Dec 12 |
peter |
3 |
|
2883 |
03 Dec 12 |
peter |
// $Id$ |
2883 |
03 Dec 12 |
peter |
5 |
|
2993 |
03 Mar 13 |
peter |
6 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2012, 2013, 2014, 2017, 2018, 2020, 2023 Peter Johansson |
2993 |
03 Mar 13 |
peter |
8 |
|
2993 |
03 Mar 13 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2993 |
03 Mar 13 |
peter |
10 |
|
2993 |
03 Mar 13 |
peter |
The yat library is free software; you can redistribute it and/or |
2993 |
03 Mar 13 |
peter |
modify it under the terms of the GNU General Public License as |
2993 |
03 Mar 13 |
peter |
published by the Free Software Foundation; either version 3 of the |
2993 |
03 Mar 13 |
peter |
License, or (at your option) any later version. |
2993 |
03 Mar 13 |
peter |
15 |
|
2993 |
03 Mar 13 |
peter |
The yat library is distributed in the hope that it will be useful, |
2993 |
03 Mar 13 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2993 |
03 Mar 13 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2993 |
03 Mar 13 |
peter |
General Public License for more details. |
2993 |
03 Mar 13 |
peter |
20 |
|
2993 |
03 Mar 13 |
peter |
You should have received a copy of the GNU General Public License |
3752 |
17 Oct 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2993 |
03 Mar 13 |
peter |
23 |
*/ |
2993 |
03 Mar 13 |
peter |
24 |
|
2883 |
03 Dec 12 |
peter |
25 |
#include "BamFile.h" |
2883 |
03 Dec 12 |
peter |
26 |
#include "BamRead.h" |
2883 |
03 Dec 12 |
peter |
27 |
|
3883 |
24 Mar 20 |
peter |
28 |
#include <htslib/sam.h> |
2883 |
03 Dec 12 |
peter |
29 |
|
2883 |
03 Dec 12 |
peter |
30 |
#include <boost/iterator/iterator_facade.hpp> |
2883 |
03 Dec 12 |
peter |
31 |
|
2883 |
03 Dec 12 |
peter |
32 |
#include <iterator> |
4011 |
22 Oct 20 |
peter |
33 |
#include <memory> |
4272 |
24 Jan 23 |
peter |
34 |
#include <vector> |
2883 |
03 Dec 12 |
peter |
35 |
|
2883 |
03 Dec 12 |
peter |
36 |
namespace theplu { |
2883 |
03 Dec 12 |
peter |
37 |
namespace yat { |
2883 |
03 Dec 12 |
peter |
38 |
namespace omic { |
2883 |
03 Dec 12 |
peter |
39 |
|
2883 |
03 Dec 12 |
peter |
// forward declaration |
4272 |
24 Jan 23 |
peter |
41 |
class BamRegion; |
2883 |
03 Dec 12 |
peter |
42 |
class InBamFile; |
2883 |
03 Dec 12 |
peter |
43 |
|
2883 |
03 Dec 12 |
peter |
44 |
/** |
2883 |
03 Dec 12 |
peter |
\brief class to iterate through a InBamFile |
2883 |
03 Dec 12 |
peter |
46 |
|
2883 |
03 Dec 12 |
peter |
Iterator is a single pass \input_iterator which means iterator |
2986 |
18 Feb 13 |
peter |
can only be used to read a bam file. If creating two iterators working |
2986 |
18 Feb 13 |
peter |
on the same InBamFile, the iterators are not independent of each |
2986 |
18 Feb 13 |
peter |
other. Incrementing one, the behaviour of the other is undefined. |
2883 |
03 Dec 12 |
peter |
51 |
|
2883 |
03 Dec 12 |
peter |
Iterator can either be constructed to iterate over entire bam |
2883 |
03 Dec 12 |
peter |
file or if a region is specified only over that region. When no |
2883 |
03 Dec 12 |
peter |
more elements are available, iterator turns into an end-of-file |
2883 |
03 Dec 12 |
peter |
iterator and is no longer dereferenceable. |
2883 |
03 Dec 12 |
peter |
56 |
|
2883 |
03 Dec 12 |
peter |
\see InBamFile |
2884 |
04 Dec 12 |
peter |
58 |
|
2884 |
04 Dec 12 |
peter |
\since New in yat 0.10 |
2883 |
03 Dec 12 |
peter |
60 |
*/ |
2883 |
03 Dec 12 |
peter |
61 |
class BamReadIterator |
2883 |
03 Dec 12 |
peter |
62 |
: public boost::iterator_facade< |
2883 |
03 Dec 12 |
peter |
63 |
BamReadIterator, const BamRead, std::input_iterator_tag |
2883 |
03 Dec 12 |
peter |
64 |
> |
2883 |
03 Dec 12 |
peter |
65 |
{ |
2883 |
03 Dec 12 |
peter |
66 |
public: |
2883 |
03 Dec 12 |
peter |
67 |
/** |
2883 |
03 Dec 12 |
peter |
\brief Contructs end of file iterator. |
2883 |
03 Dec 12 |
peter |
69 |
*/ |
2883 |
03 Dec 12 |
peter |
70 |
BamReadIterator(void); |
2883 |
03 Dec 12 |
peter |
71 |
|
2883 |
03 Dec 12 |
peter |
72 |
/** |
2883 |
03 Dec 12 |
peter |
Creates an iterator pointing to first BamRead in |
2883 |
03 Dec 12 |
peter |
InBamFile. |
2883 |
03 Dec 12 |
peter |
75 |
|
2883 |
03 Dec 12 |
peter |
\param in file to iterate over |
2883 |
03 Dec 12 |
peter |
77 |
*/ |
2883 |
03 Dec 12 |
peter |
78 |
explicit BamReadIterator(InBamFile& in); |
2883 |
03 Dec 12 |
peter |
79 |
|
2883 |
03 Dec 12 |
peter |
80 |
/** |
2883 |
03 Dec 12 |
peter |
Iterator that walks over specified region. Iterator iterates |
2883 |
03 Dec 12 |
peter |
over all BamReads in file \a in that overlap with specified |
2883 |
03 Dec 12 |
peter |
region and then it turns into an end-of-region iterator. |
2883 |
03 Dec 12 |
peter |
84 |
|
2883 |
03 Dec 12 |
peter |
This function requires an index file is available, see |
2883 |
03 Dec 12 |
peter |
InBamFile::index(). |
2883 |
03 Dec 12 |
peter |
87 |
|
2896 |
10 Dec 12 |
peter |
All reads iterated over overlap with defined region, in other |
2896 |
10 Dec 12 |
peter |
words, iter->tid == \a tid; iter->pos() < \a end and iter->end() |
2896 |
10 Dec 12 |
peter |
> start |
2896 |
10 Dec 12 |
peter |
91 |
|
2883 |
03 Dec 12 |
peter |
\param in file to iterate over |
2883 |
03 Dec 12 |
peter |
\param tid segment (chromosome) to iterate over |
2883 |
03 Dec 12 |
peter |
\param start left most position of specified region |
2883 |
03 Dec 12 |
peter |
\param end right most position of specified region |
4273 |
24 Jan 23 |
peter |
96 |
|
4273 |
24 Jan 23 |
peter |
\see For use with large chromosomes for with \c int32_t is not |
4273 |
24 Jan 23 |
peter |
sufficient, see BamReadIterator(InBamFile&, const BamRegion&); |
2883 |
03 Dec 12 |
peter |
99 |
*/ |
2883 |
03 Dec 12 |
peter |
100 |
BamReadIterator(InBamFile& in, int32_t tid, int32_t start, int32_t end); |
3672 |
31 Jul 17 |
peter |
101 |
|
3672 |
31 Jul 17 |
peter |
102 |
/** |
3672 |
31 Jul 17 |
peter |
Iterator that walks from tid:start to end of chromosome and |
3672 |
31 Jul 17 |
peter |
then it turns into an end-of-region iterator. |
3672 |
31 Jul 17 |
peter |
105 |
|
3672 |
31 Jul 17 |
peter |
This function requires an index file is available, see |
3672 |
31 Jul 17 |
peter |
InBamFile::index(). |
3672 |
31 Jul 17 |
peter |
108 |
|
3672 |
31 Jul 17 |
peter |
All reads iterated over overlap with defined region, in other |
3672 |
31 Jul 17 |
peter |
words, iter->tid == \a tid; iter->end() > start |
3672 |
31 Jul 17 |
peter |
111 |
|
3672 |
31 Jul 17 |
peter |
\param in file to iterate over |
3672 |
31 Jul 17 |
peter |
\param tid segment (chromosome) to iterate over |
3672 |
31 Jul 17 |
peter |
\param start left most position of specified region |
3672 |
31 Jul 17 |
peter |
115 |
|
3672 |
31 Jul 17 |
peter |
\since New in yat 0.15 |
4273 |
24 Jan 23 |
peter |
117 |
|
4273 |
24 Jan 23 |
peter |
\see For use with large chromosomes for with \c int32_t is not |
4273 |
24 Jan 23 |
peter |
sufficient, see BamReadIterator(InBamFile&, const BamRegion&); |
3672 |
31 Jul 17 |
peter |
120 |
*/ |
3672 |
31 Jul 17 |
peter |
121 |
BamReadIterator(InBamFile& in, int32_t tid, int32_t start=0); |
4272 |
24 Jan 23 |
peter |
122 |
|
4272 |
24 Jan 23 |
peter |
123 |
/** |
4273 |
24 Jan 23 |
peter |
Iterator that walks over specified region. Iterator iterates |
4273 |
24 Jan 23 |
peter |
over all BamReads in file \a in that overlap with specified |
4273 |
24 Jan 23 |
peter |
region and then it turns into an end-of-region iterator. |
4273 |
24 Jan 23 |
peter |
127 |
|
4273 |
24 Jan 23 |
peter |
This function requires an index file is available, see |
4273 |
24 Jan 23 |
peter |
InBamFile::index(). |
4273 |
24 Jan 23 |
peter |
130 |
|
4273 |
24 Jan 23 |
peter |
All reads iterated over overlap with defined region, in other |
4273 |
24 Jan 23 |
peter |
words, iter->tid == \a tid; iter->pos() < \a end and iter->end() |
4273 |
24 Jan 23 |
peter |
> start |
4273 |
24 Jan 23 |
peter |
134 |
|
4273 |
24 Jan 23 |
peter |
\param in file to iterate over |
4273 |
24 Jan 23 |
peter |
\param region defining the genomic region |
4273 |
24 Jan 23 |
peter |
137 |
|
4273 |
24 Jan 23 |
peter |
\since New in yat 0.21 |
4273 |
24 Jan 23 |
peter |
139 |
*/ |
4273 |
24 Jan 23 |
peter |
140 |
BamReadIterator(InBamFile& in, const BamRegion& region); |
4273 |
24 Jan 23 |
peter |
141 |
|
4273 |
24 Jan 23 |
peter |
142 |
/** |
4272 |
24 Jan 23 |
peter |
Create an iterator that can be used to iterate over reads from |
4272 |
24 Jan 23 |
peter |
multiple regions. The reads are iterated over in a sorted |
4272 |
24 Jan 23 |
peter |
fashion and reads are not multiple times even if \c regions is |
4272 |
24 Jan 23 |
peter |
sorted and/or have overlapping regions. |
4272 |
24 Jan 23 |
peter |
147 |
|
4272 |
24 Jan 23 |
peter |
\since New in yat 0.21 |
4272 |
24 Jan 23 |
peter |
149 |
*/ |
4272 |
24 Jan 23 |
peter |
150 |
BamReadIterator(InBamFile& in, const std::vector<BamRegion>& regions); |
2883 |
03 Dec 12 |
peter |
151 |
private: |
2883 |
03 Dec 12 |
peter |
152 |
friend class boost::iterator_core_access; |
2883 |
03 Dec 12 |
peter |
153 |
BamReadIterator::reference dereference(void) const; |
2883 |
03 Dec 12 |
peter |
154 |
bool equal(const BamReadIterator& other) const; |
2883 |
03 Dec 12 |
peter |
155 |
void increment(void); |
2883 |
03 Dec 12 |
peter |
156 |
|
4011 |
22 Oct 20 |
peter |
157 |
|
2896 |
10 Dec 12 |
peter |
158 |
/* |
2883 |
03 Dec 12 |
peter |
Interface class for the actor that handles difference between |
2883 |
03 Dec 12 |
peter |
an iterator that iterates the entire file and one that uses |
2883 |
03 Dec 12 |
peter |
index to iterate over a specific region. |
2883 |
03 Dec 12 |
peter |
162 |
*/ |
2883 |
03 Dec 12 |
peter |
163 |
class Actor |
2883 |
03 Dec 12 |
peter |
164 |
{ |
2883 |
03 Dec 12 |
peter |
165 |
public: |
2883 |
03 Dec 12 |
peter |
166 |
explicit Actor(InBamFile*); |
2883 |
03 Dec 12 |
peter |
167 |
virtual ~Actor(void); |
2883 |
03 Dec 12 |
peter |
168 |
virtual void increment(void)=0; |
2883 |
03 Dec 12 |
peter |
169 |
InBamFile* in_; |
2883 |
03 Dec 12 |
peter |
170 |
BamRead read_; |
2883 |
03 Dec 12 |
peter |
171 |
}; |
2883 |
03 Dec 12 |
peter |
172 |
|
2896 |
10 Dec 12 |
peter |
// class used when region is entire file |
2883 |
03 Dec 12 |
peter |
174 |
class AllActor : public Actor |
2883 |
03 Dec 12 |
peter |
175 |
{ |
2883 |
03 Dec 12 |
peter |
176 |
public: |
2883 |
03 Dec 12 |
peter |
177 |
AllActor(void); |
2883 |
03 Dec 12 |
peter |
178 |
explicit AllActor(InBamFile&); |
2883 |
03 Dec 12 |
peter |
179 |
void increment(void); |
2883 |
03 Dec 12 |
peter |
180 |
}; |
2883 |
03 Dec 12 |
peter |
181 |
|
4011 |
22 Oct 20 |
peter |
// functor calling bam_itr_destroy |
4011 |
22 Oct 20 |
peter |
183 |
struct IndexDestroyer |
4011 |
22 Oct 20 |
peter |
184 |
{ |
4011 |
22 Oct 20 |
peter |
185 |
void operator()(hts_itr_t* i) const; |
4011 |
22 Oct 20 |
peter |
186 |
}; |
4011 |
22 Oct 20 |
peter |
187 |
|
4272 |
24 Jan 23 |
peter |
188 |
class RegListDestroyer |
4272 |
24 Jan 23 |
peter |
189 |
{ |
4272 |
24 Jan 23 |
peter |
190 |
public: |
4272 |
24 Jan 23 |
peter |
191 |
RegListDestroyer(size_t n); |
4272 |
24 Jan 23 |
peter |
192 |
void operator()(hts_reglist_t* r) const; |
4272 |
24 Jan 23 |
peter |
193 |
private: |
4272 |
24 Jan 23 |
peter |
194 |
size_t n_; |
4272 |
24 Jan 23 |
peter |
195 |
}; |
4272 |
24 Jan 23 |
peter |
196 |
|
2896 |
10 Dec 12 |
peter |
// class used when iterating over a sub-region of the bam file |
2883 |
03 Dec 12 |
peter |
198 |
class IndexActor : public Actor |
2883 |
03 Dec 12 |
peter |
199 |
{ |
2883 |
03 Dec 12 |
peter |
200 |
public: |
4272 |
24 Jan 23 |
peter |
201 |
IndexActor(InBamFile& in, |
4272 |
24 Jan 23 |
peter |
202 |
std::unique_ptr<hts_itr_t, IndexDestroyer>&& iter); |
2883 |
03 Dec 12 |
peter |
203 |
void increment(void); |
2883 |
03 Dec 12 |
peter |
204 |
private: |
4011 |
22 Oct 20 |
peter |
205 |
std::unique_ptr<hts_itr_t, IndexDestroyer> iter_; |
2883 |
03 Dec 12 |
peter |
206 |
}; |
2883 |
03 Dec 12 |
peter |
207 |
|
4011 |
22 Oct 20 |
peter |
208 |
std::shared_ptr<Actor> actor_; |
2883 |
03 Dec 12 |
peter |
209 |
|
2883 |
03 Dec 12 |
peter |
210 |
}; |
2883 |
03 Dec 12 |
peter |
211 |
}}} |
2883 |
03 Dec 12 |
peter |
212 |
#endif |