2883 |
03 Dec 12 |
peter |
// $Id$ |
2883 |
03 Dec 12 |
peter |
2 |
|
2993 |
03 Mar 13 |
peter |
3 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2012, 2013, 2014, 2017, 2018, 2020, 2023 Peter Johansson |
2993 |
03 Mar 13 |
peter |
5 |
|
2993 |
03 Mar 13 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2993 |
03 Mar 13 |
peter |
7 |
|
2993 |
03 Mar 13 |
peter |
The yat library is free software; you can redistribute it and/or |
2993 |
03 Mar 13 |
peter |
modify it under the terms of the GNU General Public License as |
2993 |
03 Mar 13 |
peter |
published by the Free Software Foundation; either version 3 of the |
2993 |
03 Mar 13 |
peter |
License, or (at your option) any later version. |
2993 |
03 Mar 13 |
peter |
12 |
|
2993 |
03 Mar 13 |
peter |
The yat library is distributed in the hope that it will be useful, |
2993 |
03 Mar 13 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2993 |
03 Mar 13 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2993 |
03 Mar 13 |
peter |
General Public License for more details. |
2993 |
03 Mar 13 |
peter |
17 |
|
2993 |
03 Mar 13 |
peter |
You should have received a copy of the GNU General Public License |
3752 |
17 Oct 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2993 |
03 Mar 13 |
peter |
20 |
*/ |
2993 |
03 Mar 13 |
peter |
21 |
|
2980 |
04 Feb 13 |
peter |
22 |
#include <config.h> |
2980 |
04 Feb 13 |
peter |
23 |
|
2883 |
03 Dec 12 |
peter |
24 |
#include "BamReadIterator.h" |
2883 |
03 Dec 12 |
peter |
25 |
|
2883 |
03 Dec 12 |
peter |
26 |
#include "BamFile.h" |
4272 |
24 Jan 23 |
peter |
27 |
#include "BamRegion.h" |
2883 |
03 Dec 12 |
peter |
28 |
|
2897 |
11 Dec 12 |
peter |
29 |
#include "yat/utility/Exception.h" |
4272 |
24 Jan 23 |
peter |
30 |
#include "yat/utility/SegmentSet.h" |
2897 |
11 Dec 12 |
peter |
31 |
|
4011 |
22 Oct 20 |
peter |
32 |
#include <cassert> |
2883 |
03 Dec 12 |
peter |
33 |
#include <cstddef> |
4272 |
24 Jan 23 |
peter |
34 |
#include <cstring> |
4272 |
24 Jan 23 |
peter |
35 |
#include <map> |
3056 |
24 Jun 13 |
peter |
36 |
#include <sstream> |
4272 |
24 Jan 23 |
peter |
37 |
#include <vector> |
2883 |
03 Dec 12 |
peter |
38 |
|
2883 |
03 Dec 12 |
peter |
39 |
namespace theplu { |
2883 |
03 Dec 12 |
peter |
40 |
namespace yat { |
2883 |
03 Dec 12 |
peter |
41 |
namespace omic { |
2883 |
03 Dec 12 |
peter |
42 |
|
2883 |
03 Dec 12 |
peter |
43 |
BamReadIterator::BamReadIterator(void) |
4011 |
22 Oct 20 |
peter |
44 |
: actor_(new AllActor) |
2883 |
03 Dec 12 |
peter |
45 |
{ |
4011 |
22 Oct 20 |
peter |
46 |
assert(actor_); |
2883 |
03 Dec 12 |
peter |
47 |
} |
2883 |
03 Dec 12 |
peter |
48 |
|
2883 |
03 Dec 12 |
peter |
49 |
|
2883 |
03 Dec 12 |
peter |
50 |
BamReadIterator::BamReadIterator(InBamFile& in) |
4011 |
22 Oct 20 |
peter |
51 |
: actor_(new AllActor(in)) |
2883 |
03 Dec 12 |
peter |
52 |
{ |
4011 |
22 Oct 20 |
peter |
53 |
assert(actor_); |
2883 |
03 Dec 12 |
peter |
54 |
increment(); |
2883 |
03 Dec 12 |
peter |
55 |
} |
2883 |
03 Dec 12 |
peter |
56 |
|
2883 |
03 Dec 12 |
peter |
57 |
|
2883 |
03 Dec 12 |
peter |
58 |
BamReadIterator::BamReadIterator(InBamFile& bf, int32_t tid, int32_t start, |
2883 |
03 Dec 12 |
peter |
59 |
int32_t end) |
4274 |
24 Jan 23 |
peter |
60 |
: BamReadIterator(bf, BamRegion(tid, start, end)) |
2883 |
03 Dec 12 |
peter |
61 |
{ |
2883 |
03 Dec 12 |
peter |
62 |
} |
2883 |
03 Dec 12 |
peter |
63 |
|
2883 |
03 Dec 12 |
peter |
64 |
|
3672 |
31 Jul 17 |
peter |
65 |
BamReadIterator::BamReadIterator(InBamFile& bf, int32_t tid, int32_t start) |
4274 |
24 Jan 23 |
peter |
66 |
: BamReadIterator(bf, BamRegion(tid, start, |
4274 |
24 Jan 23 |
peter |
67 |
bf.header().target_length(tid))) |
3672 |
31 Jul 17 |
peter |
68 |
{ |
3672 |
31 Jul 17 |
peter |
69 |
} |
3672 |
31 Jul 17 |
peter |
70 |
|
3672 |
31 Jul 17 |
peter |
71 |
|
4273 |
24 Jan 23 |
peter |
72 |
BamReadIterator::BamReadIterator(InBamFile& bf, const BamRegion& region) |
4273 |
24 Jan 23 |
peter |
73 |
{ |
4273 |
24 Jan 23 |
peter |
74 |
|
4273 |
24 Jan 23 |
peter |
75 |
std::unique_ptr<hts_itr_t, IndexDestroyer> |
4273 |
24 Jan 23 |
peter |
76 |
iter(sam_itr_queryi(bf.index(), region.tid(), |
4273 |
24 Jan 23 |
peter |
77 |
region.begin(), region.end()), IndexDestroyer()); |
4274 |
24 Jan 23 |
peter |
78 |
if (!iter.get()) { |
4274 |
24 Jan 23 |
peter |
79 |
std::stringstream ss; |
4274 |
24 Jan 23 |
peter |
80 |
ss << "BamReadIterator constructor: invalid region: ( tid=" |
4274 |
24 Jan 23 |
peter |
81 |
<< region.tid() << ", begin=" << region.begin() |
4274 |
24 Jan 23 |
peter |
82 |
<< ", end=" << region.end() << ")"; |
4274 |
24 Jan 23 |
peter |
83 |
throw utility::runtime_error(ss.str()); |
4274 |
24 Jan 23 |
peter |
84 |
} |
4274 |
24 Jan 23 |
peter |
85 |
|
4273 |
24 Jan 23 |
peter |
86 |
actor_.reset(new IndexActor(bf, std::move(iter))); |
4273 |
24 Jan 23 |
peter |
87 |
assert(actor_); |
4273 |
24 Jan 23 |
peter |
88 |
increment(); |
4273 |
24 Jan 23 |
peter |
89 |
} |
4273 |
24 Jan 23 |
peter |
90 |
|
4273 |
24 Jan 23 |
peter |
91 |
|
4272 |
24 Jan 23 |
peter |
92 |
BamReadIterator::BamReadIterator(InBamFile& bf, |
4272 |
24 Jan 23 |
peter |
93 |
const std::vector<BamRegion>& regions) |
4272 |
24 Jan 23 |
peter |
94 |
{ |
4272 |
24 Jan 23 |
peter |
95 |
std::map<int, std::vector<size_t>> tid2idx; |
4272 |
24 Jan 23 |
peter |
96 |
for (size_t i=0; i<regions.size(); ++i) { |
4272 |
24 Jan 23 |
peter |
97 |
tid2idx[regions[i].tid()].push_back(i); |
4272 |
24 Jan 23 |
peter |
98 |
} |
4272 |
24 Jan 23 |
peter |
99 |
size_t n = tid2idx.size(); |
4272 |
24 Jan 23 |
peter |
100 |
|
4272 |
24 Jan 23 |
peter |
101 |
std::unique_ptr<hts_reglist_t[], RegListDestroyer> |
4272 |
24 Jan 23 |
peter |
102 |
reglist((hts_reglist_t*)calloc(n, sizeof(hts_reglist_t)), |
4272 |
24 Jan 23 |
peter |
103 |
RegListDestroyer(n)); |
4272 |
24 Jan 23 |
peter |
104 |
|
4272 |
24 Jan 23 |
peter |
105 |
size_t i=0; |
4272 |
24 Jan 23 |
peter |
106 |
for (auto it=tid2idx.begin(); it!=tid2idx.end(); ++it) { |
4272 |
24 Jan 23 |
peter |
107 |
reglist[i].reg = bf.header().target_name(it->first); |
4272 |
24 Jan 23 |
peter |
108 |
|
4272 |
24 Jan 23 |
peter |
// sort and merge segments on this chromosome |
4272 |
24 Jan 23 |
peter |
110 |
utility::SegmentSet<hts_pos_t> segments; |
4272 |
24 Jan 23 |
peter |
111 |
for (size_t idx : it->second) { |
4272 |
24 Jan 23 |
peter |
112 |
const BamRegion& reg = regions[idx]; |
4272 |
24 Jan 23 |
peter |
113 |
segments.insert_merge(utility::Segment<hts_pos_t>(reg.begin(), |
4272 |
24 Jan 23 |
peter |
114 |
reg.end())); |
4272 |
24 Jan 23 |
peter |
115 |
} |
4272 |
24 Jan 23 |
peter |
116 |
|
4272 |
24 Jan 23 |
peter |
// intervals |
4272 |
24 Jan 23 |
peter |
118 |
reglist[i].intervals = |
4272 |
24 Jan 23 |
peter |
119 |
(hts_pair_pos_t*)calloc(segments.size(), sizeof(hts_pair_pos_t)); |
4272 |
24 Jan 23 |
peter |
120 |
|
4272 |
24 Jan 23 |
peter |
121 |
hts_pair_pos_t* interval = reglist[i].intervals; |
4272 |
24 Jan 23 |
peter |
122 |
for (const auto& segment : segments) { |
4272 |
24 Jan 23 |
peter |
123 |
|
4272 |
24 Jan 23 |
peter |
124 |
interval->beg = segment.begin(); |
4272 |
24 Jan 23 |
peter |
125 |
interval->end = segment.end(); |
4272 |
24 Jan 23 |
peter |
126 |
++interval; |
4272 |
24 Jan 23 |
peter |
127 |
} |
4272 |
24 Jan 23 |
peter |
128 |
|
4272 |
24 Jan 23 |
peter |
// count |
4272 |
24 Jan 23 |
peter |
130 |
reglist[i].count = segments.size(); |
4272 |
24 Jan 23 |
peter |
131 |
++i; |
4272 |
24 Jan 23 |
peter |
132 |
} |
4272 |
24 Jan 23 |
peter |
133 |
|
4272 |
24 Jan 23 |
peter |
134 |
std::unique_ptr<hts_itr_t, IndexDestroyer> |
4272 |
24 Jan 23 |
peter |
135 |
iter(sam_itr_regions(bf.index(), bf.header().header_, reglist.get(), n)); |
4272 |
24 Jan 23 |
peter |
136 |
|
4272 |
24 Jan 23 |
peter |
137 |
if (iter.get()) |
4272 |
24 Jan 23 |
peter |
// release ownership to iter |
4272 |
24 Jan 23 |
peter |
139 |
reglist.release(); |
4272 |
24 Jan 23 |
peter |
140 |
else |
4272 |
24 Jan 23 |
peter |
141 |
throw utility::runtime_error("BamReadIterator: sam_itr_regions failed"); |
4272 |
24 Jan 23 |
peter |
142 |
|
4272 |
24 Jan 23 |
peter |
143 |
actor_.reset(new IndexActor(bf, std::move(iter))); |
4272 |
24 Jan 23 |
peter |
144 |
assert(actor_); |
4272 |
24 Jan 23 |
peter |
145 |
|
4272 |
24 Jan 23 |
peter |
146 |
increment(); |
4272 |
24 Jan 23 |
peter |
147 |
} |
4272 |
24 Jan 23 |
peter |
148 |
|
4272 |
24 Jan 23 |
peter |
149 |
|
2883 |
03 Dec 12 |
peter |
150 |
BamReadIterator::reference BamReadIterator::dereference(void) const |
2883 |
03 Dec 12 |
peter |
151 |
{ |
2883 |
03 Dec 12 |
peter |
152 |
return actor_->read_; |
2883 |
03 Dec 12 |
peter |
153 |
} |
2883 |
03 Dec 12 |
peter |
154 |
|
2883 |
03 Dec 12 |
peter |
155 |
|
2883 |
03 Dec 12 |
peter |
156 |
bool BamReadIterator::equal(const BamReadIterator& other) const |
2883 |
03 Dec 12 |
peter |
157 |
{ |
2883 |
03 Dec 12 |
peter |
158 |
return actor_->in_ == other.actor_->in_; |
2883 |
03 Dec 12 |
peter |
159 |
} |
2883 |
03 Dec 12 |
peter |
160 |
|
2883 |
03 Dec 12 |
peter |
161 |
|
2883 |
03 Dec 12 |
peter |
162 |
void BamReadIterator::increment(void) |
2883 |
03 Dec 12 |
peter |
163 |
{ |
2883 |
03 Dec 12 |
peter |
164 |
actor_->increment(); |
2883 |
03 Dec 12 |
peter |
165 |
} |
2883 |
03 Dec 12 |
peter |
166 |
|
2883 |
03 Dec 12 |
peter |
167 |
|
4011 |
22 Oct 20 |
peter |
168 |
void BamReadIterator::IndexDestroyer::operator()(hts_itr_t* i) const |
4011 |
22 Oct 20 |
peter |
169 |
{ |
4011 |
22 Oct 20 |
peter |
170 |
bam_itr_destroy(i); |
4011 |
22 Oct 20 |
peter |
171 |
} |
4011 |
22 Oct 20 |
peter |
172 |
|
4011 |
22 Oct 20 |
peter |
173 |
|
4272 |
24 Jan 23 |
peter |
174 |
BamReadIterator::RegListDestroyer::RegListDestroyer(size_t n) |
4272 |
24 Jan 23 |
peter |
175 |
: n_(n) |
4272 |
24 Jan 23 |
peter |
176 |
{} |
4011 |
22 Oct 20 |
peter |
177 |
|
4272 |
24 Jan 23 |
peter |
178 |
|
4272 |
24 Jan 23 |
peter |
179 |
void BamReadIterator::RegListDestroyer::operator()(hts_reglist_t* r) const |
4272 |
24 Jan 23 |
peter |
180 |
{ |
4272 |
24 Jan 23 |
peter |
181 |
hts_reglist_free(r, n_); |
4272 |
24 Jan 23 |
peter |
182 |
} |
4272 |
24 Jan 23 |
peter |
183 |
|
4272 |
24 Jan 23 |
peter |
184 |
|
2883 |
03 Dec 12 |
peter |
///// Actors ////// |
2883 |
03 Dec 12 |
peter |
186 |
|
2883 |
03 Dec 12 |
peter |
187 |
BamReadIterator::Actor::Actor(InBamFile* bf) |
2883 |
03 Dec 12 |
peter |
188 |
: in_(bf) {} |
2883 |
03 Dec 12 |
peter |
189 |
|
2883 |
03 Dec 12 |
peter |
190 |
|
2883 |
03 Dec 12 |
peter |
191 |
BamReadIterator::Actor::~Actor(void) |
2883 |
03 Dec 12 |
peter |
192 |
{} |
2883 |
03 Dec 12 |
peter |
193 |
|
2883 |
03 Dec 12 |
peter |
194 |
|
2883 |
03 Dec 12 |
peter |
195 |
BamReadIterator::AllActor::AllActor(void) |
2883 |
03 Dec 12 |
peter |
196 |
: Actor(NULL) {} |
2883 |
03 Dec 12 |
peter |
197 |
|
2883 |
03 Dec 12 |
peter |
198 |
|
2883 |
03 Dec 12 |
peter |
199 |
BamReadIterator::AllActor::AllActor(InBamFile& bf) |
2883 |
03 Dec 12 |
peter |
200 |
: Actor(&bf) {} |
2883 |
03 Dec 12 |
peter |
201 |
|
2883 |
03 Dec 12 |
peter |
202 |
|
2883 |
03 Dec 12 |
peter |
203 |
void BamReadIterator::AllActor::increment(void) |
2883 |
03 Dec 12 |
peter |
204 |
{ |
2883 |
03 Dec 12 |
peter |
205 |
if (!in_) |
2883 |
03 Dec 12 |
peter |
206 |
return; |
2883 |
03 Dec 12 |
peter |
207 |
try { |
2883 |
03 Dec 12 |
peter |
208 |
if (!in_->read(read_)) |
2883 |
03 Dec 12 |
peter |
209 |
in_ = NULL; |
2883 |
03 Dec 12 |
peter |
210 |
} |
2897 |
11 Dec 12 |
peter |
211 |
catch (utility::runtime_error& e) { |
2883 |
03 Dec 12 |
peter |
212 |
in_ = NULL; |
2883 |
03 Dec 12 |
peter |
213 |
throw e; |
2883 |
03 Dec 12 |
peter |
214 |
} |
2883 |
03 Dec 12 |
peter |
215 |
} |
2883 |
03 Dec 12 |
peter |
216 |
|
2883 |
03 Dec 12 |
peter |
217 |
|
4272 |
24 Jan 23 |
peter |
218 |
BamReadIterator::IndexActor:: |
4272 |
24 Jan 23 |
peter |
219 |
IndexActor(InBamFile& bf, |
4272 |
24 Jan 23 |
peter |
220 |
std::unique_ptr<hts_itr_t, IndexDestroyer>&& iter) |
4272 |
24 Jan 23 |
peter |
221 |
: Actor(&bf), iter_(std::move(iter)) |
4272 |
24 Jan 23 |
peter |
222 |
{ |
4272 |
24 Jan 23 |
peter |
223 |
assert(iter_); |
4272 |
24 Jan 23 |
peter |
224 |
} |
4272 |
24 Jan 23 |
peter |
225 |
|
4272 |
24 Jan 23 |
peter |
226 |
|
2883 |
03 Dec 12 |
peter |
227 |
void BamReadIterator::IndexActor::increment(void) |
2883 |
03 Dec 12 |
peter |
228 |
{ |
2883 |
03 Dec 12 |
peter |
229 |
if (!in_) |
2883 |
03 Dec 12 |
peter |
230 |
return; |
2883 |
03 Dec 12 |
peter |
231 |
assert(iter_.get()); |
2883 |
03 Dec 12 |
peter |
232 |
try { |
2883 |
03 Dec 12 |
peter |
233 |
if (! in_->read(read_, iter_.get())) |
2883 |
03 Dec 12 |
peter |
234 |
in_ = NULL; |
2883 |
03 Dec 12 |
peter |
235 |
} |
2897 |
11 Dec 12 |
peter |
236 |
catch (utility::runtime_error& e) { |
2883 |
03 Dec 12 |
peter |
237 |
in_ = NULL; |
2883 |
03 Dec 12 |
peter |
238 |
throw e; |
2883 |
03 Dec 12 |
peter |
239 |
} |
2883 |
03 Dec 12 |
peter |
240 |
} |
2883 |
03 Dec 12 |
peter |
241 |
|
2883 |
03 Dec 12 |
peter |
242 |
}}} |