2883 |
03 Dec 12 |
peter |
// $Id$ |
2883 |
03 Dec 12 |
peter |
2 |
|
2993 |
03 Mar 13 |
peter |
3 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2023 Peter Johansson |
2993 |
03 Mar 13 |
peter |
5 |
|
2993 |
03 Mar 13 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2993 |
03 Mar 13 |
peter |
7 |
|
2993 |
03 Mar 13 |
peter |
The yat library is free software; you can redistribute it and/or |
2993 |
03 Mar 13 |
peter |
modify it under the terms of the GNU General Public License as |
2993 |
03 Mar 13 |
peter |
published by the Free Software Foundation; either version 3 of the |
2993 |
03 Mar 13 |
peter |
License, or (at your option) any later version. |
2993 |
03 Mar 13 |
peter |
12 |
|
2993 |
03 Mar 13 |
peter |
The yat library is distributed in the hope that it will be useful, |
2993 |
03 Mar 13 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2993 |
03 Mar 13 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2993 |
03 Mar 13 |
peter |
General Public License for more details. |
2993 |
03 Mar 13 |
peter |
17 |
|
2993 |
03 Mar 13 |
peter |
You should have received a copy of the GNU General Public License |
3752 |
17 Oct 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2993 |
03 Mar 13 |
peter |
20 |
*/ |
2993 |
03 Mar 13 |
peter |
21 |
|
2883 |
03 Dec 12 |
peter |
22 |
#include <config.h> |
2883 |
03 Dec 12 |
peter |
23 |
|
2883 |
03 Dec 12 |
peter |
24 |
#include "BamRead.h" |
2883 |
03 Dec 12 |
peter |
25 |
|
3029 |
21 Apr 13 |
peter |
26 |
#include "yat/utility/Exception.h" |
3029 |
21 Apr 13 |
peter |
27 |
|
2883 |
03 Dec 12 |
peter |
28 |
#include <algorithm> |
2883 |
03 Dec 12 |
peter |
29 |
#include <cassert> |
3323 |
06 Oct 14 |
peter |
30 |
#include <cstring> |
2883 |
03 Dec 12 |
peter |
31 |
#include <sstream> |
2883 |
03 Dec 12 |
peter |
32 |
|
2988 |
23 Feb 13 |
peter |
// bam1_seq_seti is not defined in 0.1.18 (or older), so |
2988 |
23 Feb 13 |
peter |
// backport. Code is taken bam.h in version 0.1.19-dev. |
2988 |
23 Feb 13 |
peter |
35 |
#ifndef bam1_seq_seti |
2988 |
23 Feb 13 |
peter |
36 |
#define bam1_seq_seti(s, i, c) ( (s)[(i)>>1] = \ |
2988 |
23 Feb 13 |
peter |
37 |
((s)[(i)>>1] & 0xf<<(((i)&1)<<2)) | (c)<<((~(i)&1)<<2) ) |
2988 |
23 Feb 13 |
peter |
38 |
#endif |
2988 |
23 Feb 13 |
peter |
39 |
|
2883 |
03 Dec 12 |
peter |
40 |
namespace theplu { |
2883 |
03 Dec 12 |
peter |
41 |
namespace yat { |
2883 |
03 Dec 12 |
peter |
42 |
namespace omic { |
2883 |
03 Dec 12 |
peter |
43 |
|
2883 |
03 Dec 12 |
peter |
44 |
BamRead::BamRead(void) |
2883 |
03 Dec 12 |
peter |
45 |
: bam_(bam_init1()) |
4265 |
13 Jan 23 |
peter |
46 |
{ |
4265 |
13 Jan 23 |
peter |
47 |
if (!bam_) |
4265 |
13 Jan 23 |
peter |
48 |
throw utility::runtime_error("BamRead: bam_init1 failed"); |
4265 |
13 Jan 23 |
peter |
49 |
} |
2883 |
03 Dec 12 |
peter |
50 |
|
2883 |
03 Dec 12 |
peter |
51 |
|
2883 |
03 Dec 12 |
peter |
52 |
BamRead::BamRead(const BamRead& other) |
2883 |
03 Dec 12 |
peter |
53 |
: bam_(bam_dup1(other.bam_)) |
2883 |
03 Dec 12 |
peter |
54 |
{ |
4265 |
13 Jan 23 |
peter |
55 |
if (other.bam_ && !bam_) |
4265 |
13 Jan 23 |
peter |
56 |
throw utility::runtime_error("BamRead: bam_dup1 failed"); |
2883 |
03 Dec 12 |
peter |
57 |
} |
2883 |
03 Dec 12 |
peter |
58 |
|
2883 |
03 Dec 12 |
peter |
59 |
|
3691 |
14 Sep 17 |
peter |
60 |
BamRead::BamRead(BamRead&& other) noexcept |
3583 |
19 Jan 17 |
peter |
61 |
: bam_(other.bam_) |
3583 |
19 Jan 17 |
peter |
62 |
{ |
3583 |
19 Jan 17 |
peter |
63 |
other.bam_ = NULL; |
3583 |
19 Jan 17 |
peter |
64 |
} |
3583 |
19 Jan 17 |
peter |
65 |
|
3583 |
19 Jan 17 |
peter |
66 |
|
2883 |
03 Dec 12 |
peter |
67 |
BamRead::~BamRead(void) |
2883 |
03 Dec 12 |
peter |
68 |
{ |
2883 |
03 Dec 12 |
peter |
69 |
bam_destroy1(bam_); |
2883 |
03 Dec 12 |
peter |
70 |
} |
2883 |
03 Dec 12 |
peter |
71 |
|
2883 |
03 Dec 12 |
peter |
72 |
|
2883 |
03 Dec 12 |
peter |
73 |
const BamRead& BamRead::operator=(const BamRead& rhs) |
2883 |
03 Dec 12 |
peter |
74 |
{ |
4266 |
13 Jan 23 |
peter |
75 |
if (bam_!=rhs.bam_) { |
4266 |
13 Jan 23 |
peter |
76 |
BamRead tmp(rhs); |
4266 |
13 Jan 23 |
peter |
77 |
this->swap(tmp); |
4266 |
13 Jan 23 |
peter |
78 |
} |
2883 |
03 Dec 12 |
peter |
79 |
return *this; |
2883 |
03 Dec 12 |
peter |
80 |
} |
2883 |
03 Dec 12 |
peter |
81 |
|
2883 |
03 Dec 12 |
peter |
82 |
|
3583 |
19 Jan 17 |
peter |
83 |
BamRead& BamRead::operator=(BamRead&& rhs) |
3583 |
19 Jan 17 |
peter |
84 |
{ |
3583 |
19 Jan 17 |
peter |
85 |
this->swap(rhs); |
3583 |
19 Jan 17 |
peter |
86 |
return *this; |
3583 |
19 Jan 17 |
peter |
87 |
} |
3583 |
19 Jan 17 |
peter |
88 |
|
3583 |
19 Jan 17 |
peter |
89 |
|
2883 |
03 Dec 12 |
peter |
90 |
const uint8_t* BamRead::aux(void) const |
2883 |
03 Dec 12 |
peter |
91 |
{ |
3028 |
21 Apr 13 |
peter |
92 |
assert(bam_); |
3351 |
21 Nov 14 |
peter |
93 |
return bam_get_aux(bam_); |
2883 |
03 Dec 12 |
peter |
94 |
} |
2883 |
03 Dec 12 |
peter |
95 |
|
2883 |
03 Dec 12 |
peter |
96 |
|
3028 |
21 Apr 13 |
peter |
97 |
const uint8_t* BamRead::aux(const char tag[2]) const |
3028 |
21 Apr 13 |
peter |
98 |
{ |
3028 |
21 Apr 13 |
peter |
99 |
assert(bam_); |
3028 |
21 Apr 13 |
peter |
100 |
return bam_aux_get(bam_, tag); |
3028 |
21 Apr 13 |
peter |
101 |
} |
3028 |
21 Apr 13 |
peter |
102 |
|
3028 |
21 Apr 13 |
peter |
103 |
|
3028 |
21 Apr 13 |
peter |
104 |
void BamRead::aux_append(const char tag[2], char type,int len,uint8_t* data) |
3028 |
21 Apr 13 |
peter |
105 |
{ |
3028 |
21 Apr 13 |
peter |
106 |
assert(bam_); |
3028 |
21 Apr 13 |
peter |
107 |
bam_aux_append(bam_, tag, type, len, data); |
3028 |
21 Apr 13 |
peter |
108 |
} |
3028 |
21 Apr 13 |
peter |
109 |
|
3028 |
21 Apr 13 |
peter |
110 |
|
3028 |
21 Apr 13 |
peter |
111 |
void BamRead::aux_del(const char tag[2]) |
3028 |
21 Apr 13 |
peter |
112 |
{ |
3028 |
21 Apr 13 |
peter |
113 |
assert(bam_); |
3029 |
21 Apr 13 |
peter |
114 |
uint8_t* s = bam_aux_get(bam_, tag); |
3030 |
21 Apr 13 |
peter |
115 |
if (!s) { |
3030 |
21 Apr 13 |
peter |
116 |
std::stringstream msg("BamRead::aux_del: absent tag: "); |
3030 |
21 Apr 13 |
peter |
117 |
msg << "'" << tag << "'"; |
3030 |
21 Apr 13 |
peter |
118 |
throw utility::runtime_error(msg.str()); |
3030 |
21 Apr 13 |
peter |
119 |
} |
3030 |
21 Apr 13 |
peter |
120 |
bam_aux_del(bam_, s); |
3028 |
21 Apr 13 |
peter |
121 |
} |
3028 |
21 Apr 13 |
peter |
122 |
|
3028 |
21 Apr 13 |
peter |
123 |
|
3028 |
21 Apr 13 |
peter |
124 |
int BamRead::aux_size(void) const |
3028 |
21 Apr 13 |
peter |
125 |
{ |
3028 |
21 Apr 13 |
peter |
126 |
assert(bam_); |
3351 |
21 Nov 14 |
peter |
127 |
return bam_get_l_aux(bam_); |
3028 |
21 Apr 13 |
peter |
128 |
} |
3028 |
21 Apr 13 |
peter |
129 |
|
3028 |
21 Apr 13 |
peter |
130 |
|
2883 |
03 Dec 12 |
peter |
131 |
const uint32_t* BamRead::cigar(void) const |
2883 |
03 Dec 12 |
peter |
132 |
{ |
3351 |
21 Nov 14 |
peter |
133 |
return bam_get_cigar(bam_); |
2883 |
03 Dec 12 |
peter |
134 |
} |
2883 |
03 Dec 12 |
peter |
135 |
|
2883 |
03 Dec 12 |
peter |
136 |
|
2883 |
03 Dec 12 |
peter |
137 |
uint32_t BamRead::cigar(size_t i) const |
2883 |
03 Dec 12 |
peter |
138 |
{ |
2883 |
03 Dec 12 |
peter |
139 |
assert(i<core().n_cigar); |
3351 |
21 Nov 14 |
peter |
140 |
return cigar()[i]; |
2883 |
03 Dec 12 |
peter |
141 |
} |
2883 |
03 Dec 12 |
peter |
142 |
|
2883 |
03 Dec 12 |
peter |
143 |
|
2883 |
03 Dec 12 |
peter |
144 |
uint32_t BamRead::cigar_op(size_t i) const |
2883 |
03 Dec 12 |
peter |
145 |
{ |
2883 |
03 Dec 12 |
peter |
146 |
assert(i<core().n_cigar); |
2883 |
03 Dec 12 |
peter |
147 |
return bam_cigar_op(cigar(i)); |
2883 |
03 Dec 12 |
peter |
148 |
} |
2883 |
03 Dec 12 |
peter |
149 |
|
2883 |
03 Dec 12 |
peter |
150 |
|
2883 |
03 Dec 12 |
peter |
151 |
uint32_t BamRead::cigar_oplen(size_t i) const |
2883 |
03 Dec 12 |
peter |
152 |
{ |
2883 |
03 Dec 12 |
peter |
153 |
assert(i<core().n_cigar); |
2883 |
03 Dec 12 |
peter |
154 |
return bam_cigar_oplen(cigar(i)); |
2883 |
03 Dec 12 |
peter |
155 |
} |
2883 |
03 Dec 12 |
peter |
156 |
|
2883 |
03 Dec 12 |
peter |
157 |
|
2883 |
03 Dec 12 |
peter |
158 |
std::string BamRead::cigar_str(void) const |
2883 |
03 Dec 12 |
peter |
159 |
{ |
2883 |
03 Dec 12 |
peter |
160 |
std::ostringstream os; |
2883 |
03 Dec 12 |
peter |
161 |
for (uint32_t i=0; i<core().n_cigar; ++i) { |
3351 |
21 Nov 14 |
peter |
162 |
os << bam_cigar_oplen(cigar(i)) |
3351 |
21 Nov 14 |
peter |
163 |
<< bam_cigar_opchr(cigar(i)); |
2883 |
03 Dec 12 |
peter |
164 |
} |
2883 |
03 Dec 12 |
peter |
165 |
return os.str(); |
2883 |
03 Dec 12 |
peter |
166 |
} |
2883 |
03 Dec 12 |
peter |
167 |
|
2883 |
03 Dec 12 |
peter |
168 |
|
2883 |
03 Dec 12 |
peter |
169 |
void BamRead::cigar(const std::vector<uint32_t>& c) |
2883 |
03 Dec 12 |
peter |
170 |
{ |
3295 |
25 Jul 14 |
peter |
// use the new function cigar(1) to implement this old deprecated variant |
3295 |
25 Jul 14 |
peter |
172 |
utility::Aligner::Cigar cig; |
3295 |
25 Jul 14 |
peter |
173 |
for (size_t i=0; i<c.size(); ++i) |
3295 |
25 Jul 14 |
peter |
174 |
cig.push_back(bam_cigar_op(c[i]), bam_cigar_oplen(c[i])); |
3295 |
25 Jul 14 |
peter |
175 |
cigar(cig); |
3295 |
25 Jul 14 |
peter |
176 |
} |
3295 |
25 Jul 14 |
peter |
177 |
|
3295 |
25 Jul 14 |
peter |
178 |
|
3295 |
25 Jul 14 |
peter |
179 |
void BamRead::cigar(const utility::Aligner::Cigar& c) |
3295 |
25 Jul 14 |
peter |
180 |
{ |
3032 |
27 Apr 13 |
peter |
181 |
int offset = 4*c.size() - 4*core().n_cigar; |
3398 |
25 Mar 15 |
peter |
182 |
|
3351 |
21 Nov 14 |
peter |
183 |
reserve(data_size() + offset); |
2883 |
03 Dec 12 |
peter |
184 |
/* |
3324 |
06 Oct 14 |
peter |
data come as qname, cigar, rest |
2883 |
03 Dec 12 |
peter |
186 |
*/ |
3032 |
27 Apr 13 |
peter |
// first move remaining data to new location |
3032 |
27 Apr 13 |
peter |
188 |
if (offset) |
3883 |
24 Mar 20 |
peter |
189 |
memmove(bam_get_seq(bam_)+offset, bam_get_seq(bam_), |
3351 |
21 Nov 14 |
peter |
190 |
data_size() - 4*core().n_cigar - core().l_qname); |
3032 |
27 Apr 13 |
peter |
191 |
|
2883 |
03 Dec 12 |
peter |
// copy new cigar |
3295 |
25 Jul 14 |
peter |
193 |
for (size_t i=0; i<c.size(); ++i) |
3883 |
24 Mar 20 |
peter |
194 |
bam_get_cigar(bam_)[i] = c[i]; |
3295 |
25 Jul 14 |
peter |
195 |
|
3351 |
21 Nov 14 |
peter |
196 |
data_size() += offset; |
2883 |
03 Dec 12 |
peter |
197 |
core().n_cigar = c.size(); |
3630 |
14 Mar 17 |
peter |
198 |
assert(static_cast<uint32_t>(data_size()) <= data_capacity()); |
2883 |
03 Dec 12 |
peter |
199 |
} |
2883 |
03 Dec 12 |
peter |
200 |
|
2883 |
03 Dec 12 |
peter |
201 |
|
2883 |
03 Dec 12 |
peter |
202 |
const bam1_core_t& BamRead::core(void) const |
2883 |
03 Dec 12 |
peter |
203 |
{ return bam_->core; } |
2883 |
03 Dec 12 |
peter |
204 |
|
2883 |
03 Dec 12 |
peter |
205 |
|
2883 |
03 Dec 12 |
peter |
206 |
bam1_core_t& BamRead::core(void) |
2883 |
03 Dec 12 |
peter |
207 |
{ return bam_->core; } |
2883 |
03 Dec 12 |
peter |
208 |
|
2883 |
03 Dec 12 |
peter |
209 |
|
3630 |
14 Mar 17 |
peter |
210 |
uint32_t BamRead::data_capacity(void) const |
3630 |
14 Mar 17 |
peter |
211 |
{ |
3630 |
14 Mar 17 |
peter |
212 |
return bam_->m_data; |
3630 |
14 Mar 17 |
peter |
213 |
} |
3630 |
14 Mar 17 |
peter |
214 |
|
3630 |
14 Mar 17 |
peter |
215 |
|
3351 |
21 Nov 14 |
peter |
216 |
int& BamRead::data_size(void) |
3351 |
21 Nov 14 |
peter |
217 |
{ |
3351 |
21 Nov 14 |
peter |
218 |
return bam_->l_data; |
3351 |
21 Nov 14 |
peter |
219 |
} |
3351 |
21 Nov 14 |
peter |
220 |
|
3351 |
21 Nov 14 |
peter |
221 |
|
3351 |
21 Nov 14 |
peter |
222 |
const int& BamRead::data_size(void) const |
3351 |
21 Nov 14 |
peter |
223 |
{ |
3351 |
21 Nov 14 |
peter |
224 |
return bam_->l_data; |
3351 |
21 Nov 14 |
peter |
225 |
} |
3351 |
21 Nov 14 |
peter |
226 |
|
3351 |
21 Nov 14 |
peter |
227 |
|
3306 |
21 Aug 14 |
peter |
228 |
int32_t BamRead::end(void) const |
3306 |
21 Aug 14 |
peter |
229 |
{ |
3306 |
21 Aug 14 |
peter |
230 |
assert(bam_); |
3355 |
22 Nov 14 |
peter |
231 |
return bam_endpos(bam_); |
3306 |
21 Aug 14 |
peter |
232 |
} |
3306 |
21 Aug 14 |
peter |
233 |
|
3306 |
21 Aug 14 |
peter |
234 |
|
3295 |
25 Jul 14 |
peter |
235 |
uint16_t BamRead::flag(void) const |
3295 |
25 Jul 14 |
peter |
236 |
{ |
3295 |
25 Jul 14 |
peter |
237 |
return bam_->core.flag; |
3295 |
25 Jul 14 |
peter |
238 |
} |
3295 |
25 Jul 14 |
peter |
239 |
|
3351 |
21 Nov 14 |
peter |
240 |
|
3732 |
11 Apr 18 |
peter |
241 |
bool BamRead::flag_bit_is_set(uint16_t x) const |
3732 |
11 Apr 18 |
peter |
242 |
{ |
3732 |
11 Apr 18 |
peter |
243 |
return get_flag_bit(x) == x; |
3732 |
11 Apr 18 |
peter |
244 |
} |
3732 |
11 Apr 18 |
peter |
245 |
|
3732 |
11 Apr 18 |
peter |
246 |
|
3731 |
11 Apr 18 |
peter |
247 |
uint16_t BamRead::get_flag_bit(uint16_t x) const |
3731 |
11 Apr 18 |
peter |
248 |
{ |
3731 |
11 Apr 18 |
peter |
249 |
return bam_->core.flag & x; |
3731 |
11 Apr 18 |
peter |
250 |
} |
3731 |
11 Apr 18 |
peter |
251 |
|
3731 |
11 Apr 18 |
peter |
252 |
|
3731 |
11 Apr 18 |
peter |
253 |
void BamRead::set_flag_bit(uint16_t x) |
3731 |
11 Apr 18 |
peter |
254 |
{ |
3731 |
11 Apr 18 |
peter |
255 |
bam_->core.flag |= x; |
3731 |
11 Apr 18 |
peter |
256 |
} |
3731 |
11 Apr 18 |
peter |
257 |
|
3731 |
11 Apr 18 |
peter |
258 |
|
3731 |
11 Apr 18 |
peter |
259 |
void BamRead::unset_flag_bit(uint16_t x) |
3731 |
11 Apr 18 |
peter |
260 |
{ |
3731 |
11 Apr 18 |
peter |
261 |
bam_->core.flag &= ~x; |
3731 |
11 Apr 18 |
peter |
262 |
} |
3731 |
11 Apr 18 |
peter |
263 |
|
3731 |
11 Apr 18 |
peter |
264 |
|
3731 |
11 Apr 18 |
peter |
265 |
void BamRead::flip_flag_bit(uint16_t x) |
3731 |
11 Apr 18 |
peter |
266 |
{ |
3731 |
11 Apr 18 |
peter |
267 |
bam_->core.flag ^= x; |
3731 |
11 Apr 18 |
peter |
268 |
} |
3731 |
11 Apr 18 |
peter |
269 |
|
3731 |
11 Apr 18 |
peter |
270 |
|
2883 |
03 Dec 12 |
peter |
271 |
const char* BamRead::name(void) const |
3883 |
24 Mar 20 |
peter |
272 |
{ |
3883 |
24 Mar 20 |
peter |
273 |
return bam_get_qname(bam_); |
3883 |
24 Mar 20 |
peter |
274 |
} |
2883 |
03 Dec 12 |
peter |
275 |
|
2883 |
03 Dec 12 |
peter |
276 |
|
3055 |
16 Jun 13 |
peter |
277 |
void BamRead::name(const std::string& n) |
3055 |
16 Jun 13 |
peter |
278 |
{ |
3055 |
16 Jun 13 |
peter |
279 |
int offset = n.size() + 1 - core().l_qname; |
3055 |
16 Jun 13 |
peter |
280 |
assert(bam_); |
3351 |
21 Nov 14 |
peter |
281 |
reserve(data_size() + offset); |
3055 |
16 Jun 13 |
peter |
// move remaining data |
3055 |
16 Jun 13 |
peter |
283 |
if (offset) |
3055 |
16 Jun 13 |
peter |
284 |
memmove(bam_->data + core().l_qname + offset, |
3055 |
16 Jun 13 |
peter |
285 |
bam_->data + core().l_qname, |
3351 |
21 Nov 14 |
peter |
286 |
data_size() - core().l_qname); |
3055 |
16 Jun 13 |
peter |
287 |
core().l_qname += offset; |
3055 |
16 Jun 13 |
peter |
// copy new name |
3883 |
24 Mar 20 |
peter |
289 |
memcpy(bam_get_qname(bam_), n.c_str(), n.size()+1); |
3351 |
21 Nov 14 |
peter |
290 |
data_size() += offset; |
3630 |
14 Mar 17 |
peter |
291 |
assert(static_cast<uint32_t>(data_size()) <= data_capacity()); |
3055 |
16 Jun 13 |
peter |
292 |
} |
3055 |
16 Jun 13 |
peter |
293 |
|
3055 |
16 Jun 13 |
peter |
294 |
|
2883 |
03 Dec 12 |
peter |
295 |
uint8_t BamRead::qual(size_t i) const |
3883 |
24 Mar 20 |
peter |
296 |
{ |
3883 |
24 Mar 20 |
peter |
297 |
return *(bam_get_qual(bam_)+i); |
3883 |
24 Mar 20 |
peter |
298 |
} |
2883 |
03 Dec 12 |
peter |
299 |
|
2883 |
03 Dec 12 |
peter |
300 |
|
3026 |
06 Apr 13 |
peter |
301 |
void BamRead::qual(size_t i, uint8_t q) |
3883 |
24 Mar 20 |
peter |
302 |
{ |
3883 |
24 Mar 20 |
peter |
303 |
*(bam_get_qual(bam_)+i)=q; |
3883 |
24 Mar 20 |
peter |
304 |
} |
3026 |
06 Apr 13 |
peter |
305 |
|
3026 |
06 Apr 13 |
peter |
306 |
|
3630 |
14 Mar 17 |
peter |
307 |
void BamRead::reserve(uint32_t n) |
3031 |
27 Apr 13 |
peter |
308 |
{ |
3031 |
27 Apr 13 |
peter |
309 |
assert(bam_); |
3630 |
14 Mar 17 |
peter |
310 |
if (data_capacity() >= static_cast<uint32_t>(n)) |
3031 |
27 Apr 13 |
peter |
311 |
return; |
3031 |
27 Apr 13 |
peter |
// at least double capacity |
3630 |
14 Mar 17 |
peter |
313 |
bam_->m_data = std::max(data_capacity()<<1, n); |
3031 |
27 Apr 13 |
peter |
314 |
bam_->data = (uint8_t*)realloc(bam_->data, bam_->m_data); |
3031 |
27 Apr 13 |
peter |
315 |
if (!bam_->data) { |
3031 |
27 Apr 13 |
peter |
316 |
throw utility::runtime_error("BamRead::reserve failed"); |
3031 |
27 Apr 13 |
peter |
317 |
} |
3031 |
27 Apr 13 |
peter |
318 |
} |
3031 |
27 Apr 13 |
peter |
319 |
|
3031 |
27 Apr 13 |
peter |
320 |
|
2883 |
03 Dec 12 |
peter |
321 |
std::string BamRead::sequence(void) const |
2883 |
03 Dec 12 |
peter |
322 |
{ |
2883 |
03 Dec 12 |
peter |
323 |
std::string result(sequence_length(), ' '); |
3395 |
23 Mar 15 |
peter |
324 |
for (size_t i=0; i<result.size(); ++i) |
3395 |
23 Mar 15 |
peter |
325 |
result[i] = sequence_str(i); |
3395 |
23 Mar 15 |
peter |
326 |
return result; |
3395 |
23 Mar 15 |
peter |
327 |
} |
3395 |
23 Mar 15 |
peter |
328 |
|
3395 |
23 Mar 15 |
peter |
329 |
|
3395 |
23 Mar 15 |
peter |
330 |
char BamRead::sequence_str(size_t i) const |
3395 |
23 Mar 15 |
peter |
331 |
{ |
3398 |
25 Mar 15 |
peter |
332 |
return nt16_str(sequence(i)); |
2883 |
03 Dec 12 |
peter |
333 |
} |
2883 |
03 Dec 12 |
peter |
334 |
|
2883 |
03 Dec 12 |
peter |
335 |
|
3031 |
27 Apr 13 |
peter |
336 |
void BamRead::sequence(const std::string& seq, |
3031 |
27 Apr 13 |
peter |
337 |
const std::vector<uint8_t>& qual) |
3031 |
27 Apr 13 |
peter |
338 |
{ |
3031 |
27 Apr 13 |
peter |
339 |
assert(seq.size()==qual.size()); |
3031 |
27 Apr 13 |
peter |
// data consist of |
3031 |
27 Apr 13 |
peter |
// |-- qname -->-- cigar -->-- seq -->-- qual -->-- aux --> |
3031 |
27 Apr 13 |
peter |
342 |
|
3883 |
24 Mar 20 |
peter |
343 |
int aux_pos = |
3883 |
24 Mar 20 |
peter |
344 |
bam_get_seq(bam_) - bam_->data + (seq.size()+1)/2 + qual.size(); |
3351 |
21 Nov 14 |
peter |
345 |
int len = aux_pos + aux_size(); |
3031 |
27 Apr 13 |
peter |
346 |
reserve(len); |
3031 |
27 Apr 13 |
peter |
// move aux to its new location |
3032 |
27 Apr 13 |
peter |
348 |
memmove(static_cast<void*>(bam_->data+aux_pos), |
3351 |
21 Nov 14 |
peter |
349 |
static_cast<const void*>(aux()), aux_size()); |
3031 |
27 Apr 13 |
peter |
// copy sequence |
3889 |
25 Mar 20 |
peter |
351 |
const size_t N = seq.size(); |
3889 |
25 Mar 20 |
peter |
352 |
core().l_qseq = N; |
3889 |
25 Mar 20 |
peter |
353 |
std::string::const_iterator first = seq.begin(); |
3889 |
25 Mar 20 |
peter |
354 |
std::string::const_iterator second = seq.begin()+1; |
3889 |
25 Mar 20 |
peter |
355 |
for (size_t i=0; second<seq.end(); ++i) { |
3889 |
25 Mar 20 |
peter |
356 |
sequence(i, nt16_table(*first), nt16_table(*second)); |
3889 |
25 Mar 20 |
peter |
357 |
first+=2; |
3889 |
25 Mar 20 |
peter |
358 |
second+=2; |
3889 |
25 Mar 20 |
peter |
359 |
} |
3889 |
25 Mar 20 |
peter |
360 |
if (N % 2) { |
3889 |
25 Mar 20 |
peter |
361 |
assert(seq.end() - first == 1); |
3889 |
25 Mar 20 |
peter |
362 |
sequence(N-1, nt16_table(*first)); |
3889 |
25 Mar 20 |
peter |
363 |
} |
3031 |
27 Apr 13 |
peter |
// copy quality |
3883 |
24 Mar 20 |
peter |
365 |
memcpy(static_cast<void*>(bam_get_qual(bam_)), |
3883 |
24 Mar 20 |
peter |
366 |
static_cast<const void*>(&qual[0]), qual.size()); |
3351 |
21 Nov 14 |
peter |
367 |
data_size() = len; |
3631 |
21 Mar 17 |
peter |
368 |
assert(static_cast<uint32_t>(data_size()) <= data_capacity()); |
3031 |
27 Apr 13 |
peter |
369 |
} |
3031 |
27 Apr 13 |
peter |
370 |
|
3031 |
27 Apr 13 |
peter |
371 |
|
2985 |
18 Feb 13 |
peter |
372 |
void BamRead::sequence(size_t i, uint8_t seq) |
2985 |
18 Feb 13 |
peter |
373 |
{ |
2985 |
18 Feb 13 |
peter |
374 |
assert(bam_); |
2985 |
18 Feb 13 |
peter |
375 |
assert(seq < 16); |
3883 |
24 Mar 20 |
peter |
376 |
bam1_seq_seti(bam_get_seq(bam_), i, seq); |
2985 |
18 Feb 13 |
peter |
377 |
} |
2985 |
18 Feb 13 |
peter |
378 |
|
2985 |
18 Feb 13 |
peter |
379 |
|
3889 |
25 Mar 20 |
peter |
380 |
void BamRead::sequence(size_t k, uint8_t first, uint8_t second) |
3889 |
25 Mar 20 |
peter |
381 |
{ |
3889 |
25 Mar 20 |
peter |
382 |
assert(bam_); |
3889 |
25 Mar 20 |
peter |
383 |
assert(first < 16); |
3889 |
25 Mar 20 |
peter |
384 |
assert(second < 16); |
3889 |
25 Mar 20 |
peter |
385 |
assert(2*k < sequence_length()); |
3889 |
25 Mar 20 |
peter |
386 |
|
3889 |
25 Mar 20 |
peter |
387 |
/* |
3889 |
25 Mar 20 |
peter |
sequence(2*k, c1); |
3889 |
25 Mar 20 |
peter |
sequence(2*k+1, c2); |
3889 |
25 Mar 20 |
peter |
390 |
|
3889 |
25 Mar 20 |
peter |
is equivalent to |
3889 |
25 Mar 20 |
peter |
s[(2*k)>>1] = |
3889 |
25 Mar 20 |
peter |
(s[(2*k)>>1] & 0xf<<(((2*k)&1)<<2)) | (c1)<<((~(2*k)&1)<<2) |
3889 |
25 Mar 20 |
peter |
s[(2*k+1)>>1] = |
3889 |
25 Mar 20 |
peter |
(s[(2*k+1)>>1] & 0xf<<(((2*k+1)&1)<<2)) | (c2)<<((~(2*k+1)&1)<<2) |
3889 |
25 Mar 20 |
peter |
396 |
|
3889 |
25 Mar 20 |
peter |
s[k] = (s[k] & 0xf<<(0<<2)) | (c1)<<(1<<2) |
3889 |
25 Mar 20 |
peter |
s[k] = (s[k] & 0xf<<(1<<2)) | (c2)<<(0<<2) |
3889 |
25 Mar 20 |
peter |
399 |
|
3889 |
25 Mar 20 |
peter |
s[k] = (s[k] & 0xf<<(0x0)) | (c1)<<(0x4) |
3889 |
25 Mar 20 |
peter |
s[k] = (s[k] & 0xf<<(0x4)) | (c2)<<(0x0) |
3889 |
25 Mar 20 |
peter |
402 |
|
3889 |
25 Mar 20 |
peter |
s[k] = (s[k] & 0xf ) | (c1<<0x4) |
3889 |
25 Mar 20 |
peter |
s[k] = (s[k] & 0xf0) | c2 |
3889 |
25 Mar 20 |
peter |
405 |
|
3889 |
25 Mar 20 |
peter |
s[k] = (c1<<0x4) | c2 |
3889 |
25 Mar 20 |
peter |
407 |
*/ |
3889 |
25 Mar 20 |
peter |
408 |
|
3889 |
25 Mar 20 |
peter |
409 |
bam_get_seq(bam_)[k] = (first << 4) | second; |
3889 |
25 Mar 20 |
peter |
410 |
} |
3889 |
25 Mar 20 |
peter |
411 |
|
3889 |
25 Mar 20 |
peter |
412 |
|
2883 |
03 Dec 12 |
peter |
413 |
uint8_t BamRead::sequence(size_t index) const |
2883 |
03 Dec 12 |
peter |
414 |
{ |
2883 |
03 Dec 12 |
peter |
415 |
assert(bam_); |
3883 |
24 Mar 20 |
peter |
416 |
return bam_seqi(bam_get_seq(bam_),index); |
2883 |
03 Dec 12 |
peter |
417 |
} |
2883 |
03 Dec 12 |
peter |
418 |
|
2883 |
03 Dec 12 |
peter |
419 |
|
2883 |
03 Dec 12 |
peter |
420 |
uint32_t BamRead::sequence_length(void) const |
2883 |
03 Dec 12 |
peter |
421 |
{ |
2883 |
03 Dec 12 |
peter |
422 |
assert(bam_); |
2883 |
03 Dec 12 |
peter |
423 |
return bam_->core.l_qseq; |
2883 |
03 Dec 12 |
peter |
424 |
} |
2883 |
03 Dec 12 |
peter |
425 |
|
2883 |
03 Dec 12 |
peter |
426 |
|
2883 |
03 Dec 12 |
peter |
427 |
int32_t BamRead::pos(void) const |
2883 |
03 Dec 12 |
peter |
428 |
{ |
2883 |
03 Dec 12 |
peter |
429 |
assert(bam_); |
2883 |
03 Dec 12 |
peter |
430 |
return bam_->core.pos; |
2883 |
03 Dec 12 |
peter |
431 |
} |
2883 |
03 Dec 12 |
peter |
432 |
|
2883 |
03 Dec 12 |
peter |
433 |
|
2883 |
03 Dec 12 |
peter |
434 |
int32_t BamRead::tid(void) const |
2883 |
03 Dec 12 |
peter |
435 |
{ |
2883 |
03 Dec 12 |
peter |
436 |
assert(bam_); |
2883 |
03 Dec 12 |
peter |
437 |
return bam_->core.tid; |
2883 |
03 Dec 12 |
peter |
438 |
} |
2883 |
03 Dec 12 |
peter |
439 |
|
2883 |
03 Dec 12 |
peter |
440 |
|
2986 |
18 Feb 13 |
peter |
441 |
int32_t BamRead::mtid(void) const |
2986 |
18 Feb 13 |
peter |
442 |
{ |
2986 |
18 Feb 13 |
peter |
443 |
assert(bam_); |
2986 |
18 Feb 13 |
peter |
444 |
return bam_->core.mtid; |
2986 |
18 Feb 13 |
peter |
445 |
} |
2883 |
03 Dec 12 |
peter |
446 |
|
2986 |
18 Feb 13 |
peter |
447 |
|
2883 |
03 Dec 12 |
peter |
448 |
int32_t BamRead::mpos(void) const |
2883 |
03 Dec 12 |
peter |
449 |
{ |
2883 |
03 Dec 12 |
peter |
450 |
assert(bam_); |
2883 |
03 Dec 12 |
peter |
451 |
return bam_->core.mpos; |
2883 |
03 Dec 12 |
peter |
452 |
} |
2883 |
03 Dec 12 |
peter |
453 |
|
2883 |
03 Dec 12 |
peter |
454 |
|
2886 |
05 Dec 12 |
peter |
455 |
void BamRead::swap(BamRead& other) |
2886 |
05 Dec 12 |
peter |
456 |
{ |
2886 |
05 Dec 12 |
peter |
457 |
std::swap(bam_, other.bam_); |
2886 |
05 Dec 12 |
peter |
458 |
} |
2886 |
05 Dec 12 |
peter |
459 |
|
2886 |
05 Dec 12 |
peter |
460 |
|
3733 |
11 Apr 18 |
peter |
461 |
bool BamRead::strand(void) const |
3733 |
11 Apr 18 |
peter |
462 |
{ |
3733 |
11 Apr 18 |
peter |
463 |
return !flag_bit_is_set(BAM_FREVERSE); |
3733 |
11 Apr 18 |
peter |
464 |
} |
3733 |
11 Apr 18 |
peter |
465 |
|
3733 |
11 Apr 18 |
peter |
466 |
|
3733 |
11 Apr 18 |
peter |
467 |
bool BamRead::mstrand(void) const |
3733 |
11 Apr 18 |
peter |
468 |
{ |
3733 |
11 Apr 18 |
peter |
469 |
return !flag_bit_is_set(BAM_FMREVERSE); |
3733 |
11 Apr 18 |
peter |
470 |
} |
3733 |
11 Apr 18 |
peter |
471 |
|
3733 |
11 Apr 18 |
peter |
472 |
|
3398 |
25 Mar 15 |
peter |
473 |
const unsigned char nt16_table(char c) |
3398 |
25 Mar 15 |
peter |
474 |
{ |
3398 |
25 Mar 15 |
peter |
475 |
return seq_nt16_table[static_cast<uint8_t>(c)]; |
3398 |
25 Mar 15 |
peter |
476 |
} |
3398 |
25 Mar 15 |
peter |
477 |
|
3398 |
25 Mar 15 |
peter |
478 |
|
3398 |
25 Mar 15 |
peter |
479 |
const char nt16_str(uint8_t x) |
3398 |
25 Mar 15 |
peter |
480 |
{ |
3398 |
25 Mar 15 |
peter |
481 |
assert(x<16); |
3398 |
25 Mar 15 |
peter |
482 |
return seq_nt16_str[x]; |
3398 |
25 Mar 15 |
peter |
483 |
} |
3398 |
25 Mar 15 |
peter |
484 |
|
3398 |
25 Mar 15 |
peter |
485 |
|
3384 |
11 Mar 15 |
peter |
486 |
bool less_query_name(const BamRead& lhs, const BamRead& rhs) |
3384 |
11 Mar 15 |
peter |
487 |
{ |
3384 |
11 Mar 15 |
peter |
488 |
return std::lexicographical_compare(lhs.name(), |
3384 |
11 Mar 15 |
peter |
489 |
lhs.name()+lhs.core().l_qname, |
3384 |
11 Mar 15 |
peter |
490 |
rhs.name(), |
3384 |
11 Mar 15 |
peter |
491 |
rhs.name()+rhs.core().l_qname); |
3384 |
11 Mar 15 |
peter |
492 |
} |
3384 |
11 Mar 15 |
peter |
493 |
|
3384 |
11 Mar 15 |
peter |
494 |
|
2886 |
05 Dec 12 |
peter |
495 |
void swap(BamRead& lhs, BamRead& rhs) |
2886 |
05 Dec 12 |
peter |
496 |
{ |
2886 |
05 Dec 12 |
peter |
497 |
lhs.swap(rhs); |
2886 |
05 Dec 12 |
peter |
498 |
} |
2886 |
05 Dec 12 |
peter |
499 |
|
3384 |
11 Mar 15 |
peter |
500 |
|
2883 |
03 Dec 12 |
peter |
501 |
bool same_query_name(const BamRead& lhs, const BamRead& rhs) |
2883 |
03 Dec 12 |
peter |
502 |
{ |
2883 |
03 Dec 12 |
peter |
503 |
if (lhs.core().l_qname != rhs.core().l_qname) |
2883 |
03 Dec 12 |
peter |
504 |
return false; |
2883 |
03 Dec 12 |
peter |
505 |
|
3323 |
06 Oct 14 |
peter |
506 |
return !strcmp(lhs.name(), rhs.name()); |
2883 |
03 Dec 12 |
peter |
507 |
} |
2883 |
03 Dec 12 |
peter |
508 |
|
2883 |
03 Dec 12 |
peter |
509 |
|
2883 |
03 Dec 12 |
peter |
510 |
bool soft_clipped(const BamRead& bam) |
2883 |
03 Dec 12 |
peter |
511 |
{ |
2883 |
03 Dec 12 |
peter |
512 |
return left_soft_clipped(bam) || right_soft_clipped(bam); |
2883 |
03 Dec 12 |
peter |
513 |
} |
2883 |
03 Dec 12 |
peter |
514 |
|
2883 |
03 Dec 12 |
peter |
515 |
|
2883 |
03 Dec 12 |
peter |
516 |
uint32_t left_soft_clipped(const BamRead& bam) |
2883 |
03 Dec 12 |
peter |
517 |
{ |
2883 |
03 Dec 12 |
peter |
518 |
for (size_t i=0; i<bam.core().n_cigar; ++i) { |
2883 |
03 Dec 12 |
peter |
519 |
if (bam.cigar_op(i) == BAM_CSOFT_CLIP) |
2883 |
03 Dec 12 |
peter |
520 |
return bam.cigar_oplen(i); |
2883 |
03 Dec 12 |
peter |
521 |
if (bam_cigar_type(bam.cigar_op(i))&1) |
2883 |
03 Dec 12 |
peter |
522 |
return 0; |
2883 |
03 Dec 12 |
peter |
523 |
} |
2883 |
03 Dec 12 |
peter |
524 |
return 0; |
2883 |
03 Dec 12 |
peter |
525 |
} |
2883 |
03 Dec 12 |
peter |
526 |
|
2883 |
03 Dec 12 |
peter |
527 |
|
2883 |
03 Dec 12 |
peter |
528 |
uint32_t right_soft_clipped(const BamRead& bam) |
2883 |
03 Dec 12 |
peter |
529 |
{ |
2883 |
03 Dec 12 |
peter |
530 |
if (bam.core().n_cigar==0) |
2883 |
03 Dec 12 |
peter |
531 |
return 0; |
2883 |
03 Dec 12 |
peter |
// right clip can't be at first (most left cigar element) |
2883 |
03 Dec 12 |
peter |
533 |
for (size_t i=bam.core().n_cigar-1; i>0; --i) { |
2883 |
03 Dec 12 |
peter |
534 |
if (bam.cigar_op(i) == BAM_CSOFT_CLIP) |
2883 |
03 Dec 12 |
peter |
535 |
return bam.cigar_oplen(i); |
2883 |
03 Dec 12 |
peter |
536 |
if (bam_cigar_type(bam.cigar_op(i))&1) |
2883 |
03 Dec 12 |
peter |
537 |
return 0; |
2883 |
03 Dec 12 |
peter |
538 |
} |
2883 |
03 Dec 12 |
peter |
539 |
return 0; |
2883 |
03 Dec 12 |
peter |
540 |
} |
2883 |
03 Dec 12 |
peter |
541 |
|
2883 |
03 Dec 12 |
peter |
542 |
|
2883 |
03 Dec 12 |
peter |
543 |
bool BamLessPos::operator()(const BamRead& lhs, const BamRead& rhs) const |
2883 |
03 Dec 12 |
peter |
544 |
{ |
2883 |
03 Dec 12 |
peter |
545 |
return lhs.tid() < rhs.tid() || |
2883 |
03 Dec 12 |
peter |
546 |
(lhs.tid() == rhs.tid() && lhs.pos() < rhs.pos()); |
2883 |
03 Dec 12 |
peter |
547 |
} |
2883 |
03 Dec 12 |
peter |
548 |
|
2883 |
03 Dec 12 |
peter |
549 |
|
2883 |
03 Dec 12 |
peter |
550 |
bool BamLessEnd::operator()(const BamRead& lhs, const BamRead& rhs) const |
2883 |
03 Dec 12 |
peter |
551 |
{ |
2883 |
03 Dec 12 |
peter |
552 |
return lhs.tid() < rhs.tid() || |
2883 |
03 Dec 12 |
peter |
553 |
(lhs.tid() == rhs.tid() && lhs.end() < rhs.end()); |
2883 |
03 Dec 12 |
peter |
554 |
} |
2883 |
03 Dec 12 |
peter |
555 |
|
3384 |
11 Mar 15 |
peter |
556 |
|
3384 |
11 Mar 15 |
peter |
557 |
bool BamLessName::operator()(const BamRead& lhs, const BamRead& rhs) const |
3384 |
11 Mar 15 |
peter |
558 |
{ |
3384 |
11 Mar 15 |
peter |
559 |
return less_query_name(lhs, rhs); |
3384 |
11 Mar 15 |
peter |
560 |
} |
3384 |
11 Mar 15 |
peter |
561 |
|
2883 |
03 Dec 12 |
peter |
562 |
}}} |