2883 |
03 Dec 12 |
peter |
// $Id$ |
2883 |
03 Dec 12 |
peter |
2 |
|
2993 |
03 Mar 13 |
peter |
3 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2020, 2023 Peter Johansson |
2993 |
03 Mar 13 |
peter |
5 |
|
2993 |
03 Mar 13 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2993 |
03 Mar 13 |
peter |
7 |
|
2993 |
03 Mar 13 |
peter |
The yat library is free software; you can redistribute it and/or |
2993 |
03 Mar 13 |
peter |
modify it under the terms of the GNU General Public License as |
2993 |
03 Mar 13 |
peter |
published by the Free Software Foundation; either version 3 of the |
2993 |
03 Mar 13 |
peter |
License, or (at your option) any later version. |
2993 |
03 Mar 13 |
peter |
12 |
|
2993 |
03 Mar 13 |
peter |
The yat library is distributed in the hope that it will be useful, |
2993 |
03 Mar 13 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2993 |
03 Mar 13 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2993 |
03 Mar 13 |
peter |
General Public License for more details. |
2993 |
03 Mar 13 |
peter |
17 |
|
2993 |
03 Mar 13 |
peter |
You should have received a copy of the GNU General Public License |
3752 |
17 Oct 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2993 |
03 Mar 13 |
peter |
20 |
*/ |
2993 |
03 Mar 13 |
peter |
21 |
|
2883 |
03 Dec 12 |
peter |
22 |
#include <config.h> |
2883 |
03 Dec 12 |
peter |
23 |
|
2883 |
03 Dec 12 |
peter |
24 |
#include "BamFile.h" |
2883 |
03 Dec 12 |
peter |
25 |
#include "BamHeader.h" |
3484 |
23 Mar 16 |
peter |
26 |
#include "BamReadIterator.h" |
2883 |
03 Dec 12 |
peter |
27 |
|
2897 |
11 Dec 12 |
peter |
28 |
#include "yat/utility/Exception.h" |
2897 |
11 Dec 12 |
peter |
29 |
|
4278 |
26 Jan 23 |
peter |
30 |
#include <htslib/hts.h> |
3883 |
24 Mar 20 |
peter |
31 |
#include <htslib/sam.h> |
2883 |
03 Dec 12 |
peter |
32 |
|
3048 |
12 Jun 13 |
peter |
33 |
#include <cassert> |
2883 |
03 Dec 12 |
peter |
34 |
#include <stdexcept> |
2883 |
03 Dec 12 |
peter |
35 |
#include <string> |
2883 |
03 Dec 12 |
peter |
36 |
#include <sstream> |
2883 |
03 Dec 12 |
peter |
37 |
|
2883 |
03 Dec 12 |
peter |
38 |
namespace theplu { |
2883 |
03 Dec 12 |
peter |
39 |
namespace yat { |
2883 |
03 Dec 12 |
peter |
40 |
namespace omic { |
2883 |
03 Dec 12 |
peter |
41 |
|
4278 |
26 Jan 23 |
peter |
// some workaround code for buggy htslib 1.10 (see ticket #990) |
4278 |
26 Jan 23 |
peter |
// This can be removed when we require 1.11 |
4278 |
26 Jan 23 |
peter |
44 |
namespace detail { |
4278 |
26 Jan 23 |
peter |
45 |
|
4278 |
26 Jan 23 |
peter |
// This function is only used here and not part of the API; we |
4278 |
26 Jan 23 |
peter |
// declare it here and avoid introducing a header file not part of |
4278 |
26 Jan 23 |
peter |
// the installation. |
4278 |
26 Jan 23 |
peter |
49 |
int sam_itr_next(htsFile *htsfp, hts_itr_t *itr, bam1_t *r); |
4278 |
26 Jan 23 |
peter |
50 |
|
4278 |
26 Jan 23 |
peter |
51 |
} // end of namespace detail |
4278 |
26 Jan 23 |
peter |
52 |
|
4278 |
26 Jan 23 |
peter |
53 |
|
2883 |
03 Dec 12 |
peter |
54 |
InBamFile::InBamFile(void) |
2895 |
10 Dec 12 |
peter |
55 |
: super_t(), index_(NULL) |
2883 |
03 Dec 12 |
peter |
56 |
{} |
2883 |
03 Dec 12 |
peter |
57 |
|
2883 |
03 Dec 12 |
peter |
58 |
|
2883 |
03 Dec 12 |
peter |
59 |
InBamFile::InBamFile(const std::string& fn) |
2895 |
10 Dec 12 |
peter |
60 |
: super_t(), index_(NULL) |
2883 |
03 Dec 12 |
peter |
61 |
{ |
2883 |
03 Dec 12 |
peter |
62 |
open(fn); |
2883 |
03 Dec 12 |
peter |
63 |
} |
2883 |
03 Dec 12 |
peter |
64 |
|
2883 |
03 Dec 12 |
peter |
65 |
|
2883 |
03 Dec 12 |
peter |
66 |
InBamFile::~InBamFile(void) |
2883 |
03 Dec 12 |
peter |
67 |
{ |
3353 |
22 Nov 14 |
peter |
68 |
hts_idx_destroy(index_); |
2883 |
03 Dec 12 |
peter |
69 |
} |
2883 |
03 Dec 12 |
peter |
70 |
|
2883 |
03 Dec 12 |
peter |
71 |
|
3674 |
01 Aug 17 |
peter |
72 |
void InBamFile::build_index(void) const |
3674 |
01 Aug 17 |
peter |
73 |
{ |
3674 |
01 Aug 17 |
peter |
74 |
build_index_base(); |
3674 |
01 Aug 17 |
peter |
75 |
} |
3674 |
01 Aug 17 |
peter |
76 |
|
3674 |
01 Aug 17 |
peter |
77 |
|
3464 |
09 Feb 16 |
peter |
78 |
uint64_t InBamFile::get_idx_stat(int tid, bool return_mapped) const |
3464 |
09 Feb 16 |
peter |
79 |
{ |
3464 |
09 Feb 16 |
peter |
80 |
uint64_t mapped; |
3464 |
09 Feb 16 |
peter |
81 |
uint64_t unmapped; |
3698 |
25 Sep 17 |
peter |
82 |
if (tid >= header().n_targets()) |
3464 |
09 Feb 16 |
peter |
83 |
throw utility::runtime_error("InBamFile::get_idx_stat"); |
3698 |
25 Sep 17 |
peter |
84 |
hts_idx_get_stat(index(), tid, &mapped, &unmapped); |
3464 |
09 Feb 16 |
peter |
85 |
if (return_mapped) |
3464 |
09 Feb 16 |
peter |
86 |
return mapped; |
3464 |
09 Feb 16 |
peter |
87 |
return unmapped; |
3464 |
09 Feb 16 |
peter |
88 |
} |
3464 |
09 Feb 16 |
peter |
89 |
|
3464 |
09 Feb 16 |
peter |
90 |
|
2883 |
03 Dec 12 |
peter |
91 |
const BamHeader& InBamFile::header(void) const |
2883 |
03 Dec 12 |
peter |
92 |
{ |
2883 |
03 Dec 12 |
peter |
93 |
return header_; |
2883 |
03 Dec 12 |
peter |
94 |
} |
2883 |
03 Dec 12 |
peter |
95 |
|
2883 |
03 Dec 12 |
peter |
96 |
|
3353 |
22 Nov 14 |
peter |
97 |
const InBamFile::index_type* InBamFile::index(void) const |
2883 |
03 Dec 12 |
peter |
98 |
{ |
2883 |
03 Dec 12 |
peter |
99 |
if (!index_) { |
3353 |
22 Nov 14 |
peter |
100 |
index_ = hts_idx_load(filename().c_str(), HTS_FMT_BAI); |
2883 |
03 Dec 12 |
peter |
101 |
if (!index_) { |
2883 |
03 Dec 12 |
peter |
102 |
std::ostringstream ss; |
2883 |
03 Dec 12 |
peter |
103 |
ss << "cannot load bam index '" << filename() << ".bai'"; |
2897 |
11 Dec 12 |
peter |
104 |
throw utility::runtime_error(ss.str()); |
2883 |
03 Dec 12 |
peter |
105 |
} |
2883 |
03 Dec 12 |
peter |
106 |
} |
2883 |
03 Dec 12 |
peter |
107 |
return index_; |
2883 |
03 Dec 12 |
peter |
108 |
} |
2883 |
03 Dec 12 |
peter |
109 |
|
2883 |
03 Dec 12 |
peter |
110 |
|
3464 |
09 Feb 16 |
peter |
111 |
uint64_t InBamFile::n_mapped(int tid) const |
3464 |
09 Feb 16 |
peter |
112 |
{ |
3464 |
09 Feb 16 |
peter |
113 |
return get_idx_stat(tid, true); |
3464 |
09 Feb 16 |
peter |
114 |
} |
3464 |
09 Feb 16 |
peter |
115 |
|
3464 |
09 Feb 16 |
peter |
116 |
|
3464 |
09 Feb 16 |
peter |
117 |
uint64_t InBamFile::n_unmapped(int tid) const |
3464 |
09 Feb 16 |
peter |
118 |
{ |
3464 |
09 Feb 16 |
peter |
119 |
return get_idx_stat(tid, false); |
3464 |
09 Feb 16 |
peter |
120 |
} |
3464 |
09 Feb 16 |
peter |
121 |
|
3464 |
09 Feb 16 |
peter |
122 |
|
3464 |
09 Feb 16 |
peter |
123 |
uint64_t InBamFile::n_no_coordinate(void) const |
3464 |
09 Feb 16 |
peter |
124 |
{ |
3464 |
09 Feb 16 |
peter |
125 |
return hts_idx_get_n_no_coor(index()); |
3464 |
09 Feb 16 |
peter |
126 |
} |
3464 |
09 Feb 16 |
peter |
127 |
|
3464 |
09 Feb 16 |
peter |
128 |
|
2883 |
03 Dec 12 |
peter |
129 |
void InBamFile::open(const std::string& fn) |
2883 |
03 Dec 12 |
peter |
130 |
{ |
3883 |
24 Mar 20 |
peter |
131 |
open_base(fn, "rb"); |
3357 |
23 Nov 14 |
peter |
132 |
header_.header_ = sam_hdr_read(sf_); |
3079 |
11 Sep 13 |
peter |
133 |
if (header_.header_ == NULL) { |
3079 |
11 Sep 13 |
peter |
134 |
std::ostringstream ss; |
3079 |
11 Sep 13 |
peter |
135 |
ss << "failed to read header from '" << filename() << "'"; |
3079 |
11 Sep 13 |
peter |
136 |
throw utility::runtime_error(ss.str()); |
3079 |
11 Sep 13 |
peter |
137 |
} |
2883 |
03 Dec 12 |
peter |
138 |
} |
2883 |
03 Dec 12 |
peter |
139 |
|
2883 |
03 Dec 12 |
peter |
140 |
|
2883 |
03 Dec 12 |
peter |
141 |
bool InBamFile::read(BamRead& read) |
2883 |
03 Dec 12 |
peter |
142 |
{ |
3353 |
22 Nov 14 |
peter |
143 |
int result_ = sam_read1(sf_, header_.header_, read.bam_); |
2883 |
03 Dec 12 |
peter |
144 |
if (result_<-1) { |
2883 |
03 Dec 12 |
peter |
145 |
std::ostringstream ss; |
3080 |
11 Sep 13 |
peter |
146 |
ss << "InBamFile::read(1): invalid bam file '" << filename() << "'"; |
2883 |
03 Dec 12 |
peter |
147 |
if (result_ == -2) |
2883 |
03 Dec 12 |
peter |
148 |
ss << " truncated file"; |
2883 |
03 Dec 12 |
peter |
149 |
else |
2883 |
03 Dec 12 |
peter |
150 |
ss << " error: " << result_; |
2897 |
11 Dec 12 |
peter |
151 |
throw utility::runtime_error(ss.str()); |
2883 |
03 Dec 12 |
peter |
152 |
} |
2883 |
03 Dec 12 |
peter |
153 |
return result_>=0; |
2883 |
03 Dec 12 |
peter |
154 |
} |
2883 |
03 Dec 12 |
peter |
155 |
|
2883 |
03 Dec 12 |
peter |
156 |
|
3353 |
22 Nov 14 |
peter |
157 |
bool InBamFile::read(BamRead& read, hts_itr_t* iter) |
3353 |
22 Nov 14 |
peter |
158 |
{ |
4278 |
26 Jan 23 |
peter |
159 |
assert(iter); |
4278 |
26 Jan 23 |
peter |
160 |
assert(sf_->is_bgzf); |
4278 |
26 Jan 23 |
peter |
161 |
#if HTS_VERSION < 101100 |
4278 |
26 Jan 23 |
peter |
162 |
int result = detail::sam_itr_next(sf_, iter, read.bam_); |
4278 |
26 Jan 23 |
peter |
163 |
#else |
3358 |
23 Nov 14 |
peter |
164 |
int result = sam_itr_next(sf_, iter, read.bam_); |
4278 |
26 Jan 23 |
peter |
165 |
#endif |
3358 |
23 Nov 14 |
peter |
166 |
if (result<-1) { |
2883 |
03 Dec 12 |
peter |
167 |
std::ostringstream ss; |
3693 |
23 Sep 17 |
peter |
168 |
ss << "BamRead::read(2): invalid bam file '" << filename() << "'"; |
3358 |
23 Nov 14 |
peter |
169 |
if (result == -2) |
2883 |
03 Dec 12 |
peter |
170 |
ss << " truncated file"; |
2883 |
03 Dec 12 |
peter |
171 |
else |
3358 |
23 Nov 14 |
peter |
172 |
ss << " error: " << result; |
2897 |
11 Dec 12 |
peter |
173 |
throw utility::runtime_error(ss.str()); |
2883 |
03 Dec 12 |
peter |
174 |
} |
3358 |
23 Nov 14 |
peter |
175 |
return result>=0; |
2883 |
03 Dec 12 |
peter |
176 |
} |
2883 |
03 Dec 12 |
peter |
177 |
|
2883 |
03 Dec 12 |
peter |
178 |
|
2883 |
03 Dec 12 |
peter |
179 |
OutBamFile::OutBamFile(void) |
2883 |
03 Dec 12 |
peter |
180 |
: super_t() {} |
2883 |
03 Dec 12 |
peter |
181 |
|
2883 |
03 Dec 12 |
peter |
182 |
|
2883 |
03 Dec 12 |
peter |
183 |
OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header) |
2883 |
03 Dec 12 |
peter |
184 |
{ |
2883 |
03 Dec 12 |
peter |
185 |
open(fn, header); |
2883 |
03 Dec 12 |
peter |
186 |
} |
2883 |
03 Dec 12 |
peter |
187 |
|
2883 |
03 Dec 12 |
peter |
188 |
|
3160 |
13 Jan 14 |
peter |
189 |
OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header, |
3160 |
13 Jan 14 |
peter |
190 |
unsigned int c) |
3160 |
13 Jan 14 |
peter |
191 |
{ |
3160 |
13 Jan 14 |
peter |
192 |
open(fn, header, c); |
3160 |
13 Jan 14 |
peter |
193 |
} |
3160 |
13 Jan 14 |
peter |
194 |
|
3160 |
13 Jan 14 |
peter |
195 |
|
3674 |
01 Aug 17 |
peter |
196 |
void OutBamFile::build_index(void) const |
3674 |
01 Aug 17 |
peter |
197 |
{ |
3674 |
01 Aug 17 |
peter |
198 |
if (is_open()) { |
3674 |
01 Aug 17 |
peter |
199 |
std::ostringstream msg; |
3674 |
01 Aug 17 |
peter |
200 |
msg << "failed building index for '" << filename() << "': file is open"; |
3674 |
01 Aug 17 |
peter |
201 |
throw utility::runtime_error(msg.str()); |
3674 |
01 Aug 17 |
peter |
202 |
} |
3674 |
01 Aug 17 |
peter |
203 |
build_index_base(); |
3674 |
01 Aug 17 |
peter |
204 |
} |
3674 |
01 Aug 17 |
peter |
205 |
|
3674 |
01 Aug 17 |
peter |
206 |
|
2883 |
03 Dec 12 |
peter |
207 |
void OutBamFile::open(const std::string& fn, const BamHeader& h) |
2883 |
03 Dec 12 |
peter |
208 |
{ |
3463 |
03 Feb 16 |
peter |
209 |
if (!h.header_) |
3463 |
03 Feb 16 |
peter |
210 |
throw utility::runtime_error("OutBamFile::open with null header"); |
3630 |
14 Mar 17 |
peter |
211 |
open(fn, std::string("wb"), h); |
2883 |
03 Dec 12 |
peter |
212 |
} |
2883 |
03 Dec 12 |
peter |
213 |
|
2883 |
03 Dec 12 |
peter |
214 |
|
3160 |
13 Jan 14 |
peter |
215 |
void OutBamFile::open(const std::string& fn, const BamHeader& h, |
3160 |
13 Jan 14 |
peter |
216 |
unsigned int compression) |
3160 |
13 Jan 14 |
peter |
217 |
{ |
3160 |
13 Jan 14 |
peter |
218 |
std::string mode("wb"); |
3160 |
13 Jan 14 |
peter |
219 |
if (compression > 9) { |
3160 |
13 Jan 14 |
peter |
220 |
std::stringstream oss; |
3160 |
13 Jan 14 |
peter |
221 |
oss << "OutBamFile::open( " << fn << ", <header>, " << compression |
3160 |
13 Jan 14 |
peter |
222 |
<< "): incorrect compression level\n"; |
3160 |
13 Jan 14 |
peter |
223 |
throw std::invalid_argument(oss.str()); |
3160 |
13 Jan 14 |
peter |
224 |
} |
3357 |
23 Nov 14 |
peter |
225 |
if (compression) |
3357 |
23 Nov 14 |
peter |
226 |
mode.push_back('0'+compression); |
3357 |
23 Nov 14 |
peter |
227 |
else |
3357 |
23 Nov 14 |
peter |
228 |
mode = "wu"; |
3630 |
14 Mar 17 |
peter |
229 |
open(fn, mode, h); |
3630 |
14 Mar 17 |
peter |
230 |
} |
3630 |
14 Mar 17 |
peter |
231 |
|
3630 |
14 Mar 17 |
peter |
232 |
|
3630 |
14 Mar 17 |
peter |
233 |
void OutBamFile::open(const std::string& fn, const std::string& mode, |
3630 |
14 Mar 17 |
peter |
234 |
const BamHeader& h) |
3630 |
14 Mar 17 |
peter |
235 |
{ |
3883 |
24 Mar 20 |
peter |
236 |
open_base(fn, mode); |
3630 |
14 Mar 17 |
peter |
237 |
if (sam_hdr_write(sf_, h.header_)) { |
3630 |
14 Mar 17 |
peter |
238 |
std::ostringstream ss; |
3630 |
14 Mar 17 |
peter |
239 |
ss << "failed open '" << fn << "'"; |
3630 |
14 Mar 17 |
peter |
240 |
throw utility::runtime_error(ss.str()); |
3630 |
14 Mar 17 |
peter |
241 |
} |
3160 |
13 Jan 14 |
peter |
242 |
} |
3160 |
13 Jan 14 |
peter |
243 |
|
3160 |
13 Jan 14 |
peter |
244 |
|
2883 |
03 Dec 12 |
peter |
245 |
void OutBamFile::write(const BamRead& read) |
2883 |
03 Dec 12 |
peter |
246 |
{ |
3359 |
23 Nov 14 |
peter |
247 |
assert(read.bam_); |
3359 |
23 Nov 14 |
peter |
248 |
if (is_open() && bam_write1(sf_->fp.bgzf, read.bam_)>0) |
3359 |
23 Nov 14 |
peter |
249 |
return; |
3359 |
23 Nov 14 |
peter |
250 |
throw OutBamFile::error(read); |
2883 |
03 Dec 12 |
peter |
251 |
} |
2883 |
03 Dec 12 |
peter |
252 |
|
3166 |
17 Jan 14 |
peter |
253 |
|
3166 |
17 Jan 14 |
peter |
// OutBamFile::error |
3166 |
17 Jan 14 |
peter |
255 |
|
3166 |
17 Jan 14 |
peter |
256 |
OutBamFile::error::error(const BamRead& r) |
3692 |
14 Sep 17 |
peter |
257 |
: IO_error("OutBamFile::write failed"), read_(r) {} |
3166 |
17 Jan 14 |
peter |
258 |
|
3166 |
17 Jan 14 |
peter |
259 |
|
3166 |
17 Jan 14 |
peter |
260 |
OutBamFile::error::~error(void) throw() {} |
3166 |
17 Jan 14 |
peter |
261 |
|
3166 |
17 Jan 14 |
peter |
262 |
|
3166 |
17 Jan 14 |
peter |
263 |
const BamRead& OutBamFile::error::read(void) const |
3166 |
17 Jan 14 |
peter |
264 |
{ return read_; } |
3166 |
17 Jan 14 |
peter |
265 |
|
2883 |
03 Dec 12 |
peter |
266 |
}}} |