2883 |
03 Dec 12 |
peter |
1 |
#ifndef theplu_yat_omic_bam_header |
2883 |
03 Dec 12 |
peter |
2 |
#define theplu_yat_omic_bam_header |
2883 |
03 Dec 12 |
peter |
3 |
|
2883 |
03 Dec 12 |
peter |
// $Id$ |
2883 |
03 Dec 12 |
peter |
5 |
|
2993 |
03 Mar 13 |
peter |
6 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2020, 2023 Peter Johansson |
2993 |
03 Mar 13 |
peter |
8 |
|
2993 |
03 Mar 13 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2993 |
03 Mar 13 |
peter |
10 |
|
2993 |
03 Mar 13 |
peter |
The yat library is free software; you can redistribute it and/or |
2993 |
03 Mar 13 |
peter |
modify it under the terms of the GNU General Public License as |
2993 |
03 Mar 13 |
peter |
published by the Free Software Foundation; either version 3 of the |
2993 |
03 Mar 13 |
peter |
License, or (at your option) any later version. |
2993 |
03 Mar 13 |
peter |
15 |
|
2993 |
03 Mar 13 |
peter |
The yat library is distributed in the hope that it will be useful, |
2993 |
03 Mar 13 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2993 |
03 Mar 13 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2993 |
03 Mar 13 |
peter |
General Public License for more details. |
2993 |
03 Mar 13 |
peter |
20 |
|
2993 |
03 Mar 13 |
peter |
You should have received a copy of the GNU General Public License |
3752 |
17 Oct 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2993 |
03 Mar 13 |
peter |
23 |
*/ |
2993 |
03 Mar 13 |
peter |
24 |
|
3408 |
16 Apr 15 |
peter |
25 |
#include "yat/utility/config_public.h" |
3477 |
09 Mar 16 |
peter |
26 |
#include "yat/utility/stl_utility.h" |
3477 |
09 Mar 16 |
peter |
27 |
|
3883 |
24 Mar 20 |
peter |
28 |
#include <htslib/sam.h> |
2943 |
04 Jan 13 |
peter |
29 |
|
3477 |
09 Mar 16 |
peter |
30 |
#include <boost/iterator/transform_iterator.hpp> |
3477 |
09 Mar 16 |
peter |
31 |
|
3410 |
21 Apr 15 |
peter |
32 |
#include <map> |
2999 |
14 Mar 13 |
peter |
33 |
#include <string> |
2999 |
14 Mar 13 |
peter |
34 |
|
2883 |
03 Dec 12 |
peter |
35 |
namespace theplu { |
2883 |
03 Dec 12 |
peter |
36 |
namespace yat { |
2883 |
03 Dec 12 |
peter |
37 |
namespace omic { |
2883 |
03 Dec 12 |
peter |
38 |
|
2883 |
03 Dec 12 |
peter |
39 |
/** |
3352 |
21 Nov 14 |
peter |
\brief Wrapper around bam_hdr_t struct. |
2883 |
03 Dec 12 |
peter |
41 |
|
3188 |
25 Mar 14 |
peter |
Class is typically created via InBamFile::header(). |
2884 |
04 Dec 12 |
peter |
43 |
|
3188 |
25 Mar 14 |
peter |
It is possible to copy and assign a BamHeader, but note that a |
3188 |
25 Mar 14 |
peter |
BamHeader does not own underlying data. The underlying data is |
3188 |
25 Mar 14 |
peter |
owned by the InBamFile and the BamHeader is thus invalid after |
3188 |
25 Mar 14 |
peter |
the corresponding InBamFile has been destroyed. |
3188 |
25 Mar 14 |
peter |
48 |
|
2884 |
04 Dec 12 |
peter |
\since New in yat 0.10 |
2883 |
03 Dec 12 |
peter |
50 |
*/ |
2883 |
03 Dec 12 |
peter |
51 |
class BamHeader |
2883 |
03 Dec 12 |
peter |
52 |
{ |
3477 |
09 Mar 16 |
peter |
53 |
typedef std::map<std::string, std::string> strMap; |
3477 |
09 Mar 16 |
peter |
54 |
typedef std::map<std::string, strMap> strMap2; |
2883 |
03 Dec 12 |
peter |
55 |
public: |
2883 |
03 Dec 12 |
peter |
56 |
/** |
2883 |
03 Dec 12 |
peter |
\brief Default constructor |
2883 |
03 Dec 12 |
peter |
58 |
*/ |
2883 |
03 Dec 12 |
peter |
59 |
BamHeader(void); |
2883 |
03 Dec 12 |
peter |
60 |
|
2883 |
03 Dec 12 |
peter |
61 |
/** |
3408 |
16 Apr 15 |
peter |
\brief Destructor |
3408 |
16 Apr 15 |
peter |
63 |
*/ |
3408 |
16 Apr 15 |
peter |
64 |
~BamHeader(void); |
3408 |
16 Apr 15 |
peter |
65 |
|
3408 |
16 Apr 15 |
peter |
66 |
/** |
3408 |
16 Apr 15 |
peter |
\brief Copy constructor |
3408 |
16 Apr 15 |
peter |
68 |
*/ |
3408 |
16 Apr 15 |
peter |
69 |
BamHeader(const BamHeader&); |
3408 |
16 Apr 15 |
peter |
70 |
|
3408 |
16 Apr 15 |
peter |
71 |
/** |
3600 |
22 Jan 17 |
peter |
\brief Move constructor |
3600 |
22 Jan 17 |
peter |
73 |
*/ |
3691 |
14 Sep 17 |
peter |
74 |
BamHeader(BamHeader&&) noexcept; |
3600 |
22 Jan 17 |
peter |
75 |
|
3600 |
22 Jan 17 |
peter |
76 |
/** |
2999 |
14 Mar 13 |
peter |
Parse a region in the format: 'chr2:100,000-200,000 and return |
2999 |
14 Mar 13 |
peter |
values in variables \a tid, \a begin and \a end. \a reg is |
2999 |
14 Mar 13 |
peter |
1-based and \a begin and \a end are 0-based, i.e., |
3188 |
25 Mar 14 |
peter |
"chr2:100,000-200,000" will set \a begin = 99999 and \a end = |
2999 |
14 Mar 13 |
peter |
200000. |
2999 |
14 Mar 13 |
peter |
82 |
|
2999 |
14 Mar 13 |
peter |
\throw utility::runtime_error on failure |
2999 |
14 Mar 13 |
peter |
84 |
|
2999 |
14 Mar 13 |
peter |
\since new in yat 0.11 |
2999 |
14 Mar 13 |
peter |
86 |
*/ |
2999 |
14 Mar 13 |
peter |
87 |
void parse_region(const std::string& reg, int& tid, int& begin, |
2999 |
14 Mar 13 |
peter |
88 |
int& end) const; |
2999 |
14 Mar 13 |
peter |
89 |
|
2999 |
14 Mar 13 |
peter |
90 |
/** |
3410 |
21 Apr 15 |
peter |
\brief Access value in \c \@PG lines. |
3410 |
21 Apr 15 |
peter |
92 |
|
3410 |
21 Apr 15 |
peter |
A program group line in the header typically looks like |
3410 |
21 Apr 15 |
peter |
94 |
|
3410 |
21 Apr 15 |
peter |
\code @PG ID:bwa PN:bwa VN:0.6.1-r104 \endcode |
3410 |
21 Apr 15 |
peter |
96 |
|
3410 |
21 Apr 15 |
peter |
and for this line \c program_group("bwa", "VN") returns |
3410 |
21 Apr 15 |
peter |
\c "0.6.1-r104" |
3410 |
21 Apr 15 |
peter |
99 |
|
3410 |
21 Apr 15 |
peter |
\return value for \a key for line with ID \a id. |
3410 |
21 Apr 15 |
peter |
101 |
|
3410 |
21 Apr 15 |
peter |
\since New in yat 0.13 |
3410 |
21 Apr 15 |
peter |
103 |
*/ |
3410 |
21 Apr 15 |
peter |
104 |
const std::string& program_group(const std::string& id, |
3410 |
21 Apr 15 |
peter |
105 |
const std::string& key) const; |
3410 |
21 Apr 15 |
peter |
106 |
|
3410 |
21 Apr 15 |
peter |
107 |
/** |
4290 |
03 Feb 23 |
peter |
\returns a map describing an \@PG line such as |
4290 |
03 Feb 23 |
peter |
109 |
|
4290 |
03 Feb 23 |
peter |
\code @PG ID:bwa PN:bwa VN:0.6.1-r104 \endcode |
4290 |
03 Feb 23 |
peter |
111 |
|
4290 |
03 Feb 23 |
peter |
for which the returned map contains two entries: one with key |
4290 |
03 Feb 23 |
peter |
\c PN and value \c bwa, and one with key \c VN and value |
4290 |
03 Feb 23 |
peter |
\c 0.6.1-r104. |
4290 |
03 Feb 23 |
peter |
115 |
|
4290 |
03 Feb 23 |
peter |
\since New in yat 0.21 |
4290 |
03 Feb 23 |
peter |
117 |
*/ |
4290 |
03 Feb 23 |
peter |
118 |
const std::map<std::string, std::string>& |
4290 |
03 Feb 23 |
peter |
119 |
program_group(const std::string& id) const; |
4290 |
03 Feb 23 |
peter |
120 |
|
4290 |
03 Feb 23 |
peter |
121 |
/** |
4290 |
03 Feb 23 |
peter |
program_group_iterator is used to iterate through program groups |
4290 |
03 Feb 23 |
peter |
123 |
|
4290 |
03 Feb 23 |
peter |
program_group_iterator is a \readable_iterator and |
4290 |
03 Feb 23 |
peter |
\bidirectional_traversal_iterator with value type \c |
4290 |
03 Feb 23 |
peter |
std::string |
4290 |
03 Feb 23 |
peter |
127 |
|
4290 |
03 Feb 23 |
peter |
\see program_group_begin |
4290 |
03 Feb 23 |
peter |
\see program_group_end |
4290 |
03 Feb 23 |
peter |
130 |
|
4290 |
03 Feb 23 |
peter |
\since New in yat 0.21 |
4290 |
03 Feb 23 |
peter |
132 |
*/ |
4290 |
03 Feb 23 |
peter |
133 |
typedef boost::transform_iterator< |
4290 |
03 Feb 23 |
peter |
134 |
utility::PairFirst<const strMap2::value_type>, |
4290 |
03 Feb 23 |
peter |
135 |
strMap2::const_iterator> |
4290 |
03 Feb 23 |
peter |
136 |
program_group_iterator; |
4290 |
03 Feb 23 |
peter |
137 |
|
4290 |
03 Feb 23 |
peter |
138 |
/** |
4290 |
03 Feb 23 |
peter |
\return first of available PG IDs |
4290 |
03 Feb 23 |
peter |
140 |
|
4290 |
03 Feb 23 |
peter |
\since New in yat 0.21 |
4290 |
03 Feb 23 |
peter |
142 |
*/ |
4290 |
03 Feb 23 |
peter |
143 |
program_group_iterator program_group_begin(void) const; |
4290 |
03 Feb 23 |
peter |
144 |
|
4290 |
03 Feb 23 |
peter |
145 |
/** |
4290 |
03 Feb 23 |
peter |
\return end of range of available PG IDs |
4290 |
03 Feb 23 |
peter |
147 |
|
4290 |
03 Feb 23 |
peter |
\see program_group_begin() |
4290 |
03 Feb 23 |
peter |
149 |
|
4290 |
03 Feb 23 |
peter |
\since New in yat 0.21 |
4290 |
03 Feb 23 |
peter |
151 |
*/ |
4290 |
03 Feb 23 |
peter |
152 |
program_group_iterator program_group_end(void) const; |
4290 |
03 Feb 23 |
peter |
153 |
|
4290 |
03 Feb 23 |
peter |
154 |
/** |
3410 |
21 Apr 15 |
peter |
\brief Access value in \c \@RG lines. |
3410 |
21 Apr 15 |
peter |
156 |
|
3410 |
21 Apr 15 |
peter |
A read group line in the header typically looks like |
3410 |
21 Apr 15 |
peter |
158 |
|
3410 |
21 Apr 15 |
peter |
\code @RG ID:foo PL:ILLUMINA SM:Tumour \endcode |
3410 |
21 Apr 15 |
peter |
160 |
|
3410 |
21 Apr 15 |
peter |
and for this line \c read_group("foo", "SM") returns \c "Tumour" |
3410 |
21 Apr 15 |
peter |
162 |
|
3477 |
09 Mar 16 |
peter |
To see which IDs are available in the header, iterate through |
3477 |
09 Mar 16 |
peter |
the set of IDs with read_group_begin() (and read_group_end()). |
3477 |
09 Mar 16 |
peter |
165 |
|
3410 |
21 Apr 15 |
peter |
\return value for \a key for line with ID \a id. |
3410 |
21 Apr 15 |
peter |
167 |
|
3410 |
21 Apr 15 |
peter |
\since New in yat 0.13 |
3410 |
21 Apr 15 |
peter |
169 |
*/ |
3410 |
21 Apr 15 |
peter |
170 |
const std::string& read_group(const std::string& id, |
3410 |
21 Apr 15 |
peter |
171 |
const std::string& key) const; |
3410 |
21 Apr 15 |
peter |
172 |
|
3410 |
21 Apr 15 |
peter |
173 |
/** |
3477 |
09 Mar 16 |
peter |
\returns a map describing an \@RG line such as |
3477 |
09 Mar 16 |
peter |
175 |
|
3477 |
09 Mar 16 |
peter |
\code @RG ID:foo PL:ILLUMINA SM:Tumour \endcode |
3477 |
09 Mar 16 |
peter |
177 |
|
3477 |
09 Mar 16 |
peter |
for which the returned map contains two entries: one with key |
3477 |
09 Mar 16 |
peter |
\c PL and value \c ILLUMINA, and one with key \c SM and value |
3477 |
09 Mar 16 |
peter |
\c Tumour. |
3477 |
09 Mar 16 |
peter |
181 |
|
3477 |
09 Mar 16 |
peter |
\since New in yat 0.14 |
3477 |
09 Mar 16 |
peter |
183 |
*/ |
3477 |
09 Mar 16 |
peter |
184 |
const std::map<std::string, std::string>& |
3477 |
09 Mar 16 |
peter |
185 |
read_group(const std::string& id) const; |
3477 |
09 Mar 16 |
peter |
186 |
|
3477 |
09 Mar 16 |
peter |
187 |
/** |
3477 |
09 Mar 16 |
peter |
read_group_iterator is used to iterate through read groups |
3477 |
09 Mar 16 |
peter |
189 |
|
3599 |
22 Jan 17 |
peter |
read_group_iterator is a \readable_iterator and |
3599 |
22 Jan 17 |
peter |
\bidirectional_traversal_iterator with value type \c |
3599 |
22 Jan 17 |
peter |
std::string |
3599 |
22 Jan 17 |
peter |
193 |
|
3477 |
09 Mar 16 |
peter |
\see read_group_begin |
3477 |
09 Mar 16 |
peter |
\see read_group_end |
3477 |
09 Mar 16 |
peter |
196 |
|
3477 |
09 Mar 16 |
peter |
\since New in yat 0.14 |
3477 |
09 Mar 16 |
peter |
198 |
*/ |
3477 |
09 Mar 16 |
peter |
199 |
typedef boost::transform_iterator< |
3477 |
09 Mar 16 |
peter |
200 |
utility::PairFirst<const strMap2::value_type>, |
3477 |
09 Mar 16 |
peter |
201 |
strMap2::const_iterator> |
3477 |
09 Mar 16 |
peter |
202 |
read_group_iterator; |
3477 |
09 Mar 16 |
peter |
203 |
|
3477 |
09 Mar 16 |
peter |
204 |
/** |
3477 |
09 Mar 16 |
peter |
\return first of available RG IDs |
3477 |
09 Mar 16 |
peter |
206 |
|
3477 |
09 Mar 16 |
peter |
\since New in yat 0.14 |
3477 |
09 Mar 16 |
peter |
208 |
*/ |
3477 |
09 Mar 16 |
peter |
209 |
read_group_iterator read_group_begin(void) const; |
3477 |
09 Mar 16 |
peter |
210 |
|
3477 |
09 Mar 16 |
peter |
211 |
/** |
3477 |
09 Mar 16 |
peter |
\return end of range of available RG IDs |
3477 |
09 Mar 16 |
peter |
213 |
|
3477 |
09 Mar 16 |
peter |
\see read_group_begin() |
3477 |
09 Mar 16 |
peter |
215 |
|
3477 |
09 Mar 16 |
peter |
\since New in yat 0.14 |
3477 |
09 Mar 16 |
peter |
217 |
*/ |
3477 |
09 Mar 16 |
peter |
218 |
read_group_iterator read_group_end(void) const; |
3477 |
09 Mar 16 |
peter |
219 |
|
3477 |
09 Mar 16 |
peter |
220 |
/** |
3408 |
16 Apr 15 |
peter |
\brief Exchanges the content in \c *this and \a other |
3408 |
16 Apr 15 |
peter |
222 |
|
3408 |
16 Apr 15 |
peter |
\since New in yat 0.13 |
3408 |
16 Apr 15 |
peter |
224 |
*/ |
3408 |
16 Apr 15 |
peter |
225 |
void swap(BamHeader& other); |
3408 |
16 Apr 15 |
peter |
226 |
|
3408 |
16 Apr 15 |
peter |
227 |
/** |
2883 |
03 Dec 12 |
peter |
Name of chromosome with ID \a tid |
2883 |
03 Dec 12 |
peter |
229 |
*/ |
2883 |
03 Dec 12 |
peter |
230 |
const char* target_name(size_t tid) const; |
2883 |
03 Dec 12 |
peter |
231 |
|
2883 |
03 Dec 12 |
peter |
232 |
/** |
2883 |
03 Dec 12 |
peter |
Length of chromosome with ID \a tid |
2883 |
03 Dec 12 |
peter |
234 |
*/ |
2883 |
03 Dec 12 |
peter |
235 |
uint32_t target_length(size_t tid) const; |
2883 |
03 Dec 12 |
peter |
236 |
|
2883 |
03 Dec 12 |
peter |
237 |
/** |
3408 |
16 Apr 15 |
peter |
\return text in header |
3408 |
16 Apr 15 |
peter |
239 |
|
3408 |
16 Apr 15 |
peter |
\since New in yat 0.13 |
3408 |
16 Apr 15 |
peter |
241 |
*/ |
3408 |
16 Apr 15 |
peter |
242 |
std::string text(void) const; |
3408 |
16 Apr 15 |
peter |
243 |
|
3408 |
16 Apr 15 |
peter |
244 |
/** |
3408 |
16 Apr 15 |
peter |
\brief set text in header |
3408 |
16 Apr 15 |
peter |
246 |
|
3408 |
16 Apr 15 |
peter |
This function parses \a txt and updates fields. |
3408 |
16 Apr 15 |
peter |
248 |
|
3479 |
10 Mar 16 |
peter |
Function invalidates object's iterators and reference returned |
3479 |
10 Mar 16 |
peter |
from read_group(const std::string&). |
3479 |
10 Mar 16 |
peter |
251 |
|
3408 |
16 Apr 15 |
peter |
\since New in yat 0.13 |
3408 |
16 Apr 15 |
peter |
253 |
*/ |
3408 |
16 Apr 15 |
peter |
254 |
void text(const std::string& txt); |
3408 |
16 Apr 15 |
peter |
255 |
|
3408 |
16 Apr 15 |
peter |
256 |
/** |
2999 |
14 Mar 13 |
peter |
\brief inverse of target_name(size_t) |
2999 |
14 Mar 13 |
peter |
258 |
|
4169 |
25 Mar 22 |
peter |
If \a name does not exist, a utility::runtime_error is thrown. |
2999 |
14 Mar 13 |
peter |
260 |
|
2999 |
14 Mar 13 |
peter |
\since new in yat 0.11 |
2999 |
14 Mar 13 |
peter |
262 |
*/ |
2999 |
14 Mar 13 |
peter |
263 |
int32_t tid(const std::string& name) const; |
2999 |
14 Mar 13 |
peter |
264 |
|
2999 |
14 Mar 13 |
peter |
265 |
/** |
3363 |
25 Nov 14 |
peter |
\return Number of chromosomes |
2883 |
03 Dec 12 |
peter |
267 |
*/ |
2883 |
03 Dec 12 |
peter |
268 |
int32_t n_targets(void) const; |
3408 |
16 Apr 15 |
peter |
269 |
|
3408 |
16 Apr 15 |
peter |
270 |
/** |
3408 |
16 Apr 15 |
peter |
\brief assignment operator |
3408 |
16 Apr 15 |
peter |
272 |
*/ |
3408 |
16 Apr 15 |
peter |
273 |
BamHeader& operator=(const BamHeader& rhs); |
3408 |
16 Apr 15 |
peter |
274 |
|
3600 |
22 Jan 17 |
peter |
275 |
/** |
3600 |
22 Jan 17 |
peter |
\brief move assignment operator |
3600 |
22 Jan 17 |
peter |
277 |
*/ |
3600 |
22 Jan 17 |
peter |
278 |
BamHeader& operator=(BamHeader&& rhs); |
3600 |
22 Jan 17 |
peter |
279 |
|
2883 |
03 Dec 12 |
peter |
280 |
private: |
3352 |
21 Nov 14 |
peter |
281 |
bam_hdr_t* header_; |
3410 |
21 Apr 15 |
peter |
282 |
mutable strMap2 read_group_; |
3410 |
21 Apr 15 |
peter |
283 |
mutable strMap2 program_group_; |
2883 |
03 Dec 12 |
peter |
284 |
|
4272 |
24 Jan 23 |
peter |
285 |
friend class BamReadIterator; |
2883 |
03 Dec 12 |
peter |
286 |
friend class InBamFile; |
2883 |
03 Dec 12 |
peter |
287 |
friend class OutBamFile; |
2883 |
03 Dec 12 |
peter |
288 |
|
4290 |
03 Feb 23 |
peter |
289 |
const std::map<std::string, std::string>& |
4290 |
03 Feb 23 |
peter |
290 |
group(strMap2&, const std::string& type, |
4290 |
03 Feb 23 |
peter |
291 |
const std::string& id) const; |
4290 |
03 Feb 23 |
peter |
292 |
|
4290 |
03 Feb 23 |
peter |
293 |
read_group_iterator |
4290 |
03 Feb 23 |
peter |
294 |
group_begin(strMap2&, const std::string& type) const; |
4290 |
03 Feb 23 |
peter |
295 |
|
4290 |
03 Feb 23 |
peter |
296 |
read_group_iterator |
4290 |
03 Feb 23 |
peter |
297 |
group_end(strMap2&, const std::string& type) const; |
4290 |
03 Feb 23 |
peter |
298 |
|
3410 |
21 Apr 15 |
peter |
299 |
const std::string& group(strMap2& map, const std::string& type, |
3410 |
21 Apr 15 |
peter |
300 |
const std::string& id, |
3410 |
21 Apr 15 |
peter |
301 |
const std::string& key) const; |
3410 |
21 Apr 15 |
peter |
302 |
|
3477 |
09 Mar 16 |
peter |
303 |
void update_group(strMap2& map, const std::string& type) const; |
2883 |
03 Dec 12 |
peter |
304 |
}; |
2883 |
03 Dec 12 |
peter |
305 |
|
3408 |
16 Apr 15 |
peter |
306 |
/** |
3408 |
16 Apr 15 |
peter |
Exchanges the content in the headers. |
3408 |
16 Apr 15 |
peter |
308 |
|
3408 |
16 Apr 15 |
peter |
\since New in yat 0.13 |
3408 |
16 Apr 15 |
peter |
310 |
|
3408 |
16 Apr 15 |
peter |
\relates BamHeader |
3408 |
16 Apr 15 |
peter |
312 |
*/ |
3408 |
16 Apr 15 |
peter |
313 |
void swap(BamHeader& lhs, BamHeader& rhs); |
3408 |
16 Apr 15 |
peter |
314 |
|
2883 |
03 Dec 12 |
peter |
315 |
}}} |
2883 |
03 Dec 12 |
peter |
316 |
#endif |