3747 |
14 Aug 18 |
peter |
// $Id$ |
3747 |
14 Aug 18 |
peter |
2 |
|
3747 |
14 Aug 18 |
peter |
3 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2018, 2019, 2020, 2022 Peter Johansson |
3747 |
14 Aug 18 |
peter |
5 |
|
3747 |
14 Aug 18 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
3747 |
14 Aug 18 |
peter |
7 |
|
3747 |
14 Aug 18 |
peter |
The yat library is free software; you can redistribute it and/or |
3747 |
14 Aug 18 |
peter |
modify it under the terms of the GNU General Public License as |
3747 |
14 Aug 18 |
peter |
published by the Free Software Foundation; either version 3 of the |
3747 |
14 Aug 18 |
peter |
License, or (at your option) any later version. |
3747 |
14 Aug 18 |
peter |
12 |
|
3747 |
14 Aug 18 |
peter |
The yat library is distributed in the hope that it will be useful, |
3747 |
14 Aug 18 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
3747 |
14 Aug 18 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3747 |
14 Aug 18 |
peter |
General Public License for more details. |
3747 |
14 Aug 18 |
peter |
17 |
|
3747 |
14 Aug 18 |
peter |
You should have received a copy of the GNU General Public License |
3755 |
17 Oct 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
3747 |
14 Aug 18 |
peter |
20 |
*/ |
3747 |
14 Aug 18 |
peter |
21 |
|
3747 |
14 Aug 18 |
peter |
22 |
#include <config.h> |
3747 |
14 Aug 18 |
peter |
23 |
|
3747 |
14 Aug 18 |
peter |
24 |
#include "VCF.h" |
3747 |
14 Aug 18 |
peter |
25 |
|
4145 |
24 Feb 22 |
peter |
26 |
#include <boost/iterator/permutation_iterator.hpp> |
4145 |
24 Feb 22 |
peter |
27 |
|
3747 |
14 Aug 18 |
peter |
28 |
#include <yat/utility/Exception.h> |
3747 |
14 Aug 18 |
peter |
29 |
#include <yat/utility/getvector.h> |
3747 |
14 Aug 18 |
peter |
30 |
#include <yat/utility/OstreamIterator.h> |
3747 |
14 Aug 18 |
peter |
31 |
#include <yat/utility/split.h> |
3747 |
14 Aug 18 |
peter |
32 |
#include <yat/utility/utility.h> |
3747 |
14 Aug 18 |
peter |
33 |
|
3747 |
14 Aug 18 |
peter |
34 |
#include <algorithm> |
3747 |
14 Aug 18 |
peter |
35 |
#include <cassert> |
3747 |
14 Aug 18 |
peter |
36 |
#include <cstdio> |
3747 |
14 Aug 18 |
peter |
37 |
#include <functional> |
4145 |
24 Feb 22 |
peter |
38 |
#include <iterator> |
3955 |
23 Jul 20 |
peter |
39 |
#include <utility> |
3747 |
14 Aug 18 |
peter |
40 |
|
3747 |
14 Aug 18 |
peter |
41 |
namespace theplu { |
3747 |
14 Aug 18 |
peter |
42 |
namespace yat { |
3747 |
14 Aug 18 |
peter |
43 |
namespace omic { |
3747 |
14 Aug 18 |
peter |
44 |
|
3747 |
14 Aug 18 |
peter |
45 |
VCF::VCF(void) |
3747 |
14 Aug 18 |
peter |
46 |
{} |
3747 |
14 Aug 18 |
peter |
47 |
|
3747 |
14 Aug 18 |
peter |
48 |
|
3747 |
14 Aug 18 |
peter |
49 |
VCF::VCF(const std::string& str) |
3747 |
14 Aug 18 |
peter |
50 |
{ |
3747 |
14 Aug 18 |
peter |
51 |
utility::split(vec_, str, '\t'); |
3747 |
14 Aug 18 |
peter |
52 |
init(vec_); |
3747 |
14 Aug 18 |
peter |
53 |
} |
3747 |
14 Aug 18 |
peter |
54 |
|
3747 |
14 Aug 18 |
peter |
55 |
|
3747 |
14 Aug 18 |
peter |
56 |
VCF::VCF(std::istream& is) |
3747 |
14 Aug 18 |
peter |
57 |
{ |
3747 |
14 Aug 18 |
peter |
58 |
utility::getvector(is, vec_, '\t'); |
3747 |
14 Aug 18 |
peter |
59 |
init(vec_); |
3747 |
14 Aug 18 |
peter |
60 |
} |
3747 |
14 Aug 18 |
peter |
61 |
|
3747 |
14 Aug 18 |
peter |
62 |
|
4145 |
24 Feb 22 |
peter |
63 |
VCF::VCF(size_t n) |
4145 |
24 Feb 22 |
peter |
64 |
: vec_(9+n) |
4145 |
24 Feb 22 |
peter |
65 |
{ |
4145 |
24 Feb 22 |
peter |
66 |
vec_[1] = "0"; |
4145 |
24 Feb 22 |
peter |
67 |
init(vec_); |
4145 |
24 Feb 22 |
peter |
68 |
assert(n_samples() == n); |
4145 |
24 Feb 22 |
peter |
69 |
} |
4145 |
24 Feb 22 |
peter |
70 |
|
4145 |
24 Feb 22 |
peter |
71 |
|
3747 |
14 Aug 18 |
peter |
72 |
void VCF::add_filter(const std::string& f) |
3747 |
14 Aug 18 |
peter |
73 |
{ |
3747 |
14 Aug 18 |
peter |
74 |
if (!vec_[6].empty()) { |
3747 |
14 Aug 18 |
peter |
75 |
if (filters_.empty()) |
3747 |
14 Aug 18 |
peter |
76 |
utility::split(filters_, filter(), ';'); |
3747 |
14 Aug 18 |
peter |
77 |
vec_[6] += ';'; |
3747 |
14 Aug 18 |
peter |
78 |
} |
3747 |
14 Aug 18 |
peter |
79 |
vec_[6] += f; |
3747 |
14 Aug 18 |
peter |
80 |
filters_.push_back(f); |
3747 |
14 Aug 18 |
peter |
81 |
} |
3747 |
14 Aug 18 |
peter |
82 |
|
3747 |
14 Aug 18 |
peter |
83 |
|
3747 |
14 Aug 18 |
peter |
84 |
void VCF::alt(const std::string& a) |
3747 |
14 Aug 18 |
peter |
85 |
{ |
3747 |
14 Aug 18 |
peter |
86 |
alts_.clear(); |
3747 |
14 Aug 18 |
peter |
87 |
assert(4 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
88 |
vec_[4] = a; |
3747 |
14 Aug 18 |
peter |
89 |
} |
3747 |
14 Aug 18 |
peter |
90 |
|
3747 |
14 Aug 18 |
peter |
91 |
|
3747 |
14 Aug 18 |
peter |
92 |
void VCF::alt(char a) |
3747 |
14 Aug 18 |
peter |
93 |
{ |
3747 |
14 Aug 18 |
peter |
94 |
alts_.clear(); |
3747 |
14 Aug 18 |
peter |
95 |
assert(4 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
96 |
vec_[4].assign(1, a); |
3747 |
14 Aug 18 |
peter |
97 |
} |
3747 |
14 Aug 18 |
peter |
98 |
|
3747 |
14 Aug 18 |
peter |
99 |
|
3747 |
14 Aug 18 |
peter |
100 |
const std::string& VCF::alt(void) const |
3747 |
14 Aug 18 |
peter |
101 |
{ |
3747 |
14 Aug 18 |
peter |
102 |
assert(4 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
103 |
return vec_[4]; |
3747 |
14 Aug 18 |
peter |
104 |
} |
3747 |
14 Aug 18 |
peter |
105 |
|
3747 |
14 Aug 18 |
peter |
106 |
|
3747 |
14 Aug 18 |
peter |
107 |
const std::vector<std::string>& VCF::alts(void) const |
3747 |
14 Aug 18 |
peter |
108 |
{ |
3747 |
14 Aug 18 |
peter |
109 |
if (alts_.empty()) { |
3747 |
14 Aug 18 |
peter |
110 |
utility::split(alts_, alt(), ','); |
3747 |
14 Aug 18 |
peter |
111 |
} |
3747 |
14 Aug 18 |
peter |
112 |
return alts_; |
3747 |
14 Aug 18 |
peter |
113 |
} |
3747 |
14 Aug 18 |
peter |
114 |
|
3747 |
14 Aug 18 |
peter |
115 |
|
3747 |
14 Aug 18 |
peter |
116 |
void VCF::alts(const std::vector<std::string>& a) |
3747 |
14 Aug 18 |
peter |
117 |
{ |
3747 |
14 Aug 18 |
peter |
118 |
alts_ = a; |
3747 |
14 Aug 18 |
peter |
119 |
update_alt(); |
3747 |
14 Aug 18 |
peter |
120 |
} |
3747 |
14 Aug 18 |
peter |
121 |
|
3747 |
14 Aug 18 |
peter |
122 |
|
3747 |
14 Aug 18 |
peter |
123 |
void VCF::alts(std::vector<std::string>&& a) |
3747 |
14 Aug 18 |
peter |
124 |
{ |
3747 |
14 Aug 18 |
peter |
125 |
alts_ = std::move(a); |
3747 |
14 Aug 18 |
peter |
126 |
update_alt(); |
3747 |
14 Aug 18 |
peter |
127 |
} |
3747 |
14 Aug 18 |
peter |
128 |
|
3747 |
14 Aug 18 |
peter |
129 |
|
3747 |
14 Aug 18 |
peter |
130 |
void VCF::update_alt(void) |
3747 |
14 Aug 18 |
peter |
131 |
{ |
3747 |
14 Aug 18 |
peter |
132 |
assert(vec_.size()>4); |
3747 |
14 Aug 18 |
peter |
133 |
std::string& alt = vec_[4]; |
3747 |
14 Aug 18 |
peter |
134 |
alt = ""; |
3747 |
14 Aug 18 |
peter |
135 |
for (size_t i=0; i<alts_.size(); ++i) { |
3747 |
14 Aug 18 |
peter |
136 |
if (i) |
3747 |
14 Aug 18 |
peter |
137 |
alt += ","; |
3747 |
14 Aug 18 |
peter |
138 |
alt += alts_[i]; |
3747 |
14 Aug 18 |
peter |
139 |
} |
3747 |
14 Aug 18 |
peter |
140 |
} |
3747 |
14 Aug 18 |
peter |
141 |
|
3747 |
14 Aug 18 |
peter |
142 |
|
3747 |
14 Aug 18 |
peter |
143 |
void VCF::chromosome(const std::string& chr) |
3747 |
14 Aug 18 |
peter |
144 |
{ |
3747 |
14 Aug 18 |
peter |
145 |
assert(0 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
146 |
vec_[0] = chr; |
3747 |
14 Aug 18 |
peter |
147 |
} |
3747 |
14 Aug 18 |
peter |
148 |
|
3747 |
14 Aug 18 |
peter |
149 |
|
3747 |
14 Aug 18 |
peter |
150 |
const std::string& VCF::chromosome(void) const |
3747 |
14 Aug 18 |
peter |
151 |
{ |
3747 |
14 Aug 18 |
peter |
152 |
assert(0 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
153 |
return vec_[0]; |
3747 |
14 Aug 18 |
peter |
154 |
} |
3747 |
14 Aug 18 |
peter |
155 |
|
3747 |
14 Aug 18 |
peter |
156 |
|
3747 |
14 Aug 18 |
peter |
157 |
VCF::Data& VCF::data(void) |
3747 |
14 Aug 18 |
peter |
158 |
{ |
3747 |
14 Aug 18 |
peter |
159 |
assert(vec_.size()>7); // allow no data (8 cols only) |
3747 |
14 Aug 18 |
peter |
160 |
if (!data_.initialised()) |
3747 |
14 Aug 18 |
peter |
161 |
data_.init(vec_.begin()+8, vec_.end()); |
3747 |
14 Aug 18 |
peter |
162 |
data_.validate(); |
3747 |
14 Aug 18 |
peter |
163 |
return data_; |
3747 |
14 Aug 18 |
peter |
164 |
} |
3747 |
14 Aug 18 |
peter |
165 |
|
3747 |
14 Aug 18 |
peter |
166 |
|
3747 |
14 Aug 18 |
peter |
167 |
const VCF::Data& VCF::data(void) const |
3747 |
14 Aug 18 |
peter |
168 |
{ |
3747 |
14 Aug 18 |
peter |
169 |
assert(vec_.size()>7); // allow no data (8 cols only) |
3747 |
14 Aug 18 |
peter |
// rather than declaring member mutable we only cast here to allow |
3747 |
14 Aug 18 |
peter |
// lazy init and still allow const check elsewhere. |
3747 |
14 Aug 18 |
peter |
172 |
if (!data_.initialised()) |
3747 |
14 Aug 18 |
peter |
173 |
const_cast<Data&>(data_).init(vec_.begin()+8, vec_.end()); |
3747 |
14 Aug 18 |
peter |
174 |
data_.validate(); |
3747 |
14 Aug 18 |
peter |
175 |
return data_; |
3747 |
14 Aug 18 |
peter |
176 |
} |
3747 |
14 Aug 18 |
peter |
177 |
|
3747 |
14 Aug 18 |
peter |
178 |
|
3747 |
14 Aug 18 |
peter |
179 |
void VCF::filter(const std::string& f) |
3747 |
14 Aug 18 |
peter |
180 |
{ |
3747 |
14 Aug 18 |
peter |
181 |
assert(6 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
182 |
filters_.clear(); |
3747 |
14 Aug 18 |
peter |
183 |
vec_[6] = f; |
3747 |
14 Aug 18 |
peter |
184 |
} |
3747 |
14 Aug 18 |
peter |
185 |
|
3747 |
14 Aug 18 |
peter |
186 |
|
3747 |
14 Aug 18 |
peter |
187 |
const std::string& VCF::filter(void) const |
3747 |
14 Aug 18 |
peter |
188 |
{ |
3747 |
14 Aug 18 |
peter |
189 |
assert(6 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
190 |
return vec_[6]; |
3747 |
14 Aug 18 |
peter |
191 |
} |
3747 |
14 Aug 18 |
peter |
192 |
|
3747 |
14 Aug 18 |
peter |
193 |
|
3747 |
14 Aug 18 |
peter |
194 |
const std::vector<std::string>& VCF::filters(void) const |
3747 |
14 Aug 18 |
peter |
195 |
{ |
3747 |
14 Aug 18 |
peter |
196 |
if (filters_.empty() && !filter().empty()) |
3747 |
14 Aug 18 |
peter |
197 |
utility::split(filters_, filter(), ';'); |
3747 |
14 Aug 18 |
peter |
198 |
return filters_; |
3747 |
14 Aug 18 |
peter |
199 |
} |
3747 |
14 Aug 18 |
peter |
200 |
|
3747 |
14 Aug 18 |
peter |
201 |
|
3747 |
14 Aug 18 |
peter |
202 |
void VCF::id(const std::string& id) |
3747 |
14 Aug 18 |
peter |
203 |
{ |
3747 |
14 Aug 18 |
peter |
204 |
assert(2 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
205 |
vec_[2] = id; |
3747 |
14 Aug 18 |
peter |
206 |
} |
3747 |
14 Aug 18 |
peter |
207 |
|
3747 |
14 Aug 18 |
peter |
208 |
|
3747 |
14 Aug 18 |
peter |
209 |
const std::string& VCF::id(void) const |
3747 |
14 Aug 18 |
peter |
210 |
{ |
3747 |
14 Aug 18 |
peter |
211 |
assert(2 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
212 |
return vec_[2]; |
3747 |
14 Aug 18 |
peter |
213 |
} |
3747 |
14 Aug 18 |
peter |
214 |
|
3747 |
14 Aug 18 |
peter |
215 |
|
3747 |
14 Aug 18 |
peter |
216 |
VCF::Info& VCF::info(void) |
3747 |
14 Aug 18 |
peter |
217 |
{ |
3747 |
14 Aug 18 |
peter |
218 |
return info_; |
3747 |
14 Aug 18 |
peter |
219 |
} |
3747 |
14 Aug 18 |
peter |
220 |
|
3747 |
14 Aug 18 |
peter |
221 |
|
3747 |
14 Aug 18 |
peter |
222 |
const VCF::Info& VCF::info(void) const |
3747 |
14 Aug 18 |
peter |
223 |
{ |
3747 |
14 Aug 18 |
peter |
224 |
return info_; |
3747 |
14 Aug 18 |
peter |
225 |
} |
3747 |
14 Aug 18 |
peter |
226 |
|
3747 |
14 Aug 18 |
peter |
227 |
|
3747 |
14 Aug 18 |
peter |
228 |
void VCF::init(std::vector<std::string>& vec) |
3747 |
14 Aug 18 |
peter |
229 |
{ |
3747 |
14 Aug 18 |
peter |
230 |
assert(vec.size() > 1); |
3747 |
14 Aug 18 |
peter |
231 |
position_ = utility::convert<int32_t>(vec_[1]); |
4145 |
24 Feb 22 |
peter |
232 |
info_ = Info(std::move(vec_[7])); |
3747 |
14 Aug 18 |
peter |
233 |
validate(); |
3747 |
14 Aug 18 |
peter |
234 |
} |
3747 |
14 Aug 18 |
peter |
235 |
|
3747 |
14 Aug 18 |
peter |
236 |
|
3747 |
14 Aug 18 |
peter |
237 |
unsigned int VCF::n_alleles(void) const |
3747 |
14 Aug 18 |
peter |
238 |
{ |
3747 |
14 Aug 18 |
peter |
239 |
return std::count(alt().begin(), alt().end(), ',') + 1; |
3747 |
14 Aug 18 |
peter |
240 |
} |
3747 |
14 Aug 18 |
peter |
241 |
|
3747 |
14 Aug 18 |
peter |
242 |
|
3747 |
14 Aug 18 |
peter |
243 |
unsigned int VCF::n_samples(void) const |
3747 |
14 Aug 18 |
peter |
244 |
{ |
4145 |
24 Feb 22 |
peter |
245 |
if (data_.initialised()) |
4145 |
24 Feb 22 |
peter |
246 |
return data_.n_samples(); |
3747 |
14 Aug 18 |
peter |
247 |
assert(vec_.size() == 8 || vec_.size()>9); |
3747 |
14 Aug 18 |
peter |
248 |
if (vec_.size()>8) |
3747 |
14 Aug 18 |
peter |
249 |
return vec_.size()-9; |
3747 |
14 Aug 18 |
peter |
250 |
return 0; |
3747 |
14 Aug 18 |
peter |
251 |
} |
3747 |
14 Aug 18 |
peter |
252 |
|
3747 |
14 Aug 18 |
peter |
253 |
|
3747 |
14 Aug 18 |
peter |
254 |
void VCF::n_samples(int n) |
3747 |
14 Aug 18 |
peter |
255 |
{ |
3747 |
14 Aug 18 |
peter |
256 |
assert(!data_.initialised()); |
3747 |
14 Aug 18 |
peter |
257 |
if (n==0) { |
3747 |
14 Aug 18 |
peter |
258 |
vec_.resize(8); |
3747 |
14 Aug 18 |
peter |
259 |
return; |
3747 |
14 Aug 18 |
peter |
260 |
} |
3747 |
14 Aug 18 |
peter |
261 |
vec_.resize(9+n); |
3747 |
14 Aug 18 |
peter |
262 |
} |
3747 |
14 Aug 18 |
peter |
263 |
|
3747 |
14 Aug 18 |
peter |
264 |
|
3747 |
14 Aug 18 |
peter |
265 |
void VCF::pos(int32_t p) |
3747 |
14 Aug 18 |
peter |
266 |
{ |
3747 |
14 Aug 18 |
peter |
267 |
position_ = p; |
3747 |
14 Aug 18 |
peter |
268 |
} |
3747 |
14 Aug 18 |
peter |
269 |
|
3747 |
14 Aug 18 |
peter |
270 |
|
3747 |
14 Aug 18 |
peter |
271 |
const int32_t& VCF::pos(void) const |
3747 |
14 Aug 18 |
peter |
272 |
{ |
3747 |
14 Aug 18 |
peter |
273 |
return position_; |
3747 |
14 Aug 18 |
peter |
274 |
} |
3747 |
14 Aug 18 |
peter |
275 |
|
3747 |
14 Aug 18 |
peter |
276 |
|
3747 |
14 Aug 18 |
peter |
277 |
void VCF::print(std::ostream& os) const |
3747 |
14 Aug 18 |
peter |
278 |
{ |
3747 |
14 Aug 18 |
peter |
279 |
os << chromosome() << "\t" << pos(); |
3747 |
14 Aug 18 |
peter |
280 |
for (size_t i=2; i<7; ++i) |
3747 |
14 Aug 18 |
peter |
281 |
os << "\t" << vec_[i]; |
3747 |
14 Aug 18 |
peter |
282 |
os << "\t"; |
3747 |
14 Aug 18 |
peter |
283 |
os << info_; |
3747 |
14 Aug 18 |
peter |
284 |
|
3747 |
14 Aug 18 |
peter |
285 |
if (data_.initialised()) |
3747 |
14 Aug 18 |
peter |
286 |
os << "\t" << data_; |
3747 |
14 Aug 18 |
peter |
287 |
else { |
3747 |
14 Aug 18 |
peter |
288 |
for (size_t i=8; i<vec_.size(); ++i) |
3747 |
14 Aug 18 |
peter |
289 |
os << "\t" << vec_[i]; |
3747 |
14 Aug 18 |
peter |
290 |
} |
3747 |
14 Aug 18 |
peter |
291 |
} |
3747 |
14 Aug 18 |
peter |
292 |
|
3747 |
14 Aug 18 |
peter |
293 |
|
3747 |
14 Aug 18 |
peter |
294 |
void VCF::qual(const std::string& q) |
3747 |
14 Aug 18 |
peter |
295 |
{ |
3747 |
14 Aug 18 |
peter |
296 |
assert(5 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
297 |
vec_[5] = q; |
3747 |
14 Aug 18 |
peter |
298 |
} |
3747 |
14 Aug 18 |
peter |
299 |
|
3747 |
14 Aug 18 |
peter |
300 |
|
3747 |
14 Aug 18 |
peter |
301 |
void VCF::qual(unsigned int q) |
3747 |
14 Aug 18 |
peter |
302 |
{ |
3747 |
14 Aug 18 |
peter |
303 |
assert(5 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
304 |
vec_[5] = utility::convert(q); |
3747 |
14 Aug 18 |
peter |
305 |
} |
3747 |
14 Aug 18 |
peter |
306 |
|
3747 |
14 Aug 18 |
peter |
307 |
|
3747 |
14 Aug 18 |
peter |
308 |
const std::string& VCF::qual(void) const |
3747 |
14 Aug 18 |
peter |
309 |
{ |
3747 |
14 Aug 18 |
peter |
310 |
assert(5 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
311 |
return vec_[5]; |
3747 |
14 Aug 18 |
peter |
312 |
} |
3747 |
14 Aug 18 |
peter |
313 |
|
3747 |
14 Aug 18 |
peter |
314 |
|
3747 |
14 Aug 18 |
peter |
315 |
void VCF::ref(const std::string& r) |
3747 |
14 Aug 18 |
peter |
316 |
{ |
3747 |
14 Aug 18 |
peter |
317 |
assert(3 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
318 |
vec_[3] = r; |
3747 |
14 Aug 18 |
peter |
319 |
} |
3747 |
14 Aug 18 |
peter |
320 |
|
3747 |
14 Aug 18 |
peter |
321 |
|
3747 |
14 Aug 18 |
peter |
322 |
void VCF::ref(char r) |
3747 |
14 Aug 18 |
peter |
323 |
{ |
3747 |
14 Aug 18 |
peter |
324 |
assert(3 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
325 |
vec_[3].assign(1, r); |
3747 |
14 Aug 18 |
peter |
326 |
} |
3747 |
14 Aug 18 |
peter |
327 |
|
3747 |
14 Aug 18 |
peter |
328 |
|
3747 |
14 Aug 18 |
peter |
329 |
const std::string& VCF::ref(void) const |
3747 |
14 Aug 18 |
peter |
330 |
{ |
3747 |
14 Aug 18 |
peter |
331 |
assert(3 < vec_.size()); |
3747 |
14 Aug 18 |
peter |
332 |
return vec_[3]; |
3747 |
14 Aug 18 |
peter |
333 |
} |
3747 |
14 Aug 18 |
peter |
334 |
|
3747 |
14 Aug 18 |
peter |
335 |
|
4145 |
24 Feb 22 |
peter |
336 |
void VCF::subset(const std::vector<size_t>& index) |
4145 |
24 Feb 22 |
peter |
337 |
{ |
4145 |
24 Feb 22 |
peter |
338 |
if (data_.initialised()) |
4145 |
24 Feb 22 |
peter |
339 |
data().subset(index); |
4145 |
24 Feb 22 |
peter |
340 |
else { |
4145 |
24 Feb 22 |
peter |
341 |
std::vector<std::string> |
4145 |
24 Feb 22 |
peter |
342 |
tmp(boost::make_permutation_iterator(vec_.begin()+9,index.begin()), |
4145 |
24 Feb 22 |
peter |
343 |
boost::make_permutation_iterator(vec_.begin()+9,index.end())); |
4145 |
24 Feb 22 |
peter |
344 |
vec_.resize(9+index.size()); |
4145 |
24 Feb 22 |
peter |
345 |
std::copy(std::make_move_iterator(tmp.begin()), |
4145 |
24 Feb 22 |
peter |
346 |
std::make_move_iterator(tmp.end()), vec_.begin()+9); |
4145 |
24 Feb 22 |
peter |
347 |
} |
4145 |
24 Feb 22 |
peter |
348 |
|
4145 |
24 Feb 22 |
peter |
349 |
update_AC_AN(); |
4145 |
24 Feb 22 |
peter |
350 |
} |
4145 |
24 Feb 22 |
peter |
351 |
|
4145 |
24 Feb 22 |
peter |
352 |
|
4145 |
24 Feb 22 |
peter |
353 |
void VCF::update_AC_AN(void) |
4145 |
24 Feb 22 |
peter |
354 |
{ |
4145 |
24 Feb 22 |
peter |
// Update INFO fields AC and AN |
4145 |
24 Feb 22 |
peter |
// AC: (A) allele count in each ALT allele |
4145 |
24 Feb 22 |
peter |
// AN: (1) total number of called alleles in GT (not counting '.') |
4145 |
24 Feb 22 |
peter |
358 |
bool have_AC = info().count("AC"); |
4145 |
24 Feb 22 |
peter |
359 |
bool have_AN = info().count("AN"); |
4145 |
24 Feb 22 |
peter |
360 |
if (have_AC==false && have_AN==false) |
4145 |
24 Feb 22 |
peter |
361 |
return; |
4145 |
24 Feb 22 |
peter |
362 |
if (data().count("GT")==0) |
4145 |
24 Feb 22 |
peter |
363 |
return; |
4145 |
24 Feb 22 |
peter |
364 |
std::vector<std::string> GT; |
4145 |
24 Feb 22 |
peter |
365 |
data().get("GT", GT); |
4145 |
24 Feb 22 |
peter |
366 |
|
4145 |
24 Feb 22 |
peter |
367 |
std::vector<unsigned int> AC(n_alleles(), 0); |
4145 |
24 Feb 22 |
peter |
368 |
unsigned int AN=0; |
4145 |
24 Feb 22 |
peter |
369 |
for (const std::string& gt : GT) { |
4145 |
24 Feb 22 |
peter |
370 |
std::vector<std::string> gts; |
4145 |
24 Feb 22 |
peter |
371 |
utility::split(gts, gt, "/|"); |
4145 |
24 Feb 22 |
peter |
372 |
for (const std::string& x : gts) { |
4145 |
24 Feb 22 |
peter |
373 |
if (x != ".") { |
4145 |
24 Feb 22 |
peter |
374 |
++AN; |
4145 |
24 Feb 22 |
peter |
375 |
if (x != "0" && have_AC) { |
4145 |
24 Feb 22 |
peter |
376 |
unsigned int y = utility::convert<unsigned int>(x); |
4145 |
24 Feb 22 |
peter |
377 |
assert(y); |
4145 |
24 Feb 22 |
peter |
378 |
assert(y-1 < AC.size()); |
4145 |
24 Feb 22 |
peter |
379 |
++AC[y-1]; |
4145 |
24 Feb 22 |
peter |
380 |
} |
4145 |
24 Feb 22 |
peter |
381 |
} |
4145 |
24 Feb 22 |
peter |
382 |
} |
4145 |
24 Feb 22 |
peter |
383 |
} |
4145 |
24 Feb 22 |
peter |
384 |
|
4145 |
24 Feb 22 |
peter |
385 |
if (have_AC) |
4145 |
24 Feb 22 |
peter |
386 |
info().set("AC", AC); |
4145 |
24 Feb 22 |
peter |
387 |
if (have_AN) |
4145 |
24 Feb 22 |
peter |
388 |
info().set("AN", AN); |
4145 |
24 Feb 22 |
peter |
389 |
} |
4145 |
24 Feb 22 |
peter |
390 |
|
4145 |
24 Feb 22 |
peter |
391 |
|
3747 |
14 Aug 18 |
peter |
392 |
void VCF::validate(void) const |
3747 |
14 Aug 18 |
peter |
393 |
{ |
3747 |
14 Aug 18 |
peter |
394 |
#ifndef NDEBUG |
3747 |
14 Aug 18 |
peter |
395 |
if (vec_.size()==8) |
3747 |
14 Aug 18 |
peter |
396 |
return; |
3747 |
14 Aug 18 |
peter |
397 |
|
3747 |
14 Aug 18 |
peter |
398 |
if (vec_.size()==9) |
3747 |
14 Aug 18 |
peter |
399 |
throw utility::runtime_error("VCF: no data"); |
3747 |
14 Aug 18 |
peter |
400 |
|
3747 |
14 Aug 18 |
peter |
401 |
info_.validate(); |
3747 |
14 Aug 18 |
peter |
402 |
if (data_.initialised()) |
3747 |
14 Aug 18 |
peter |
403 |
data_.validate(); |
3747 |
14 Aug 18 |
peter |
404 |
|
3747 |
14 Aug 18 |
peter |
405 |
#endif |
3747 |
14 Aug 18 |
peter |
406 |
} |
3747 |
14 Aug 18 |
peter |
407 |
|
3747 |
14 Aug 18 |
peter |
408 |
|
3747 |
14 Aug 18 |
peter |
409 |
bool is_indel(const VCF& vcf) |
3747 |
14 Aug 18 |
peter |
410 |
{ |
3747 |
14 Aug 18 |
peter |
411 |
for (size_t i=0; i<vcf.alts().size(); ++i) |
3920 |
31 May 20 |
peter |
412 |
if (vcf.alts()[i].size() != vcf.ref().size() && |
3920 |
31 May 20 |
peter |
413 |
vcf.alts()[i][0]!='<') |
3747 |
14 Aug 18 |
peter |
414 |
return true; |
3747 |
14 Aug 18 |
peter |
415 |
return false; |
3747 |
14 Aug 18 |
peter |
416 |
} |
3747 |
14 Aug 18 |
peter |
417 |
|
3747 |
14 Aug 18 |
peter |
418 |
|
3747 |
14 Aug 18 |
peter |
419 |
bool is_snv(const VCF& vcf) |
3747 |
14 Aug 18 |
peter |
420 |
{ |
3747 |
14 Aug 18 |
peter |
421 |
if (vcf.ref().size()!=1) |
3747 |
14 Aug 18 |
peter |
422 |
return false; |
3747 |
14 Aug 18 |
peter |
423 |
|
3747 |
14 Aug 18 |
peter |
424 |
for (size_t i=0; i<vcf.alts().size(); ++i) |
3919 |
31 May 20 |
peter |
425 |
if (vcf.alts()[i].size()==1 && vcf.alts()[i]!="*") |
3747 |
14 Aug 18 |
peter |
426 |
return true; |
3747 |
14 Aug 18 |
peter |
427 |
|
3747 |
14 Aug 18 |
peter |
428 |
return false; |
3747 |
14 Aug 18 |
peter |
429 |
} |
3747 |
14 Aug 18 |
peter |
430 |
|
3747 |
14 Aug 18 |
peter |
431 |
|
3747 |
14 Aug 18 |
peter |
432 |
bool is_dnv(const VCF& vcf) |
3747 |
14 Aug 18 |
peter |
433 |
{ |
3747 |
14 Aug 18 |
peter |
434 |
if (vcf.ref().size()!=2) |
3747 |
14 Aug 18 |
peter |
435 |
return false; |
3747 |
14 Aug 18 |
peter |
436 |
|
3747 |
14 Aug 18 |
peter |
437 |
for (size_t i=0; i<vcf.alts().size(); ++i) |
3747 |
14 Aug 18 |
peter |
438 |
if (vcf.alts()[i].size()==2) |
3747 |
14 Aug 18 |
peter |
439 |
return true; |
3747 |
14 Aug 18 |
peter |
440 |
|
3747 |
14 Aug 18 |
peter |
441 |
return false; |
3747 |
14 Aug 18 |
peter |
442 |
} |
3747 |
14 Aug 18 |
peter |
443 |
|
3747 |
14 Aug 18 |
peter |
444 |
|
3747 |
14 Aug 18 |
peter |
445 |
bool is_mnv(const VCF& vcf) |
3747 |
14 Aug 18 |
peter |
446 |
{ |
3747 |
14 Aug 18 |
peter |
447 |
if (vcf.ref().size()<=2) |
3747 |
14 Aug 18 |
peter |
448 |
return false; |
3747 |
14 Aug 18 |
peter |
449 |
|
3747 |
14 Aug 18 |
peter |
450 |
for (size_t i=0; i<vcf.alts().size(); ++i) |
3920 |
31 May 20 |
peter |
451 |
if (vcf.alts()[i].size()==vcf.ref().size() && |
3920 |
31 May 20 |
peter |
452 |
vcf.alts()[i][0]!='<') |
3747 |
14 Aug 18 |
peter |
453 |
return true; |
3747 |
14 Aug 18 |
peter |
454 |
|
3747 |
14 Aug 18 |
peter |
455 |
return false; |
3747 |
14 Aug 18 |
peter |
456 |
} |
3747 |
14 Aug 18 |
peter |
457 |
|
3747 |
14 Aug 18 |
peter |
458 |
|
3747 |
14 Aug 18 |
peter |
459 |
std::istream& operator>>(std::istream& is, VCF& vcf) |
3747 |
14 Aug 18 |
peter |
460 |
{ |
3747 |
14 Aug 18 |
peter |
461 |
if (is && is.peek() != EOF) |
3747 |
14 Aug 18 |
peter |
462 |
vcf = VCF(is); |
3747 |
14 Aug 18 |
peter |
463 |
else |
3747 |
14 Aug 18 |
peter |
464 |
is.get(); // read the last character to transform status into bad |
3747 |
14 Aug 18 |
peter |
465 |
return is; |
3747 |
14 Aug 18 |
peter |
466 |
} |
3747 |
14 Aug 18 |
peter |
467 |
|
3747 |
14 Aug 18 |
peter |
468 |
|
3747 |
14 Aug 18 |
peter |
469 |
std::ostream& operator<<(std::ostream& os, const VCF& vcf) |
3747 |
14 Aug 18 |
peter |
470 |
{ |
3747 |
14 Aug 18 |
peter |
471 |
vcf.print(os); |
3747 |
14 Aug 18 |
peter |
472 |
return os; |
3747 |
14 Aug 18 |
peter |
473 |
} |
3747 |
14 Aug 18 |
peter |
474 |
|
3747 |
14 Aug 18 |
peter |
475 |
|
3747 |
14 Aug 18 |
peter |
// VCF::Info |
4145 |
24 Feb 22 |
peter |
477 |
VCF::Info::Info(std::string&& str) |
4145 |
24 Feb 22 |
peter |
478 |
: str_(std::move(str)) |
3747 |
14 Aug 18 |
peter |
479 |
{ |
4145 |
24 Feb 22 |
peter |
480 |
str = ""; |
3747 |
14 Aug 18 |
peter |
481 |
} |
3747 |
14 Aug 18 |
peter |
482 |
|
3747 |
14 Aug 18 |
peter |
483 |
|
4145 |
24 Feb 22 |
peter |
484 |
void VCF::Info::add(const std::string& key) |
3747 |
14 Aug 18 |
peter |
485 |
{ |
4145 |
24 Feb 22 |
peter |
486 |
index(); |
4145 |
24 Feb 22 |
peter |
487 |
str_ = ""; |
4145 |
24 Feb 22 |
peter |
488 |
keys_.push_back(key); |
4145 |
24 Feb 22 |
peter |
489 |
map_[key]; |
3747 |
14 Aug 18 |
peter |
490 |
} |
3747 |
14 Aug 18 |
peter |
491 |
|
3747 |
14 Aug 18 |
peter |
492 |
|
4145 |
24 Feb 22 |
peter |
493 |
void VCF::Info::clear(void) |
3790 |
04 Apr 19 |
peter |
494 |
{ |
4145 |
24 Feb 22 |
peter |
495 |
str_.clear(); |
4145 |
24 Feb 22 |
peter |
496 |
map_.clear(); |
3790 |
04 Apr 19 |
peter |
497 |
} |
3790 |
04 Apr 19 |
peter |
498 |
|
3790 |
04 Apr 19 |
peter |
499 |
|
3790 |
04 Apr 19 |
peter |
500 |
size_t VCF::Info::count(const std::string& key) const |
3790 |
04 Apr 19 |
peter |
501 |
{ |
4145 |
24 Feb 22 |
peter |
502 |
index(); |
4145 |
24 Feb 22 |
peter |
503 |
return map_.count(key); |
3790 |
04 Apr 19 |
peter |
504 |
} |
3790 |
04 Apr 19 |
peter |
505 |
|
3790 |
04 Apr 19 |
peter |
506 |
|
4145 |
24 Feb 22 |
peter |
507 |
bool VCF::Info::indexed(void) const |
3790 |
04 Apr 19 |
peter |
508 |
{ |
4145 |
24 Feb 22 |
peter |
509 |
return !map_.empty(); |
3790 |
04 Apr 19 |
peter |
510 |
} |
3790 |
04 Apr 19 |
peter |
511 |
|
3790 |
04 Apr 19 |
peter |
512 |
|
4145 |
24 Feb 22 |
peter |
513 |
void VCF::Info::index(void) const |
3747 |
14 Aug 18 |
peter |
514 |
{ |
4145 |
24 Feb 22 |
peter |
515 |
if (indexed()) |
4145 |
24 Feb 22 |
peter |
516 |
return; |
4145 |
24 Feb 22 |
peter |
517 |
if (str_.empty()) |
4145 |
24 Feb 22 |
peter |
518 |
return; |
4145 |
24 Feb 22 |
peter |
519 |
std::map<std::string, std::string>& map = |
4145 |
24 Feb 22 |
peter |
520 |
const_cast<std::map<std::string, std::string>&>(map_); |
4145 |
24 Feb 22 |
peter |
521 |
std::vector<std::string>& keys = |
4145 |
24 Feb 22 |
peter |
522 |
const_cast<std::vector<std::string>&>(keys_); |
3747 |
14 Aug 18 |
peter |
523 |
|
4145 |
24 Feb 22 |
peter |
524 |
auto begin = str_.begin(); |
4145 |
24 Feb 22 |
peter |
525 |
auto end = begin; |
4145 |
24 Feb 22 |
peter |
526 |
for (; end!=str_.end(); begin = end+1) { |
4145 |
24 Feb 22 |
peter |
527 |
end = std::find(begin, str_.end(), ';'); |
4145 |
24 Feb 22 |
peter |
528 |
auto middle = std::find(begin, end, '='); |
4145 |
24 Feb 22 |
peter |
529 |
std::string key(begin, middle); |
4145 |
24 Feb 22 |
peter |
530 |
if (middle == end) |
4145 |
24 Feb 22 |
peter |
531 |
map[key]; |
4145 |
24 Feb 22 |
peter |
532 |
else |
4145 |
24 Feb 22 |
peter |
533 |
map[key] = std::string(middle+1, end); |
4145 |
24 Feb 22 |
peter |
534 |
keys.push_back(std::move(key)); |
4145 |
24 Feb 22 |
peter |
535 |
} |
3747 |
14 Aug 18 |
peter |
536 |
} |
3747 |
14 Aug 18 |
peter |
537 |
|
3747 |
14 Aug 18 |
peter |
538 |
|
3747 |
14 Aug 18 |
peter |
539 |
void VCF::Info::remove(const std::string& key) |
3747 |
14 Aug 18 |
peter |
540 |
{ |
4145 |
24 Feb 22 |
peter |
541 |
index(); |
4145 |
24 Feb 22 |
peter |
542 |
str_ = ""; |
4145 |
24 Feb 22 |
peter |
543 |
map_.erase(key); |
4145 |
24 Feb 22 |
peter |
544 |
auto it = std::find(keys_.begin(), keys_.end(), key); |
4145 |
24 Feb 22 |
peter |
545 |
if (it != keys_.end()) |
4145 |
24 Feb 22 |
peter |
546 |
keys_.erase(it); |
3747 |
14 Aug 18 |
peter |
547 |
} |
3747 |
14 Aug 18 |
peter |
548 |
|
3747 |
14 Aug 18 |
peter |
549 |
|
3747 |
14 Aug 18 |
peter |
550 |
void VCF::Info::set(const std::string& s) |
3747 |
14 Aug 18 |
peter |
551 |
{ |
4145 |
24 Feb 22 |
peter |
552 |
clear(); |
3747 |
14 Aug 18 |
peter |
553 |
str_ = s; |
3747 |
14 Aug 18 |
peter |
554 |
assert(str_ == s); |
3747 |
14 Aug 18 |
peter |
555 |
} |
3747 |
14 Aug 18 |
peter |
556 |
|
3747 |
14 Aug 18 |
peter |
557 |
|
3747 |
14 Aug 18 |
peter |
558 |
void VCF::Info::set(std::string&& s) |
3747 |
14 Aug 18 |
peter |
559 |
{ |
4145 |
24 Feb 22 |
peter |
560 |
clear(); |
3747 |
14 Aug 18 |
peter |
561 |
str_ = std::move(s); |
3747 |
14 Aug 18 |
peter |
562 |
} |
3747 |
14 Aug 18 |
peter |
563 |
|
3790 |
04 Apr 19 |
peter |
564 |
|
3747 |
14 Aug 18 |
peter |
565 |
const std::string& VCF::Info::str(void) const |
3747 |
14 Aug 18 |
peter |
566 |
{ |
4145 |
24 Feb 22 |
peter |
567 |
if (str_.empty()) { |
4146 |
25 Feb 22 |
peter |
568 |
std::ostringstream os; |
4146 |
25 Feb 22 |
peter |
569 |
os << *this; |
4145 |
24 Feb 22 |
peter |
570 |
std::string& str = const_cast<std::string&>(str_); |
4146 |
25 Feb 22 |
peter |
571 |
str = os.str(); |
4145 |
24 Feb 22 |
peter |
572 |
} |
3747 |
14 Aug 18 |
peter |
573 |
return str_; |
3747 |
14 Aug 18 |
peter |
574 |
} |
3747 |
14 Aug 18 |
peter |
575 |
|
3747 |
14 Aug 18 |
peter |
576 |
|
3747 |
14 Aug 18 |
peter |
577 |
void VCF::Info::validate(void) const |
3747 |
14 Aug 18 |
peter |
578 |
{ |
3747 |
14 Aug 18 |
peter |
579 |
#ifndef NDEBUG |
3747 |
14 Aug 18 |
peter |
580 |
#endif |
3747 |
14 Aug 18 |
peter |
581 |
} |
3747 |
14 Aug 18 |
peter |
582 |
|
3747 |
14 Aug 18 |
peter |
583 |
|
3747 |
14 Aug 18 |
peter |
584 |
std::ostream& operator<<(std::ostream& os, const VCF::Info& info) |
3747 |
14 Aug 18 |
peter |
585 |
{ |
4146 |
25 Feb 22 |
peter |
586 |
if (!info.indexed()) { |
4146 |
25 Feb 22 |
peter |
587 |
os << info.str_; |
4146 |
25 Feb 22 |
peter |
588 |
} |
4146 |
25 Feb 22 |
peter |
589 |
else { |
4146 |
25 Feb 22 |
peter |
590 |
for (auto key = info.keys_.begin(); key!=info.keys_.end(); ++key) { |
4146 |
25 Feb 22 |
peter |
591 |
if (key != info.keys_.begin()) |
4146 |
25 Feb 22 |
peter |
592 |
os << ';'; |
4146 |
25 Feb 22 |
peter |
593 |
os << *key; |
4146 |
25 Feb 22 |
peter |
// FIXME, to avoid this logN look-up, do not store keys as |
4146 |
25 Feb 22 |
peter |
// values, but as iterators/pointers to map elements. |
4146 |
25 Feb 22 |
peter |
596 |
auto it = info.map_.find(*key); |
4146 |
25 Feb 22 |
peter |
597 |
assert(it != info.map_.end()); |
4146 |
25 Feb 22 |
peter |
598 |
if (it->second != "") |
4146 |
25 Feb 22 |
peter |
599 |
os << "=" << it->second; |
4146 |
25 Feb 22 |
peter |
600 |
} |
4146 |
25 Feb 22 |
peter |
601 |
} |
3747 |
14 Aug 18 |
peter |
602 |
return os; |
3747 |
14 Aug 18 |
peter |
603 |
} |
3747 |
14 Aug 18 |
peter |
604 |
|
3747 |
14 Aug 18 |
peter |
605 |
|
3747 |
14 Aug 18 |
peter |
// VCF::Data |
3747 |
14 Aug 18 |
peter |
607 |
VCF::Data::Data(void) |
3747 |
14 Aug 18 |
peter |
608 |
: initialised_(false) |
3747 |
14 Aug 18 |
peter |
609 |
{} |
3747 |
14 Aug 18 |
peter |
610 |
|
3747 |
14 Aug 18 |
peter |
611 |
|
4145 |
24 Feb 22 |
peter |
612 |
VCF::Data::Data(size_t n) |
4145 |
24 Feb 22 |
peter |
613 |
: initialised_(false) |
4145 |
24 Feb 22 |
peter |
614 |
{ |
4145 |
24 Feb 22 |
peter |
615 |
std::vector<std::string> tmp(n+1); |
4145 |
24 Feb 22 |
peter |
616 |
init(tmp.begin(), tmp.end()); |
4145 |
24 Feb 22 |
peter |
617 |
} |
4145 |
24 Feb 22 |
peter |
618 |
|
4145 |
24 Feb 22 |
peter |
619 |
|
3747 |
14 Aug 18 |
peter |
620 |
void VCF::Data::clear(void) |
3747 |
14 Aug 18 |
peter |
621 |
{ |
3747 |
14 Aug 18 |
peter |
// set format to empty string |
3747 |
14 Aug 18 |
peter |
623 |
format_.clear(); |
3747 |
14 Aug 18 |
peter |
// clear each element in data_ (to empty vector) |
3747 |
14 Aug 18 |
peter |
625 |
std::for_each(data_.begin(), data_.end(), |
4252 |
18 Nov 22 |
peter |
626 |
std::mem_fn(&std::vector<std::string>::clear)); |
3747 |
14 Aug 18 |
peter |
627 |
} |
3747 |
14 Aug 18 |
peter |
628 |
|
3747 |
14 Aug 18 |
peter |
629 |
|
3790 |
04 Apr 19 |
peter |
630 |
size_t VCF::Data::count(const std::string& key) const |
3747 |
14 Aug 18 |
peter |
631 |
{ |
3790 |
04 Apr 19 |
peter |
632 |
if (std::find(format_.begin(), format_.end(), key) != format_.end()) |
3790 |
04 Apr 19 |
peter |
633 |
return 1; |
3790 |
04 Apr 19 |
peter |
634 |
return 0; |
3747 |
14 Aug 18 |
peter |
635 |
} |
3747 |
14 Aug 18 |
peter |
636 |
|
3747 |
14 Aug 18 |
peter |
637 |
|
3747 |
14 Aug 18 |
peter |
638 |
const std::vector<std::string>& VCF::Data::format(void) const |
3747 |
14 Aug 18 |
peter |
639 |
{ |
3747 |
14 Aug 18 |
peter |
640 |
return format_; |
3747 |
14 Aug 18 |
peter |
641 |
} |
3747 |
14 Aug 18 |
peter |
642 |
|
3747 |
14 Aug 18 |
peter |
643 |
|
3747 |
14 Aug 18 |
peter |
644 |
size_t VCF::Data::index(const std::string& key) const |
3747 |
14 Aug 18 |
peter |
645 |
{ |
3747 |
14 Aug 18 |
peter |
646 |
std::vector<std::string>::const_iterator it = |
3747 |
14 Aug 18 |
peter |
647 |
std::find(format_.begin(), format_.end(), key); |
3747 |
14 Aug 18 |
peter |
648 |
if (it == format_.end()) { |
3747 |
14 Aug 18 |
peter |
649 |
std::ostringstream msg; |
3747 |
14 Aug 18 |
peter |
650 |
msg << "VCF::Data::get: unknown key: " << key; |
3747 |
14 Aug 18 |
peter |
651 |
throw utility::runtime_error(msg.str()); |
3747 |
14 Aug 18 |
peter |
652 |
} |
3747 |
14 Aug 18 |
peter |
653 |
|
3747 |
14 Aug 18 |
peter |
654 |
return it - format_.begin(); |
3747 |
14 Aug 18 |
peter |
655 |
} |
3747 |
14 Aug 18 |
peter |
656 |
|
3747 |
14 Aug 18 |
peter |
657 |
|
3747 |
14 Aug 18 |
peter |
658 |
bool VCF::Data::initialised(void) const |
3747 |
14 Aug 18 |
peter |
659 |
{ |
3747 |
14 Aug 18 |
peter |
660 |
return initialised_; |
3747 |
14 Aug 18 |
peter |
661 |
} |
3747 |
14 Aug 18 |
peter |
662 |
|
3747 |
14 Aug 18 |
peter |
663 |
|
3747 |
14 Aug 18 |
peter |
664 |
void VCF::Data::init(std::vector<std::string>::const_iterator it, |
3747 |
14 Aug 18 |
peter |
665 |
std::vector<std::string>::const_iterator end) |
3747 |
14 Aug 18 |
peter |
666 |
{ |
3747 |
14 Aug 18 |
peter |
// only call init once |
3747 |
14 Aug 18 |
peter |
668 |
assert(!initialised()); |
3747 |
14 Aug 18 |
peter |
669 |
if (it==end) { |
3747 |
14 Aug 18 |
peter |
670 |
initialised_ = true; |
3747 |
14 Aug 18 |
peter |
671 |
return; |
3747 |
14 Aug 18 |
peter |
672 |
} |
3747 |
14 Aug 18 |
peter |
673 |
format_.clear(); |
3747 |
14 Aug 18 |
peter |
674 |
if (!it->empty()) |
3747 |
14 Aug 18 |
peter |
675 |
utility::split(format_, *it, ':'); |
3747 |
14 Aug 18 |
peter |
676 |
++it; |
3747 |
14 Aug 18 |
peter |
677 |
size_t n = end - it; |
3747 |
14 Aug 18 |
peter |
678 |
data_.resize(n); |
3747 |
14 Aug 18 |
peter |
679 |
for (size_t i=0; i<n; ++i) { |
3747 |
14 Aug 18 |
peter |
680 |
data_[i].clear(); |
3747 |
14 Aug 18 |
peter |
681 |
if (!it[i].empty()) |
3747 |
14 Aug 18 |
peter |
682 |
utility::split(data_[i], it[i], ':'); |
3747 |
14 Aug 18 |
peter |
683 |
} |
3747 |
14 Aug 18 |
peter |
684 |
initialised_ = true; |
3747 |
14 Aug 18 |
peter |
685 |
validate(); |
3747 |
14 Aug 18 |
peter |
686 |
} |
3747 |
14 Aug 18 |
peter |
687 |
|
3747 |
14 Aug 18 |
peter |
688 |
|
4145 |
24 Feb 22 |
peter |
689 |
size_t VCF::Data::n_samples(void) const |
4145 |
24 Feb 22 |
peter |
690 |
{ |
4145 |
24 Feb 22 |
peter |
691 |
return data_.size(); |
4145 |
24 Feb 22 |
peter |
692 |
} |
4145 |
24 Feb 22 |
peter |
693 |
|
4145 |
24 Feb 22 |
peter |
694 |
|
4145 |
24 Feb 22 |
peter |
695 |
void VCF::Data::subset(const std::vector<size_t>& index) |
4145 |
24 Feb 22 |
peter |
696 |
{ |
4145 |
24 Feb 22 |
peter |
697 |
std::vector<std::vector<std::string>> |
4145 |
24 Feb 22 |
peter |
698 |
tmp(boost::make_permutation_iterator(data_.begin(), index.begin()), |
4145 |
24 Feb 22 |
peter |
699 |
boost::make_permutation_iterator(data_.begin(), index.end())); |
4145 |
24 Feb 22 |
peter |
700 |
data_ = std::move(tmp); |
4145 |
24 Feb 22 |
peter |
701 |
} |
4145 |
24 Feb 22 |
peter |
702 |
|
4145 |
24 Feb 22 |
peter |
703 |
|
3747 |
14 Aug 18 |
peter |
704 |
void VCF::Data::validate(void) const |
3747 |
14 Aug 18 |
peter |
705 |
{ |
3747 |
14 Aug 18 |
peter |
706 |
#ifndef NDEBUG |
3747 |
14 Aug 18 |
peter |
707 |
assert(initialised()); |
3747 |
14 Aug 18 |
peter |
708 |
for (size_t i=0; i<format_.size(); ++i) { |
3747 |
14 Aug 18 |
peter |
709 |
if (format_[i].empty()) { |
3747 |
14 Aug 18 |
peter |
710 |
std::ostringstream msg; |
3747 |
14 Aug 18 |
peter |
711 |
msg << "VCF: empty format element in '"; |
3747 |
14 Aug 18 |
peter |
712 |
for (size_t j=0; j<format_.size(); ++j) { |
3747 |
14 Aug 18 |
peter |
713 |
if (j) |
3747 |
14 Aug 18 |
peter |
714 |
msg << ':'; |
3747 |
14 Aug 18 |
peter |
715 |
msg << format_[j]; |
3747 |
14 Aug 18 |
peter |
716 |
} |
3747 |
14 Aug 18 |
peter |
717 |
msg << "'"; |
3747 |
14 Aug 18 |
peter |
718 |
throw utility::runtime_error(msg.str()); |
3747 |
14 Aug 18 |
peter |
719 |
} |
3747 |
14 Aug 18 |
peter |
720 |
} |
3747 |
14 Aug 18 |
peter |
721 |
|
3747 |
14 Aug 18 |
peter |
// check that each DATA element has same size as FORMAT |
3747 |
14 Aug 18 |
peter |
723 |
for (size_t sample=0; sample<data_.size(); ++sample) { |
3747 |
14 Aug 18 |
peter |
724 |
const std::vector<std::string>& vec = data_[sample]; |
3747 |
14 Aug 18 |
peter |
725 |
if (vec.size() != format_.size()) { |
3747 |
14 Aug 18 |
peter |
726 |
std::ostringstream msg; |
3747 |
14 Aug 18 |
peter |
727 |
msg << "wrong size: "; |
3747 |
14 Aug 18 |
peter |
728 |
for (size_t i=0; i<vec.size(); ++i) { |
3747 |
14 Aug 18 |
peter |
729 |
if (i) |
3747 |
14 Aug 18 |
peter |
730 |
msg << ":"; |
3747 |
14 Aug 18 |
peter |
731 |
msg << vec[i]; |
3747 |
14 Aug 18 |
peter |
732 |
} |
3747 |
14 Aug 18 |
peter |
733 |
msg << "\n"; |
3747 |
14 Aug 18 |
peter |
734 |
msg << "expected " << format_.size() << " fields\n"; |
3747 |
14 Aug 18 |
peter |
735 |
throw utility::runtime_error(msg.str()); |
3747 |
14 Aug 18 |
peter |
736 |
} |
3747 |
14 Aug 18 |
peter |
737 |
} |
3747 |
14 Aug 18 |
peter |
738 |
#endif |
3747 |
14 Aug 18 |
peter |
739 |
} |
3747 |
14 Aug 18 |
peter |
740 |
|
3747 |
14 Aug 18 |
peter |
741 |
|
3747 |
14 Aug 18 |
peter |
742 |
std::ostream& operator<<(std::ostream& os, const VCF::Data& data) |
3747 |
14 Aug 18 |
peter |
743 |
{ |
3747 |
14 Aug 18 |
peter |
744 |
if (data.format().empty()) |
3747 |
14 Aug 18 |
peter |
745 |
return os; |
3747 |
14 Aug 18 |
peter |
746 |
std::copy(data.format().begin(), data.format().end(), |
3747 |
14 Aug 18 |
peter |
747 |
utility::OstreamIterator<std::string>(os, ":")); |
3747 |
14 Aug 18 |
peter |
748 |
|
3747 |
14 Aug 18 |
peter |
749 |
for (size_t i=0; i<data.data_.size(); ++i) { |
3747 |
14 Aug 18 |
peter |
750 |
os << "\t"; |
3747 |
14 Aug 18 |
peter |
751 |
std::copy(data.data_[i].begin(), data.data_[i].end(), |
3747 |
14 Aug 18 |
peter |
752 |
utility::OstreamIterator<std::string>(os, ":")); |
3747 |
14 Aug 18 |
peter |
753 |
} |
3747 |
14 Aug 18 |
peter |
754 |
return os; |
3747 |
14 Aug 18 |
peter |
755 |
} |
3747 |
14 Aug 18 |
peter |
756 |
|
3747 |
14 Aug 18 |
peter |
757 |
}}} |