yat  0.16.4pre
VCF.h
1 #ifndef theplu_yat_omic_vcf
2 #define theplu_yat_omic_vcf
3 
4 // $Id: VCF.h 3807 2019-07-04 22:37:01Z peter $
5 
6 /*
7  Copyright (C) 2018, 2019 Peter Johansson
8 
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10 
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15 
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23 */
24 
26 
27 #include "yat/utility/Exception.h"
28 #include "yat/utility/split.h"
29 #include "yat/utility/utility.h"
30 #include "yat/utility/yat_assert.h"
31 
32 #include <boost/cstdint.hpp>
33 
34 #include <algorithm>
35 #include <vector>
36 #include <string>
37 
38 namespace theplu {
39 namespace yat {
40 namespace omic {
41 
// Representation of a single VCF entry. The accessors below expose the
// entry's fields (chromosome, position, id, ref, alt, qual, filter,
// info and per-sample data). Only declarations are visible in this
// header; the non-template members are defined elsewhere.
48  class VCF
49  {
50  public:
     // default constructor
54  VCF(void);
55 
     // Construct from a string.
     // NOTE(review): presumably one line of a VCF file -- confirm
     // against the definition.
59  explicit VCF(const std::string&);
60 
     // Construct from an input stream.
     // NOTE(review): presumably reads one entry from `is` -- confirm
     // against the definition.
69  explicit VCF(std::istream& is);
70 
     // set chromosome
74  void chromosome(const std::string& chr);
75 
     // return chromosome
79  const std::string& chromosome(void) const;
80 
     // set position
84  void pos(int32_t p);
85 
     // return position
89  const int32_t& pos(void) const;
90 
     // set id
96  void id(const std::string& id);
97 
     // return id
101  const std::string& id(void) const;
102 
     // set reference allele from a string
106  void ref(const std::string& r);
107 
     // set reference allele from a single character
111  void ref(char r);
112 
     // return reference allele
116  const std::string& ref(void) const;
117 
     // set alternative allele(s) from a string
123  void alt(const std::string& a);
124 
     // set alternative allele from a single character
130  void alt(char a);
131 
     // return alternative allele(s) as one string
135  const std::string& alt(void) const;
136 
     // return alternative alleles as a vector of strings (backed by
     // the lazily filled member alts_, see private section)
140  const std::vector<std::string>& alts(void) const;
141 
     // set alternative alleles from a vector of strings
145  void alts(const std::vector<std::string>& a);
146 
147 #ifdef YAT_HAVE_RVALUE
148 
     // rvalue overload of alts(const std::vector<std::string>&); only
     // declared when YAT_HAVE_RVALUE is defined
151  void alts(std::vector<std::string>&& a);
152 #endif
153 
     // number of alleles
     // NOTE(review): presumably reference plus alternatives -- confirm
157  unsigned int n_alleles(void) const;
158 
     // return qual: phred-scaled probability that all samples are
     // wildtype
162  const std::string& qual(void) const;
163 
     // set qual from a string
167  void qual(const std::string& q);
168 
     // set qual from an unsigned integer
172  void qual(unsigned int q);
173 
     // return filter field as one string
177  const std::string& filter(void) const;
178 
     // return filters as a vector of strings (backed by the lazily
     // filled member filters_, see private section)
182  const std::vector<std::string>& filters(void) const;
183 
     // append list of filters
187  void add_filter(const std::string&);
188 
     // set filter field
192  void filter(const std::string&);
193 
     // class handling the INFO field in a VCF entry
197  class Info
198  {
199  public:
     // Add `key` without a value. The templated overload below calls
     // this function first and then appends "=<value>".
203  void add(const std::string& key);
204 
     // Add `key` with `value`; T is a string, a numeric type, or a
     // vector of those (vector elements are joined with ',').
209  template<typename T>
210  void add(const std::string& key, const T& value);
211 
     // clear the INFO field
215  void clear(void);
216 
     // count entries with key `key`
     // NOTE(review): exact return semantics are defined outside this
     // header
220  size_t count(const std::string& key) const;
221 
     // Convert the value stored for `key` into `result`. If the key
     // is not found, the converter is applied to an empty string.
227  template<typename T>
228  void get(T& result, const std::string& key) const;
229 
     // remove entry with key `key`
233  void remove(const std::string& key);
234 
     // set the INFO field from a string
238  void set(const std::string& s);
239 
240 #ifdef YAT_HAVE_RVALUE
241 
     // rvalue overload of set(const std::string&); only declared when
     // YAT_HAVE_RVALUE is defined
244  void set(std::string&& s);
245 #endif
246 
     // return the INFO field as a string
250  const std::string& str(void) const;
251 
252  private:
253  friend std::ostream& operator<<(std::ostream&, const VCF::Info&);
254  friend class VCF;
255 
256  // find range [begin, end) such that
257  // 1) [begin, end) == key
258  // 2) begin is equal to str_.begin() or begin[-1] is ';'
259  // 3) end is equal to str_.end() or end[0] == '=' or end[0] == ';'
260  std::string::iterator find(const std::string& key);
261  // same as find (above) but returns a const_iterator
262  std::string::const_iterator cfind(const std::string& key) const;
263 
264  void init(std::string& str);
265 #ifdef YAT_HAVE_RVALUE
266  void init(std::string&& str);
267 #endif
268 
269  // For now we store data as raw string (as it's stored in
270  // file). This has no overhead and is thus the fastest in case
271  // when the info field is not accessed nor modified. For the
272  // case when heavily accessed and modified, it might be
273  // preferable to also store data in a map<string, string> with a
274  // lazy implementation switching between storages depending on
275  // usage (i.e. avoid building the map until needed).
276  std::string str_;
277 
278  // only call if initialised
279  void validate(void) const;
280  }; // end class Info
281 
282 
     // access info (mutable), e.g. to set info
286  VCF::Info& info(void);
287 
     // access info (read-only)
291  const VCF::Info& info(void) const;
292 
     // class holding data stored in FORMAT and data fields
296  class Data
297  {
298  public:
     // default constructor
300  Data(void);
301 
     // Add a FORMAT key together with one value per element of
     // `data`; T is a string, a numeric type, or a vector of those.
     // The implementation indexes the internal per-sample storage
     // with the same index as `data`.
303  template<typename T>
304  void add(const std::string& key, const std::vector<T>& data);
305 
     // clear the data
310  void clear(void);
311 
     // count occurrences of `key`
     // NOTE(review): presumably among the format keys -- confirm
315  size_t count(const std::string& key) const;
316 
     // return the format keys
322  const std::vector<std::string>& format(void) const;
323 
     // Retrieve the values stored under `key`, one element per
     // sample; T is a string, a numeric type, or a vector of those.
     // `data` is resized to the number of stored samples.
325  template<typename T>
326  void get(const std::string& key, std::vector<T>& data) const;
327 
328  private:
329  friend std::ostream& operator<<(std::ostream&, const VCF::Data&);
330  friend class VCF;
331 
332  // return true if class has been initialised
333  bool initialised(void) const;
334  void init(std::vector<std::string>::const_iterator,
335  std::vector<std::string>::const_iterator);
     // position of `key`; used by get() as the second index into
     // data_ (data_[sample][index(key)])
336  size_t index(const std::string& key) const;
337  bool initialised_;
     // keys; add() appends one key per call
338  std::vector<std::string> format_;
     // values as strings, indexed data_[sample][key position]
339  std::vector<std::vector<std::string> > data_;
340 
341  // only call if initialised
342  void validate(void) const;
343  }; // end class Data
344 
     // access data (read-only)
348  const Data& data(void) const;
349 
     // access data (mutable)
353  Data& data(void);
354 
     // return number of samples
358  unsigned int n_samples(void) const;
359 
     // set number of samples
363  void n_samples(int n);
364 
365  private:
366  friend std::ostream& operator<<(std::ostream&, const VCF&);
     // string storage of the entry; element 4 holds the alt field
     // (see update_alt below)
367  std::vector<std::string> vec_;
368  int32_t position_;
369 
370  Info info_;
371  Data data_;
372 
373  void init(std::vector<std::string>& vec);
374  // update vec_[4] from alts_
375  void update_alt(void);
376  void print(std::ostream& os) const;
377 
378  // if NDEBUG no-op
379  // otherwise throw if not valid
380  void validate(void) const;
381 
382  // private lazy variables
383  mutable std::vector<std::string> alts_;
384  mutable std::vector<std::string> filters_;
385 
386  // using compiler generated copy
387  // VCF(const VCF&)
388  // VCF& operator=(const VCF&)
389  // using compiler generated move
390  // VCF(VCF&&)
391  // VCF& operator=(VCF&&)
392  };
393 
// Free functions operating on VCF. Only declarations are visible in
// this header.

// output operator for VCF
400  std::ostream& operator<<(std::ostream& os, const VCF& vcf);
401 
// input operator for VCF
412  std::istream& operator>>(std::istream& is, VCF& vcf);
413 
// output operator for VCF::Info
420  std::ostream& operator<<(std::ostream& os, const VCF::Info& info);
421 
// output operator for VCF::Data
428  std::ostream& operator<<(std::ostream& os, const VCF::Data& data);
429 
// NOTE(review): judging by the name, returns true if the entry is an
// insertion/deletion -- confirm against the definition
434  bool is_indel(const VCF&);
435 
// NOTE(review): judging by the name, returns true if the entry is a
// single nucleotide variant -- confirm against the definition
441  bool is_snv(const VCF&);
442 
// NOTE(review): judging by the name, returns true if the entry is a
// dinucleotide variant -- confirm against the definition
448  bool is_dnv(const VCF&);
449 
// NOTE(review): judging by the name, returns true if the entry is a
// multi nucleotide variant -- confirm against the definition
455  bool is_mnv(const VCF&);
456 
457 
458  // We would prefer to hide this privately with VCF, but explicit
459  // template specialization must be in namespace scope.
460  //
462  namespace detail {
463 
464  // class that append a string from T; T might be string, numerical
465  // or a vector of above.
466  template <typename T>
467  struct Appender
468  {
469  Appender(char delim) : delim_(delim) {}
470  void operator()(std::string& val, const T& x) const
471  {
472  val += utility::convert(x);
473  }
474  private:
475  char delim_;
476  };
477 
478 
479  template <>
480  struct Appender<std::string>
481  {
482  Appender(char delim) : delim_(delim) {}
483  void operator()(std::string& val, const std::string& x) const
484  {
485  val += x;
486  }
487  private:
488  char delim_;
489  };
490 
491 
492  template <>
493  struct Appender<std::vector<std::string> >
494  {
495  Appender(char delim) : delim_(delim) {}
496  void operator()(std::string& val,const std::vector<std::string>& x) const
497  {
498  for (size_t i=0; i<x.size(); ++i) {
499  if (i)
500  val += delim_;
501  val += x[i];
502  }
503  }
504  private:
505  char delim_;
506  };
507 
508 
509  template <typename T>
510  struct Appender<std::vector<T> >
511  {
512  Appender(char delim) : delim_(delim) {}
513  void operator()(std::string& val, const std::vector<T>& x) const
514  {
515  for (size_t i=0; i<x.size(); ++i) {
516  if (i)
517  val += delim_;
518  val += utility::convert(x[i]);
519  }
520  }
521  private:
522  char delim_;
523  };
524 
525 
526  // Class that converts a string to T
527  template<typename T>
528  struct Converter
529  {
530  void operator()(const std::string& x, T& result) const
531  {
532  result = utility::convert<T>(x);
533  }
534  };
535 
536 
537  template<>
538  struct Converter<std::string>
539  {
540  void operator()(const std::string& x, std::string& result) const
541  {
542  result = x;
543  }
544  };
545 
546 
547  template<typename T>
548  struct Converter<std::vector<T> >
549  {
550  void operator()(const std::string& x, std::vector<T>& result) const
551  {
552  std::vector<std::string> vec;
553  utility::split(vec, x, ',');
554  result.clear();
555  result.reserve(vec.size());
556  for (size_t i=0; i<vec.size(); ++i)
557  result.push_back(utility::convert<T>(vec[i]));
558  }
559  };
560 
561 
562  template<>
563  struct Converter<std::vector<std::string> >
564  {
565  void
566  operator()(const std::string& x, std::vector<std::string>& result) const
567  {
568  result.clear();
569  utility::split(result, x, ',');
570  }
571  };
572 
573  }
574 
576 
577 
578  // template implementations
579 
580  // VCF::Info
581 
582  template<typename T>
583  void VCF::Info::add(const std::string& key, const T& value)
584  {
585  add(key);
586  str_ += '=';
587  detail::Appender<T> append(',');
588  append(str_, value);
589  }
590 
591 
592  template<typename T>
593  void VCF::Info::get(T& result, const std::string& key) const
594  {
595  detail::Converter<T> converter;
596  std::string value;
597  std::string k = key + "=";
598  size_t begin = str_.find(k);
599 
600  if (begin == std::string::npos) {
601  converter(value, result);
602  return;
603  }
604 
605  if (begin && str_[begin-1]!=';') {
606  k = ";" + key + "=";
607  begin = str_.find(k, begin+key.size());
608  if (begin == std::string::npos) {
609  converter(value, result);
610  return;
611  }
612  }
613 
614  begin += k.size();
615  int end = str_.find(";", begin);
616  value = str_.substr(begin, end-begin);
617  converter(value, result);
618  }
619 
620 
621  // VCF::Data
622 
623  template<typename T>
624  void VCF::Data::add(const std::string& key, const std::vector<T>& input)
625  {
626  YAT_ASSERT(key != "");
627  format_.push_back(key);
628  detail::Appender<T> append(',');
629  for (size_t i=0; i<input.size(); ++i) {
630  data_[i].push_back("");
631  append(data_[i].back(), input[i]);
632  }
633  }
634 
635 
636  template<typename T>
637  void VCF::Data::get(const std::string& key, std::vector<T>& data) const
638  {
639  size_t idx = index(key);
640  detail::Converter<T> converter;
641  data.resize(data_.size());
642  for (size_t i=0; i<data.size(); ++i)
643  converter(data_[i][idx], data[i]);
644  }
645 
646 
647  /*
648  template<typename T>
649  void VCF::add_data(const std::string& key, const std::vector<T>& val)
650  {
651  if (format() != "")
652  format() += ":";
653  format() += key;
654 
655 
656  YAT_ASSERT(val.size()+9 == vec_.size());
657  for (size_t i=0; i<val.size(); ++i) {
658  std::string& data = vec_[i+9];
659  if (data != "")
660  data += ":";
661  append(data, val[i]);
662  }
663  }
664  */
665 }}}
666 #endif
const std::string & ref(void) const
const Data & data(void) const
access data
bool is(const std::string &s)
check if string is convertible to (numerical) type T
Definition: utility.h:541
const std::vector< std::string > & alts(void) const
Definition: VCF.h:48
const std::string & chromosome(void) const
void get(const std::string &key, std::vector< T > &data) const
T is a string, a numeric type, or a vector of strings or numerics.
Definition: VCF.h:637
The Department of Theoretical Physics namespace as we define it.
Some useful functions are placed here.
void add(const std::string &key)
bool is_snv(const VCF &)
void get(T &result, const std::string &key) const
Definition: VCF.h:593
VCF(void)
default constructor
size_t count(const std::string &key) const
const std::string & alt(void) const
unsigned int n_alleles(void) const
const int32_t & pos(void) const
std::istream & operator>>(std::istream &is, VCF &vcf)
input operator for VCF
VCF::Info & info(void)
set info
class holding data stored in FORMAT and data fields
Definition: VCF.h:296
const std::string & str(void) const
const std::vector< std::string > & filters(void) const
void split(std::vector< std::string > &result, const std::string &str, char delim)
split a string into several substrings
const std::string & qual(void) const
phred-scaled probability that all samples are wildtype
class handling the INFO field in a VCF entry
Definition: VCF.h:197
const std::string & filter(void) const
bool is_indel(const VCF &)
bool is_dnv(const VCF &)
std::string convert(T input)
convert T to a string
Definition: utility.h:521
const std::string & id(void) const
void add_filter(const std::string &)
append list of filters
unsigned int n_samples(void) const
bool is_mnv(const VCF &)
void add(const std::string &key, const std::vector< T > &data)
T is a string, a numeric type, or a vector of strings or numerics.
Definition: VCF.h:624

Generated on Thu Dec 12 2019 03:12:08 for yat by  doxygen 1.8.11