yat  0.17.3pre
Pileup.h
1 #ifndef theplu_yat_omic_pileup
2 #define theplu_yat_omic_pileup
3 
4 // $Id: Pileup.h 3855 2020-01-02 01:11:34Z peter $
5 
6 /*
7  Copyright (C) 2014, 2016, 2017, 2018, 2019 Peter Johansson
8 
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10 
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15 
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23 */
24 
25 #include "BamRead.h"
26 
27 #include "yat/utility/CigarIterator.h"
28 #include "yat/utility/yat_assert.h"
29 
30 #include <boost/iterator/iterator_traits.hpp>
31 #include <boost/iterator/iterator_concepts.hpp>
32 #include <boost/concept/assert.hpp>
33 #include <boost/shared_ptr.hpp>
34 
35 #include <algorithm>
36 #include <list>
37 
38 namespace theplu {
39 namespace yat {
40 namespace omic {
41 
56  template<typename Iterator>
57  class Pileup
58  {
59  public:
64  class Entry
65  {
66  public:
70  Entry(void) {}
71 
76  Entry(const BamRead& b);
77 
81  const BamRead& bam(void) const
82  {
83  YAT_ASSERT(bam_.get());
84  return *bam_;
85  }
86 
90  uint8_t cigar_op(void) const { return *cigar_; }
91 
92 
96  uint32_t cigar_oplen(void) const
97  {
98  YAT_ASSERT(bam_.get());
99  YAT_ASSERT(cigar_.base() >= bam_->cigar());
100  return bam_cigar_oplen(*cigar_.base());
101  }
102 
103 
107  unsigned short qual(void) const
108  {
109  YAT_ASSERT(bam_.get());
110  YAT_ASSERT(qpos_ < bam_->sequence_length());
111  return bam_->qual(qpos_);
112  }
113 
120  uint8_t sequence(void) const;
121 
126  void increment(void);
127 
131  bool is_del(void) const
132  {
133  return cigar_type()==2;
134  }
135 
139  size_t qpos(void) const { return qpos_; }
140  private:
141  boost::shared_ptr<BamRead> bam_;
142  // index of base pointed to
143  size_t qpos_;
145  size_t skip_;
146  // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h
147  uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); }
148  friend class Pileup;
149  };
150 
151  public:
156  typedef typename std::list<Entry>::const_iterator const_iterator;
157 
165  typedef typename std::list<Entry>::const_reverse_iterator
167 
171  Pileup(void) {}
172 
179  Pileup(Iterator first, Iterator last);
180 
188  const_iterator begin(void) const;
189 
194  const_iterator end(void) const;
195 
199  bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
200 
211  void increment(void);
212 
219  size_t size(void) const;
220 
228  const_reverse_iterator rbegin(void) const;
229 
238  const_reverse_iterator rend(void) const;
239 
243  int32_t tid(void) const { return tid_; }
244 
248  int32_t pos(void) const { return pos_; }
249 
254  bool skip_ref(void) const;
255  private:
256  typedef typename std::list<Entry>::iterator iterator;
257 
258  Iterator bam_;
259  Iterator bam_end_;
260  int32_t pos_;
261  int32_t tid_;
262  std::list<Entry> data_;
263  uint32_t skip_ref_;
264  };
265 
266 
272  template<typename Iterator>
273  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
274  {
275  return Pileup<Iterator>(first, last);
276  }
277 
278  // implementations
279 
280  template<typename Iterator>
281  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
282  : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
283  {
284  BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Iterator>));
285  BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Iterator>));
286  using boost::Convertible;
287  typedef typename boost::iterator_value<Iterator>::type value_type;
288  BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
289  increment();
290  }
291 
292 
293  template<typename Iterator>
295  {
296  return data_.begin();
297  }
298 
299 
300  template<typename Iterator>
303  {
304  return data_.rbegin();
305  }
306 
307 
308  template<typename Iterator>
310  {
311  return data_.end();
312  }
313 
314 
315  template<typename Iterator>
318  {
319  return data_.rend();
320  }
321 
322 
323  template<typename Iterator>
325  {
326  if (!data_.empty()) {
327  // in insertion
328  if (skip_ref()) {
329  for (iterator it = data_.begin(); it!=data_.end(); ++it) {
330  if (it->cigar_type() == 1) {
331  it->increment();
332  }
333  --it->skip_;
334  }
335  --skip_ref_;
336  }
337  // walk one base forward on reference
338  else {
339  ++pos_;
340  typename std::list<Entry>::iterator read = data_.begin();
341  while (read != data_.end()) {
342  read->increment();
343  // remove entry if read is entirely left of pos
344  if (read->bam().end() <= pos_) { // FIXME speedup possible
345  data_.erase(read++);
346  continue;
347  }
348  // check if we're stepping into insertion
349  if (read->cigar_op() == BAM_CINS) {
350  skip_ref_ = std::max(skip_ref_, read->cigar_oplen());
351  }
352  ++read;
353  }
354 
355  // stepped into an insertion
356  if (skip_ref_)
357  for (iterator iter=data_.begin(); iter!=data_.end(); ++iter)
358  iter->skip_ = skip_ref_;
359  }
360  }
361 
362 
363  // fast forward to next covered locus
364  if (data_.empty()) {
365  if (bam_==bam_end_)
366  return;
367  tid_ = bam_->core().tid;
368  pos_ = bam_->core().pos;
369  }
370 
371  // read data
372  while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
373  // skip unmapped reads
374  if (!(bam_->core().flag & BAM_FUNMAP)) {
375  data_.push_back(Entry(*bam_));
376  }
377  ++bam_;
378  }
379  }
380 
381 
382  template<typename Iterator>
383  bool Pileup<Iterator>::skip_ref(void) const
384  {
385  return skip_ref_;
386  }
387 
388 
389  template<typename Iterator>
390  size_t Pileup<Iterator>::size(void) const
391  {
392  return data_.size();
393  }
394 
395 
396  template<typename Iterator>
398  : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0)
399  {
400  YAT_ASSERT(cigar_.base() == bam_->cigar());
401  if (bam_->core().n_cigar==0)
402  return;
403  // iterate cigar to first match
404  while (qpos_ < bam_->sequence_length() && cigar_type() != 3) {
405  // type is either 0 (hardclipped) or 1 (softclipped)
406  YAT_ASSERT(cigar_type()!=2);
407  if (cigar_type() == 1)
408  ++qpos_;
409  ++cigar_;
410  }
411  }
412 
413 
414  template<typename Iterator>
416  {
417  YAT_ASSERT(cigar_.base() >= bam_->cigar());
418  if (cigar_type() & 1)
419  ++qpos_;
420  ++cigar_;
421  YAT_ASSERT(cigar_.base() >= bam_->cigar());
422  }
423 
424 
425  template<typename Iterator>
427  {
428  if (skip_==0) {
429  if (cigar_type() & 1)
430  return bam_->sequence(qpos_);
431  return 0;
432  }
433  if (cigar_op()!=BAM_CINS)
434  return 0;
435  return bam_->sequence(qpos_);
436  }
437 
438 }}}
439 #endif
The Department of Theoretical Physics namespace as we define it.
size_t size(void) const
Definition: Pileup.h:390
std::list< Entry >::const_iterator const_iterator
Definition: Pileup.h:156
void increment(void)
step to next position.
Definition: Pileup.h:324
Class holding a bam query.
Definition: BamRead.h:53
uint8_t cigar_op(void) const
Definition: Pileup.h:90
Pileup< Iterator > make_pileup(Iterator first, Iterator last)
Definition: Pileup.h:273
T max(const T &a, const T &b, const T &c)
Definition: stl_utility.h:699
bool is_del(void) const
Definition: Pileup.h:131
int32_t tid(void) const
Definition: Pileup.h:243
Definition: Pileup.h:57
BASE base(void) const
Definition: CigarIterator.h:119
size_t qpos(void) const
Definition: Pileup.h:139
Definition: Pileup.h:64
bool good(void) const
Definition: Pileup.h:199
Entry(void)
default constructor
Definition: Pileup.h:70
const_reverse_iterator rbegin(void) const
Definition: Pileup.h:302
uint32_t cigar_oplen(void) const
Definition: Pileup.h:96
bool skip_ref(void) const
Definition: Pileup.h:383
uint8_t sequence(void) const
Definition: Pileup.h:426
int32_t pos(void) const
Definition: Pileup.h:248
const_iterator begin(void) const
Definition: Pileup.h:294
void increment(void)
Definition: Pileup.h:415
std::list< Entry >::const_reverse_iterator const_reverse_iterator
Definition: Pileup.h:166
const BamRead & bam(void) const
Definition: Pileup.h:81
const_iterator end(void) const
Definition: Pileup.h:309
unsigned short qual(void) const
Definition: Pileup.h:107
Pileup(void)
Definition: Pileup.h:171
const_reverse_iterator rend(void) const
Definition: Pileup.h:317

Generated on Thu Aug 27 2020 03:33:18 for yat by  doxygen 1.8.11