yat  0.21pre
Pileup.h
1 #ifndef theplu_yat_omic_pileup
2 #define theplu_yat_omic_pileup
3 
4 // $Id: Pileup.h 4019 2020-11-06 01:55:37Z peter $
5 
6 /*
7  Copyright (C) 2014, 2016, 2017, 2018, 2019, 2020 Peter Johansson
8 
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10 
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15 
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23 */
24 
25 #include "BamRead.h"
26 
27 #include "yat/utility/CigarIterator.h"
28 #include "yat/utility/yat_assert.h"
29 
30 #include <boost/iterator/iterator_traits.hpp>
31 #include <boost/iterator/iterator_concepts.hpp>
32 #include <boost/concept/assert.hpp>
33 
34 #include <algorithm>
35 #include <list>
36 #include <memory>
37 
38 namespace theplu {
39 namespace yat {
40 namespace omic {
41 
56  template<typename Iterator>
57  class Pileup
58  {
59  public:
64  class Entry
65  {
66  public:
70  Entry(void) {}
71 
76  Entry(const BamRead& b);
77 
81  const BamRead& bam(void) const
82  {
83  YAT_ASSERT(bam_.get());
84  return *bam_;
85  }
86 
90  uint8_t cigar_op(void) const { return *cigar_; }
91 
92 
96  uint32_t cigar_oplen(void) const
97  {
98  YAT_ASSERT(bam_.get());
99  YAT_ASSERT(cigar_.base() >= bam_->cigar());
100  return bam_cigar_oplen(*cigar_.base());
101  }
102 
103 
107  unsigned short qual(void) const
108  {
109  YAT_ASSERT(bam_.get());
110  YAT_ASSERT(qpos_ < bam_->sequence_length());
111  return bam_->qual(qpos_);
112  }
113 
120  uint8_t sequence(void) const;
121 
126  void increment(void);
127 
131  bool is_del(void) const
132  {
133  return cigar_type()==2;
134  }
135 
139  size_t qpos(void) const { return qpos_; }
140  private:
141  std::shared_ptr<BamRead> bam_;
142  // index of base pointed to
143  size_t qpos_;
145  size_t skip_;
146  // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h
147  uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); }
148  friend class Pileup;
149  };
150 
151  public:
156  typedef typename std::list<Entry>::const_iterator const_iterator;
157 
165  typedef typename std::list<Entry>::const_reverse_iterator
167 
171  Pileup(void) {}
172 
179  Pileup(Iterator first, Iterator last);
180 
188  const_iterator begin(void) const;
189 
194  const_iterator end(void) const;
195 
199  bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
200 
211  void increment(void);
212 
219  size_t size(void) const;
220 
228  const_reverse_iterator rbegin(void) const;
229 
238  const_reverse_iterator rend(void) const;
239 
243  int32_t tid(void) const { return tid_; }
244 
251  int32_t pos(void) const { return pos_; }
252 
257  bool skip_ref(void) const;
258  private:
259  typedef typename std::list<Entry>::iterator iterator;
260 
261  Iterator bam_;
262  Iterator bam_end_;
263  int32_t pos_;
264  int32_t tid_;
265  std::list<Entry> data_;
266  uint32_t skip_ref_;
267  };
268 
269 
275  template<typename Iterator>
276  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
277  {
278  return Pileup<Iterator>(first, last);
279  }
280 
281  // implementations
282 
283  template<typename Iterator>
284  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
285  : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
286  {
287  BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Iterator>));
288  BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Iterator>));
289  using boost::Convertible;
290  typedef typename boost::iterator_value<Iterator>::type value_type;
291  BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
292  increment();
293  }
294 
295 
296  template<typename Iterator>
298  {
299  return data_.begin();
300  }
301 
302 
303  template<typename Iterator>
306  {
307  return data_.rbegin();
308  }
309 
310 
311  template<typename Iterator>
313  {
314  return data_.end();
315  }
316 
317 
318  template<typename Iterator>
321  {
322  return data_.rend();
323  }
324 
325 
326  template<typename Iterator>
328  {
329  if (!data_.empty()) {
330  // in insertion
331  if (skip_ref()) {
332  for (iterator it = data_.begin(); it!=data_.end(); ++it) {
333  if (it->cigar_type() == 1) {
334  it->increment();
335  }
336  --it->skip_;
337  }
338  --skip_ref_;
339  }
340  // walk one base forward on reference
341  else {
342  ++pos_;
343  typename std::list<Entry>::iterator read = data_.begin();
344  while (read != data_.end()) {
345  // remove entry if read is entirely left of pos
346  if (read->bam().end() <= pos_) {
347  data_.erase(read++);
348  continue;
349  }
350  read->increment();
351  // check if we're stepping into insertion
352  if (read->cigar_op() == BAM_CINS) {
353  skip_ref_ = std::max(skip_ref_, read->cigar_oplen());
354  }
355  ++read;
356  }
357 
358  // stepped into an insertion
359  if (skip_ref_)
360  for (iterator iter=data_.begin(); iter!=data_.end(); ++iter)
361  iter->skip_ = skip_ref_;
362  }
363  }
364 
365 
366  // fast forward to next covered locus
367  if (data_.empty()) {
368  if (bam_==bam_end_)
369  return;
370  tid_ = bam_->core().tid;
371  pos_ = bam_->core().pos;
372  }
373 
374  // read data
375  while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
376  // skip unmapped reads
377  if (!(bam_->core().flag & BAM_FUNMAP)) {
378  data_.push_back(Entry(*bam_));
379  }
380  ++bam_;
381  }
382  }
383 
384 
385  template<typename Iterator>
386  bool Pileup<Iterator>::skip_ref(void) const
387  {
388  return skip_ref_;
389  }
390 
391 
392  template<typename Iterator>
393  size_t Pileup<Iterator>::size(void) const
394  {
395  return data_.size();
396  }
397 
398 
399  template<typename Iterator>
401  : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0)
402  {
403  YAT_ASSERT(cigar_.base() == bam_->cigar());
404  if (bam_->core().n_cigar==0)
405  return;
406  // iterate cigar to first match
407  while (qpos_ < bam_->sequence_length() && cigar_type() != 3) {
408  // type is either 0 (hardclipped) or 1 (softclipped)
409  YAT_ASSERT(cigar_type()!=2);
410  if (cigar_type() == 1)
411  ++qpos_;
412  ++cigar_;
413  }
414  }
415 
416 
417  template<typename Iterator>
419  {
420  YAT_ASSERT(cigar_.base() >= bam_->cigar());
421  if (cigar_type() & 1)
422  ++qpos_;
423  ++cigar_;
424  YAT_ASSERT(cigar_.base() >= bam_->cigar());
425  }
426 
427 
428  template<typename Iterator>
430  {
431  if (skip_==0) {
432  if (cigar_type() & 1)
433  return bam_->sequence(qpos_);
434  return 0;
435  }
436  if (cigar_op()!=BAM_CINS)
437  return 0;
438  return bam_->sequence(qpos_);
439  }
440 
441 }}}
442 #endif
size_t size(void) const
Definition: Pileup.h:393
bool skip_ref(void) const
Definition: Pileup.h:386
uint8_t cigar_op(void) const
Definition: Pileup.h:90
bool is_del(void) const
Definition: Pileup.h:131
const_reverse_iterator rbegin(void) const
Definition: Pileup.h:305
The Department of Theoretical Physics namespace as we define it.
std::list< Entry >::const_iterator const_iterator
Definition: Pileup.h:156
void increment(void)
step to next position.
Definition: Pileup.h:327
const_reverse_iterator rend(void) const
Definition: Pileup.h:320
Class holding a bam query.
Definition: BamRead.h:51
Pileup< Iterator > make_pileup(Iterator first, Iterator last)
Definition: Pileup.h:276
T max(const T &a, const T &b, const T &c)
Definition: stl_utility.h:699
size_t qpos(void) const
Definition: Pileup.h:139
unsigned short qual(void) const
Definition: Pileup.h:107
Iterator over a CIGAR.
Definition: CigarIterator.h:60
Definition: Pileup.h:57
Definition: Pileup.h:64
Entry(void)
default constructor
Definition: Pileup.h:70
int32_t tid(void) const
Definition: Pileup.h:243
BASE base(void) const
Definition: CigarIterator.h:119
uint8_t sequence(void) const
Definition: Pileup.h:429
int32_t pos(void) const
Definition: Pileup.h:251
bool good(void) const
Definition: Pileup.h:199
void increment(void)
Definition: Pileup.h:418
std::list< Entry >::const_reverse_iterator const_reverse_iterator
Definition: Pileup.h:166
uint32_t cigar_oplen(void) const
Definition: Pileup.h:96
const BamRead & bam(void) const
Definition: Pileup.h:81
const_iterator begin(void) const
Definition: Pileup.h:297
const_iterator end(void) const
Definition: Pileup.h:312
Pileup(void)
Definition: Pileup.h:171

Generated on Wed Jan 25 2023 03:34:29 for yat by  doxygen 1.8.14