yat  0.13.2pre
BamRead.h
1 #ifndef theplu_yat_omic_bam_read
2 #define theplu_yat_omic_bam_read
3 
4 // $Id: BamRead.h 3398 2015-03-25 07:57:32Z peter $
5 
6 /*
7  Copyright (C) 2012, 2013, 2014, 2015 Peter Johansson
8 
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10 
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15 
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 */
24 
25 #include "config_bam.h"
26 
27 #include "yat/utility/Aligner.h"
28 // This file has to be included to keep compatibility with yat 0.11
29 #include "yat/utility/Cigar.h"
30 #include "yat/utility/deprecate.h"
31 
32 #include YAT_SAM_HEADER
33 
34 #include <functional>
35 #include <string>
36 #include <vector>
37 
38 namespace theplu {
39 namespace yat {
40 namespace omic {
41 
// Class holding a bam query.
//
// Wraps a pointer to a bam1_t record (member bam_) and exposes accessors
// and setters for its fields. Only declarations are visible in this
// header; the member functions are defined elsewhere.
53  class BamRead
54  {
55  public:
  // default constructor
62  BamRead(void);
63 
  // Copy constructor.
67  BamRead(const BamRead& other);
68 
  // Destructor. Declared virtual.
72  virtual ~BamRead(void);
73 
  // assignment operator; returns a const reference to a BamRead.
77  const BamRead& operator=(const BamRead& rhs);
78 
  // Read-only pointer into the aux field.
  // NOTE(review): presumably points at the start of the raw aux data - confirm.
84  const uint8_t* aux(void) const;
85 
  // Read-only pointer into the aux field for the two-character tag.
  // NOTE(review): behaviour when the tag is absent is not visible here - confirm.
97  const uint8_t* aux(const char tag[2]) const;
98 
  // append a new tag to aux field
  // tag: two-character tag; type, len, data: NOTE(review): presumably the
  // value type code, byte length and payload of the new entry - confirm.
111  void aux_append(const char tag[2], char type, int len, uint8_t* data);
112 
  // remove a tag in aux field
120  void aux_del(const char tag[2]);
121 
  // NOTE(review): presumably the size (in bytes) of the aux field - confirm.
127  int aux_size(void) const;
128 
  // access core data struct (read-only)
134  const bam1_core_t& core(void) const;
135 
  // access core data struct (mutable reference)
141  bam1_core_t& core(void);
142 
  // access CIGAR array
151  const uint32_t* cigar(void) const;
152 
  // NOTE(review): presumably the i-th element of the CIGAR array - confirm.
156  uint32_t cigar(size_t i) const;
157 
  // Operation code of the i-th CIGAR element; alignment_score() in this
  // header compares it against the BAM_C* constants.
161  uint32_t cigar_op(size_t i) const;
162 
  // Length of the i-th CIGAR element; alignment_score() in this header
  // uses it as the number of positions the element spans.
166  uint32_t cigar_oplen(size_t i) const;
167 
  // NOTE(review): presumably the CIGAR rendered as a string - confirm.
171  std::string cigar_str(void) const;
172 
  // Set CIGAR from a vector of uint32_t. Marked with YAT_DEPRECATE;
  // the overload taking utility::Aligner::Cigar is the non-deprecated one.
181  void cigar(const std::vector<uint32_t>& c) YAT_DEPRECATE;
182 
  // Set CIGAR from a utility::Aligner::Cigar.
190  void cigar(const utility::Aligner::Cigar& cigar);
191 
  // rightmost coordinate
200  int32_t end(void) const;
201 
  // bitwise flag
209  uint16_t flag(void) const;
210 
  // leftmost position for mate
214  int32_t mpos(void) const;
215 
  // Chromosome ID for mate.
219  int32_t mtid(void) const;
220 
  // NOTE(review): presumably the query name as a C string - confirm.
226  const char* name(void) const;
227 
  // NOTE(review): presumably sets the query name to n - confirm.
233  void name(const std::string& n);
234 
  // 0-based leftmost coordinate
238  int32_t pos(void) const;
239 
  // NOTE(review): presumably the base quality at query position i - confirm.
243  uint8_t qual(size_t i) const;
244 
  // NOTE(review): presumably sets the base quality at query position i to q - confirm.
253  void qual(size_t i, uint8_t q);
254 
  // NOTE(review): presumably the whole query sequence as a string - confirm.
260  std::string sequence(void) const;
261 
  // NOTE(review): presumably the encoded base at query position index
  // (see nt16_table / nt16_str) - confirm.
267  uint8_t sequence(size_t index) const;
268 
  // Base at query position index as a character; alignment_score() in this
  // header compares it directly against reference elements.
274  char sequence_str(size_t index) const;
275 
  // NOTE(review): presumably sets the encoded base at query position i to x - confirm.
285  void sequence(size_t i, uint8_t x);
286 
  // NOTE(review): presumably replaces sequence and qualities together;
  // whether seq and qual must have equal length is not visible here - confirm.
292  void sequence(const std::string& seq, const std::vector<uint8_t>& qual);
293 
  // NOTE(review): presumably the number of bases in the query sequence - confirm.
297  uint32_t sequence_length(void) const;
298 
  // Member swap with another BamRead.
304  void swap(BamRead& other);
305 
  // chromosome ID
309  int32_t tid(void) const;
310 
311  private:
312  // ensure capacity of data pointer is (at least) n
313  void reserve(int n);
314 
  // Pointer to the underlying bam1_t record.
315  bam1_t* bam_;
316 
  // These classes are granted access to the private members.
317  friend class InBamFile;
318  friend class OutBamFile;
319  friend class BamReadIterator;
  // calend is only declared when YAT_HAVE_HTSLIB is not defined.
320 #ifndef YAT_HAVE_HTSLIB
321  uint32_t calend(const bam1_core_t *c, const uint32_t *cigar) const;
322 #endif
323  // access data length, current length of data
324  int& data_size(void);
325  const int& data_size(void) const;
326  };
327 
// Maps a char to an unsigned char code.
// NOTE(review): the name suggests the 4-bit nucleotide code used for BAM
// sequences - confirm against the definition.
341  const unsigned char nt16_table(char);
342 
// Maps a uint8_t code to a char.
// NOTE(review): presumably the inverse of nt16_table - confirm against the
// definition.
355  const char nt16_str(uint8_t);
356 
// Non-member swap for two BamRead objects.
// NOTE(review): presumably equivalent to lhs.swap(rhs) - confirm.
365  void swap(BamRead& lhs, BamRead& rhs);
366 
// NOTE(review): presumably true if the CIGAR of bam contains a soft clip
// at either end - confirm against the definition.
375  bool soft_clipped(const BamRead& bam);
376 
// NOTE(review): presumably the number of soft-clipped bases at the left
// end of bam (0 if none) - confirm.
385  uint32_t left_soft_clipped(const BamRead& bam);
386 
// NOTE(review): presumably the number of soft-clipped bases at the right
// end of bam (0 if none) - confirm.
395  uint32_t right_soft_clipped(const BamRead& bam);
396 
// NOTE(review): presumably true if lhs and rhs have equal query names
// (BamRead::name) - confirm against the definition.
406  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
407 
// NOTE(review): presumably true if the query name of lhs orders before
// that of rhs - confirm the exact comparison used.
417  bool less_query_name(const BamRead& lhs, const BamRead& rhs);
418 
427  struct BamLessPos
428  : public std::binary_function<const BamRead&, const BamRead&, bool>
429  {
436  bool operator()(const BamRead& lhs, const BamRead& rhs) const;
437  };
438 
439 
448  struct BamLessEnd
449  : public std::binary_function<const BamRead&, const BamRead&, bool>
450  {
457  bool operator()(const BamRead& lhs, const BamRead& rhs) const;
458  };
459 
468  struct BamLessName
469  : public std::binary_function<const BamRead&, const BamRead&, bool>
470  {
474  bool operator()(const BamRead& lhs, const BamRead& rhs) const;
475  };
476 
477 
491  template<typename RandomAccessIterator>
492  double alignment_score(const BamRead& bam, RandomAccessIterator ref,
493  double mismatch, double gap=0.0, double open_gap=0.0)
494  {
495  double res = 0;
496  size_t q = 0;
497 
498  for (uint32_t i = 0; i<bam.core().n_cigar; ++i) {
499  switch (bam.cigar_op(i)) {
500  case (BAM_CMATCH): {
501  RandomAccessIterator end = ref + bam.cigar_oplen(i);
502  for (; ref!=end; ++ref) {
503  if (bam.sequence_str(q) == *ref)
504  res += 1.0;
505  else
506  res -= mismatch;
507  ++q;
508  }
509  break;
510  }
511  case (BAM_CEQUAL): {
512  q += bam.cigar_oplen(i);
513  ref += bam.cigar_oplen(i);
514  res += bam.cigar_oplen(i);
515  break;
516  }
517  case (BAM_CDIFF): {
518  q += bam.cigar_oplen(i);
519  ref += bam.cigar_oplen(i);
520  res -= bam.cigar_oplen(i)*mismatch;
521  break;
522  }
523  case (BAM_CSOFT_CLIP): {
524  q += bam.cigar_oplen(i);
525  break;
526  }
527  case (BAM_CINS): {
528  q += bam.cigar_oplen(i);
529  res -= open_gap + bam.cigar_oplen(i) * gap;
530  break;
531  }
532  case (BAM_CDEL): {
533  ref += bam.cigar_oplen(i);
534  res -= open_gap + bam.cigar_oplen(i) * gap;
535  break;
536  }
537  } // end switch
538  }
539  return res;
540  }
541 
542 }}}
543 #endif
void aux_del(const char tag[2])
remove a tag in aux field
virtual ~BamRead(void)
Destructor.
uint32_t sequence_length(void) const
int32_t tid(void) const
chromosome ID
BamRead(void)
default constructor
const char nt16_str(uint8_t)
bool operator()(const BamRead &lhs, const BamRead &rhs) const
bool operator()(const BamRead &lhs, const BamRead &rhs) const
int32_t end(void) const
rightmost coordinate
const char * name(void) const
const bam1_core_t & core(void) const
access core data struct
Class holding a bam query.
Definition: BamRead.h:53
uint32_t cigar_op(size_t i) const
double alignment_score(const BamRead &bam, RandomAccessIterator ref, double mismatch, double gap=0.0, double open_gap=0.0)
Definition: BamRead.h:492
const BamRead & operator=(const BamRead &rhs)
assignment operator
char sequence_str(size_t index) const
Definition: BamRead.h:427
int aux_size(void) const
int32_t mtid(void) const
Chromosome ID for mate.
std::string sequence(void) const
void aux_append(const char tag[2], char type, int len, uint8_t *data)
append a new tag to aux field
const uint8_t * aux(void) const
Compact Idiosyncratic Gapped Alignment Report.
Definition: Aligner.h:179
Definition: BamRead.h:468
Definition: BamFile.h:213
int32_t mpos(void) const
leftmost position for mate
Definition: BamRead.h:448
bool operator()(const BamRead &lhs, const BamRead &rhs) const
Definition: BamFile.h:118
void swap(BamRead &other)
uint32_t cigar_oplen(size_t i) const
class to iterate through an InBamFile
Definition: BamReadIterator.h:60
uint16_t flag(void) const
bitwise flag
uint8_t qual(size_t i) const
const unsigned char nt16_table(char)
int32_t pos(void) const
0-based leftmost coordinate
const uint32_t * cigar(void) const
access CIGAR array
std::string cigar_str(void) const

Generated on Wed Jan 4 2017 02:23:07 for yat by  doxygen 1.8.5