3475 |
08 Mar 16 |
peter |
1 |
#ifndef _theplu_yat_omic_fasta |
3475 |
08 Mar 16 |
peter |
2 |
#define _theplu_yat_omic_fasta |
3475 |
08 Mar 16 |
peter |
3 |
|
3475 |
08 Mar 16 |
peter |
// $Id$ |
3475 |
08 Mar 16 |
peter |
5 |
|
4346 |
24 Apr 23 |
peter |
6 |
/* |
4346 |
24 Apr 23 |
peter |
Copyright (C) 2016, 2017, 2018, 2020, 2023 Peter Johansson |
4346 |
24 Apr 23 |
peter |
8 |
|
4346 |
24 Apr 23 |
peter |
This file is part of the yat library, https://dev.thep.lu.se/yat |
4346 |
24 Apr 23 |
peter |
10 |
|
4346 |
24 Apr 23 |
peter |
The yat library is free software; you can redistribute it and/or |
4346 |
24 Apr 23 |
peter |
modify it under the terms of the GNU General Public License as |
4346 |
24 Apr 23 |
peter |
published by the Free Software Foundation; either version 3 of the |
4346 |
24 Apr 23 |
peter |
License, or (at your option) any later version. |
4346 |
24 Apr 23 |
peter |
15 |
|
4346 |
24 Apr 23 |
peter |
The yat library is distributed in the hope that it will be useful, |
4346 |
24 Apr 23 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
4346 |
24 Apr 23 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
4346 |
24 Apr 23 |
peter |
General Public License for more details. |
4346 |
24 Apr 23 |
peter |
20 |
|
4346 |
24 Apr 23 |
peter |
You should have received a copy of the GNU General Public License |
4346 |
24 Apr 23 |
peter |
along with yat. If not, see <https://www.gnu.org/licenses/>. |
4346 |
24 Apr 23 |
peter |
23 |
*/ |
4346 |
24 Apr 23 |
peter |
24 |
|
3883 |
24 Mar 20 |
peter |
25 |
#include <htslib/faidx.h> |
3475 |
08 Mar 16 |
peter |
26 |
|
3727 |
09 Feb 18 |
peter |
27 |
#include <iosfwd> |
3475 |
08 Mar 16 |
peter |
28 |
#include <map> |
4019 |
06 Nov 20 |
peter |
29 |
#include <memory> |
3475 |
08 Mar 16 |
peter |
30 |
#include <string> |
3475 |
08 Mar 16 |
peter |
31 |
#include <vector> |
3475 |
08 Mar 16 |
peter |
32 |
|
3475 |
08 Mar 16 |
peter |
33 |
namespace theplu { |
3475 |
08 Mar 16 |
peter |
34 |
namespace yat { |
3475 |
08 Mar 16 |
peter |
35 |
namespace omic { |
3475 |
08 Mar 16 |
peter |
36 |
|
3475 |
08 Mar 16 |
peter |
37 |
/** |
3475 |
08 Mar 16 |
peter |
\brief wrapper class around struct faidx_t in libhts |
3475 |
08 Mar 16 |
peter |
39 |
|
3475 |
08 Mar 16 |
peter |
Class handling a fasta file. Class assumes the fasta file has an |
3475 |
08 Mar 16 |
peter |
index file created by e.g. 'samtools faidx'. |
3475 |
08 Mar 16 |
peter |
42 |
|
3475 |
08 Mar 16 |
peter |
\since New in yat 0.14 |
3475 |
08 Mar 16 |
peter |
44 |
*/ |
3475 |
08 Mar 16 |
peter |
45 |
class Fasta |
3475 |
08 Mar 16 |
peter |
46 |
{ |
3475 |
08 Mar 16 |
peter |
47 |
public: |
3475 |
08 Mar 16 |
peter |
48 |
/** |
3475 |
08 Mar 16 |
peter |
Tries to open index for fasta file \a fn. It is assumed that |
3475 |
08 Mar 16 |
peter |
the index file is called fn + ".fai". If open fails, |
3475 |
08 Mar 16 |
peter |
constructor tries to create an index file from fasta file \a fn |
3475 |
08 Mar 16 |
peter |
(see fai_build in htslib). If that also fails, an exception is |
3475 |
08 Mar 16 |
peter |
thrown. |
3475 |
08 Mar 16 |
peter |
54 |
|
3475 |
08 Mar 16 |
peter |
\throw utility::runtime_error if open fails |
3475 |
08 Mar 16 |
peter |
56 |
*/ |
3475 |
08 Mar 16 |
peter |
57 |
Fasta(const std::string& fn); |
3475 |
08 Mar 16 |
peter |
58 |
|
3475 |
08 Mar 16 |
peter |
59 |
/** |
3475 |
08 Mar 16 |
peter |
\return true if there exists a sequence called \a name |
3475 |
08 Mar 16 |
peter |
61 |
*/ |
3475 |
08 Mar 16 |
peter |
62 |
bool present(const std::string& name) const; |
3475 |
08 Mar 16 |
peter |
63 |
|
3475 |
08 Mar 16 |
peter |
64 |
/** |
3475 |
08 Mar 16 |
peter |
\return name of sequence \a i |
3475 |
08 Mar 16 |
peter |
66 |
*/ |
3475 |
08 Mar 16 |
peter |
67 |
std::string name(size_t i) const; |
3475 |
08 Mar 16 |
peter |
68 |
|
3475 |
08 Mar 16 |
peter |
69 |
/** |
3475 |
08 Mar 16 |
peter |
\return number of sequences |
3475 |
08 Mar 16 |
peter |
71 |
*/ |
3475 |
08 Mar 16 |
peter |
72 |
int nseq(void) const; |
3475 |
08 Mar 16 |
peter |
73 |
|
3475 |
08 Mar 16 |
peter |
74 |
/** |
3475 |
08 Mar 16 |
peter |
\return length of sequence with name \a name |
3475 |
08 Mar 16 |
peter |
76 |
|
3475 |
08 Mar 16 |
peter |
\throw utility::runtime_error if there is no sequence named \a name |
3475 |
08 Mar 16 |
peter |
78 |
*/ |
3475 |
08 Mar 16 |
peter |
79 |
int sequence_length(const std::string& name) const; |
3475 |
08 Mar 16 |
peter |
80 |
|
3475 |
08 Mar 16 |
peter |
81 |
/** |
3475 |
08 Mar 16 |
peter |
Class holding a sequence fetched from a fasta file. |
3475 |
08 Mar 16 |
peter |
83 |
|
3475 |
08 Mar 16 |
peter |
Class is const and its copy and assignment are shallow, i.e., |
3475 |
08 Mar 16 |
peter |
they only copy pointers to the underlying data and do not alter |
3475 |
08 Mar 16 |
peter |
the underlying data. |
3475 |
08 Mar 16 |
peter |
87 |
|
3475 |
08 Mar 16 |
peter |
Class is typically constructed in functions Fasta::sequence. |
3475 |
08 Mar 16 |
peter |
89 |
*/ |
3475 |
08 Mar 16 |
peter |
90 |
class Sequence |
3475 |
08 Mar 16 |
peter |
91 |
{ |
3475 |
08 Mar 16 |
peter |
92 |
public: |
3475 |
08 Mar 16 |
peter |
/// const iterator is a \c const \c char* |
3475 |
08 Mar 16 |
peter |
94 |
typedef const char* const_iterator; |
3475 |
08 Mar 16 |
peter |
95 |
|
3475 |
08 Mar 16 |
peter |
96 |
/** |
3475 |
08 Mar 16 |
peter |
\return iterator to first element |
3475 |
08 Mar 16 |
peter |
98 |
*/ |
3475 |
08 Mar 16 |
peter |
99 |
const_iterator begin(void) const; |
3475 |
08 Mar 16 |
peter |
100 |
|
3475 |
08 Mar 16 |
peter |
101 |
/** |
3475 |
08 Mar 16 |
peter |
\return iterator to one past the last element |
3475 |
08 Mar 16 |
peter |
103 |
*/ |
3475 |
08 Mar 16 |
peter |
104 |
const_iterator end(void) const; |
3475 |
08 Mar 16 |
peter |
105 |
|
3475 |
08 Mar 16 |
peter |
106 |
/** |
3475 |
08 Mar 16 |
peter |
\return number of elements contained in sequence |
3475 |
08 Mar 16 |
peter |
108 |
*/ |
3475 |
08 Mar 16 |
peter |
109 |
int size(void) const; |
3475 |
08 Mar 16 |
peter |
110 |
|
3475 |
08 Mar 16 |
peter |
111 |
/** |
3475 |
08 Mar 16 |
peter |
\return the element at position \a i |
3475 |
08 Mar 16 |
peter |
113 |
*/ |
3475 |
08 Mar 16 |
peter |
114 |
char operator[](size_t i) const; |
3475 |
08 Mar 16 |
peter |
115 |
private: |
3475 |
08 Mar 16 |
peter |
116 |
friend class Fasta; |
3475 |
08 Mar 16 |
peter |
117 |
friend Sequence reverse_complement(const Fasta::Sequence&); |
4019 |
06 Nov 20 |
peter |
118 |
// character data; held in a shared_ptr so that copies of a Sequence share it (shallow copy) |
std::shared_ptr<char> seq_; |
3475 |
08 Mar 16 |
peter |
119 |
// NOTE(review): presumably the number of elements reported by size() - confirm in the implementation |
int size_; |
3475 |
08 Mar 16 |
peter |
120 |
}; |
3475 |
08 Mar 16 |
peter |
121 |
|
3475 |
08 Mar 16 |
peter |
122 |
/** |
3475 |
08 Mar 16 |
peter |
\return the entire sequence named \a chr |
3475 |
08 Mar 16 |
peter |
124 |
*/ |
3475 |
08 Mar 16 |
peter |
125 |
Sequence sequence(const std::string& chr) const; |
3475 |
08 Mar 16 |
peter |
126 |
|
3475 |
08 Mar 16 |
peter |
127 |
/** |
3475 |
08 Mar 16 |
peter |
\param chr sequence name |
3475 |
08 Mar 16 |
peter |
\param beg beginning position (0-based) |
3475 |
08 Mar 16 |
peter |
\param end end position (0-based, inclusive) |
3475 |
08 Mar 16 |
peter |
131 |
|
3700 |
04 Oct 17 |
peter |
\return sub-sequence of \a chr starting at \a beg and with last |
3700 |
04 Oct 17 |
peter |
base \a end (length of sequence is end-beg+1). |
3475 |
08 Mar 16 |
peter |
134 |
*/ |
3475 |
08 Mar 16 |
peter |
135 |
Sequence sequence(const std::string& chr, int beg, int end) const; |
3475 |
08 Mar 16 |
peter |
136 |
|
3475 |
08 Mar 16 |
peter |
137 |
private: |
4019 |
06 Nov 20 |
peter |
138 |
// handle to the htslib index (faidx_t); held in a shared_ptr, so copies of a Fasta share it |
std::shared_ptr<faidx_t> faidx_; |
3475 |
08 Mar 16 |
peter |
// using compiler generated copy constructor and assignment |
3475 |
08 Mar 16 |
peter |
//Fasta(const Fasta& other); |
3475 |
08 Mar 16 |
peter |
//Fasta& operator=(const Fasta& rhs); |
3475 |
08 Mar 16 |
peter |
142 |
// NOTE(review): presumably the shared implementation behind the sequence() overloads, |
// filling \a seq with the requested range of \a chr - confirm in the implementation |
void fetch(Sequence& seq, const std::string& chr, int begin, int end) const; |
3475 |
08 Mar 16 |
peter |
// throw exception; called when incorrect chr is passed |
3475 |
08 Mar 16 |
peter |
144 |
void throw_unknown_chr(const std::string&) const; |
3475 |
08 Mar 16 |
peter |
145 |
}; |
3475 |
08 Mar 16 |
peter |
146 |
|
3475 |
08 Mar 16 |
peter |
147 |
|
3475 |
08 Mar 16 |
peter |
148 |
/** |
3475 |
08 Mar 16 |
peter |
Create a new Sequence that reflects the reverse complement of \a sequence. |
|
\param sequence the sequence to reverse complement; taken by const reference |
|
\return the newly created Sequence |
3475 |
08 Mar 16 |
peter |
150 |
|
3475 |
08 Mar 16 |
peter |
\see dna_reverse_complement |
3475 |
08 Mar 16 |
peter |
152 |
|
3475 |
08 Mar 16 |
peter |
\relates Fasta::Sequence |
3475 |
08 Mar 16 |
peter |
154 |
*/ |
3475 |
08 Mar 16 |
peter |
155 |
Fasta::Sequence reverse_complement(const Fasta::Sequence& sequence); |
3727 |
09 Feb 18 |
peter |
156 |
|
3727 |
09 Feb 18 |
peter |
157 |
/** |
3727 |
09 Feb 18 |
peter |
\brief output operator for Fasta::Sequence |
3727 |
09 Feb 18 |
peter |
159 |
|
3727 |
09 Feb 18 |
peter |
\relates Fasta::Sequence |
3727 |
09 Feb 18 |
peter |
161 |
|
3727 |
09 Feb 18 |
peter |
\since New in yat 0.16 |
3727 |
09 Feb 18 |
peter |
163 |
*/ |
3727 |
09 Feb 18 |
peter |
164 |
std::ostream& operator<<(std::ostream& os, const Fasta::Sequence&); |
3727 |
09 Feb 18 |
peter |
165 |
|
3475 |
08 Mar 16 |
peter |
166 |
}}} |
3475 |
08 Mar 16 |
peter |
167 |
#endif |