4278 |
26 Jan 23 |
peter |
// $Id$ |
4278 |
26 Jan 23 |
peter |
2 |
|
4278 |
26 Jan 23 |
peter |
// This file is taken from hts.c in htslib v1.16 and modified. |
4278 |
26 Jan 23 |
peter |
4 |
|
4278 |
26 Jan 23 |
peter |
// Backporting some functions when compiling against htslib |
4278 |
26 Jan 23 |
peter |
// 1.10. Hence this file can be removed when we assume 1.11 |
4278 |
26 Jan 23 |
peter |
7 |
|
4278 |
26 Jan 23 |
peter |
8 |
/* |
4278 |
26 Jan 23 |
peter |
Copyright (C) 2023 Peter Johansson |
4278 |
26 Jan 23 |
peter |
10 |
|
4278 |
26 Jan 23 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
4278 |
26 Jan 23 |
peter |
12 |
|
4278 |
26 Jan 23 |
peter |
The yat library is free software; you can redistribute it and/or |
4278 |
26 Jan 23 |
peter |
modify it under the terms of the GNU General Public License as |
4278 |
26 Jan 23 |
peter |
published by the Free Software Foundation; either version 3 of the |
4278 |
26 Jan 23 |
peter |
License, or (at your option) any later version. |
4278 |
26 Jan 23 |
peter |
17 |
|
4278 |
26 Jan 23 |
peter |
The yat library is distributed in the hope that it will be useful, |
4278 |
26 Jan 23 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
4278 |
26 Jan 23 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
4278 |
26 Jan 23 |
peter |
General Public License for more details. |
4278 |
26 Jan 23 |
peter |
22 |
|
4278 |
26 Jan 23 |
peter |
You should have received a copy of the GNU General Public License |
4278 |
26 Jan 23 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
4278 |
26 Jan 23 |
peter |
25 |
*/ |
4278 |
26 Jan 23 |
peter |
26 |
|
4278 |
26 Jan 23 |
peter |
/* hts.c -- format-neutral I/O, indexing, and iterator API functions. |
4278 |
26 Jan 23 |
peter |
28 |
|
4278 |
26 Jan 23 |
peter |
Copyright (C) 2008, 2009, 2012-2022 Genome Research Ltd. |
4278 |
26 Jan 23 |
peter |
Copyright (C) 2012, 2013 Broad Institute. |
4278 |
26 Jan 23 |
peter |
31 |
|
4278 |
26 Jan 23 |
peter |
Author: Heng Li <lh3@sanger.ac.uk> |
4278 |
26 Jan 23 |
peter |
33 |
|
4278 |
26 Jan 23 |
peter |
Permission is hereby granted, free of charge, to any person obtaining a copy |
4278 |
26 Jan 23 |
peter |
of this software and associated documentation files (the "Software"), to deal |
4278 |
26 Jan 23 |
peter |
in the Software without restriction, including without limitation the rights |
4278 |
26 Jan 23 |
peter |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
4278 |
26 Jan 23 |
peter |
copies of the Software, and to permit persons to whom the Software is |
4278 |
26 Jan 23 |
peter |
furnished to do so, subject to the following conditions: |
4278 |
26 Jan 23 |
peter |
40 |
|
4278 |
26 Jan 23 |
peter |
The above copyright notice and this permission notice shall be included in |
4278 |
26 Jan 23 |
peter |
all copies or substantial portions of the Software. |
4278 |
26 Jan 23 |
peter |
43 |
|
4278 |
26 Jan 23 |
peter |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
4278 |
26 Jan 23 |
peter |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
4278 |
26 Jan 23 |
peter |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
4278 |
26 Jan 23 |
peter |
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
4278 |
26 Jan 23 |
peter |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
4278 |
26 Jan 23 |
peter |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
4278 |
26 Jan 23 |
peter |
DEALINGS IN THE SOFTWARE. */ |
4278 |
26 Jan 23 |
peter |
51 |
|
4278 |
26 Jan 23 |
peter |
52 |
#include <config.h> |
4278 |
26 Jan 23 |
peter |
53 |
|
4278 |
26 Jan 23 |
peter |
54 |
#include <htslib/hts.h> |
4278 |
26 Jan 23 |
peter |
55 |
#include <htslib/sam.h> |
4278 |
26 Jan 23 |
peter |
56 |
|
4278 |
26 Jan 23 |
peter |
57 |
#include <cassert> |
4278 |
26 Jan 23 |
peter |
58 |
#include <cstdlib> |
4278 |
26 Jan 23 |
peter |
59 |
|
4278 |
26 Jan 23 |
peter |
60 |
namespace theplu { |
4278 |
26 Jan 23 |
peter |
61 |
namespace yat { |
4278 |
26 Jan 23 |
peter |
62 |
namespace omic { |
4278 |
26 Jan 23 |
peter |
63 |
namespace detail { |
4278 |
26 Jan 23 |
peter |
64 |
|
4278 |
26 Jan 23 |
peter |
65 |
/**************** |
4278 |
26 Jan 23 |
peter |
*** Iterator *** |
4278 |
26 Jan 23 |
peter |
67 |
****************/ |
4278 |
26 Jan 23 |
peter |
68 |
|
4278 |
26 Jan 23 |
peter |
69 |
int compare_regions(const void *r1, const void *r2) |
4278 |
26 Jan 23 |
peter |
70 |
{ |
4278 |
26 Jan 23 |
peter |
71 |
hts_reglist_t *reg1 = (hts_reglist_t *)r1; |
4278 |
26 Jan 23 |
peter |
72 |
hts_reglist_t *reg2 = (hts_reglist_t *)r2; |
4278 |
26 Jan 23 |
peter |
73 |
|
4278 |
26 Jan 23 |
peter |
74 |
if (reg1->tid < 0 && reg2->tid >= 0) |
4278 |
26 Jan 23 |
peter |
75 |
return 1; |
4278 |
26 Jan 23 |
peter |
76 |
else if (reg1->tid >= 0 && reg2->tid < 0) |
4278 |
26 Jan 23 |
peter |
77 |
return -1; |
4278 |
26 Jan 23 |
peter |
78 |
else |
4278 |
26 Jan 23 |
peter |
79 |
return reg1->tid - reg2->tid; |
4278 |
26 Jan 23 |
peter |
80 |
} |
4278 |
26 Jan 23 |
peter |
81 |
|
4278 |
26 Jan 23 |
peter |
82 |
|
4278 |
26 Jan 23 |
peter |
83 |
int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, bam1_t *r) |
4278 |
26 Jan 23 |
peter |
84 |
{ |
4278 |
26 Jan 23 |
peter |
85 |
assert(!iter->is_cram); |
4278 |
26 Jan 23 |
peter |
// int ret, tid, i, cr, ci; |
4278 |
26 Jan 23 |
peter |
//hts_pos_t beg, end; |
4278 |
26 Jan 23 |
peter |
88 |
|
4278 |
26 Jan 23 |
peter |
89 |
if (iter == NULL || iter->finished) return -1; |
4278 |
26 Jan 23 |
peter |
90 |
|
4278 |
26 Jan 23 |
peter |
91 |
int ret = 0; |
4278 |
26 Jan 23 |
peter |
92 |
int tid = 0; |
4278 |
26 Jan 23 |
peter |
93 |
hts_pos_t beg = 0; |
4278 |
26 Jan 23 |
peter |
94 |
hts_pos_t end = 0; |
4278 |
26 Jan 23 |
peter |
95 |
hts_reglist_t *found_reg = NULL; |
4278 |
26 Jan 23 |
peter |
96 |
BGZF* fp = fd->fp.bgzf; |
4278 |
26 Jan 23 |
peter |
97 |
if (iter->read_rest) { |
4278 |
26 Jan 23 |
peter |
98 |
if (iter->curr_off) { // seek to the start |
4278 |
26 Jan 23 |
peter |
99 |
if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { |
4278 |
26 Jan 23 |
peter |
100 |
return -1; |
4278 |
26 Jan 23 |
peter |
101 |
} |
4278 |
26 Jan 23 |
peter |
102 |
iter->curr_off = 0; // only seek once |
4278 |
26 Jan 23 |
peter |
103 |
} |
4278 |
26 Jan 23 |
peter |
104 |
|
4278 |
26 Jan 23 |
peter |
105 |
ret = iter->readrec(fp, fd, r, &tid, &beg, &end); |
4278 |
26 Jan 23 |
peter |
106 |
if (ret < 0) |
4278 |
26 Jan 23 |
peter |
107 |
iter->finished = 1; |
4278 |
26 Jan 23 |
peter |
108 |
|
4278 |
26 Jan 23 |
peter |
109 |
iter->curr_tid = tid; |
4278 |
26 Jan 23 |
peter |
110 |
iter->curr_beg = beg; |
4278 |
26 Jan 23 |
peter |
111 |
iter->curr_end = end; |
4278 |
26 Jan 23 |
peter |
112 |
|
4278 |
26 Jan 23 |
peter |
113 |
return ret; |
4278 |
26 Jan 23 |
peter |
114 |
} |
4278 |
26 Jan 23 |
peter |
// A NULL iter->off should always be accompanied by iter->finished. |
4278 |
26 Jan 23 |
peter |
116 |
assert(iter->off != NULL || iter->nocoor != 0); |
4278 |
26 Jan 23 |
peter |
117 |
|
4278 |
26 Jan 23 |
peter |
118 |
int next_range = 0; |
4278 |
26 Jan 23 |
peter |
119 |
for (;;) { |
4278 |
26 Jan 23 |
peter |
// Note that due to the way bam indexing works, iter->off may contain |
4278 |
26 Jan 23 |
peter |
// file chunks that are not actually needed as they contain data |
4278 |
26 Jan 23 |
peter |
// beyond the end of the requested region. These are filtered out |
4278 |
26 Jan 23 |
peter |
// by comparing the tid and index into hts_reglist_t::intervals |
4278 |
26 Jan 23 |
peter |
// (packed for reasons of convenience into iter->off[iter->i].max) |
4278 |
26 Jan 23 |
peter |
// associated with the file region with iter->curr_tid and |
4278 |
26 Jan 23 |
peter |
// iter->curr_intv. |
4278 |
26 Jan 23 |
peter |
127 |
|
4278 |
26 Jan 23 |
peter |
128 |
if (next_range |
4278 |
26 Jan 23 |
peter |
129 |
|| iter->curr_off == 0 |
4278 |
26 Jan 23 |
peter |
130 |
|| iter->i >= iter->n_off |
4278 |
26 Jan 23 |
peter |
131 |
|| iter->curr_off >= iter->off[iter->i].v |
4278 |
26 Jan 23 |
peter |
132 |
|| (iter->off[iter->i].max >> 32 == static_cast<uint64_t>(iter->curr_tid) |
4278 |
26 Jan 23 |
peter |
133 |
&& (iter->off[iter->i].max & 0xffffffff) < static_cast<uint64_t>(iter->curr_intv))) { |
4278 |
26 Jan 23 |
peter |
134 |
|
4278 |
26 Jan 23 |
peter |
// Jump to the next chunk. It may be necessary to skip more |
4278 |
26 Jan 23 |
peter |
// than one as the iter->off list can include overlapping entries. |
4278 |
26 Jan 23 |
peter |
137 |
do { |
4278 |
26 Jan 23 |
peter |
138 |
iter->i++; |
4278 |
26 Jan 23 |
peter |
139 |
} while (iter->i < iter->n_off |
4278 |
26 Jan 23 |
peter |
140 |
&& (iter->curr_off >= iter->off[iter->i].v |
4278 |
26 Jan 23 |
peter |
141 |
|| (iter->off[iter->i].max >> 32 == static_cast<uint64_t>(iter->curr_tid) |
4278 |
26 Jan 23 |
peter |
142 |
&& (iter->off[iter->i].max & 0xffffffff) < static_cast<uint64_t>(iter->curr_intv)))); |
4278 |
26 Jan 23 |
peter |
143 |
|
4278 |
26 Jan 23 |
peter |
144 |
if (iter->i >= iter->n_off) { // no more chunks, except NOCOORs |
4278 |
26 Jan 23 |
peter |
145 |
if (iter->nocoor) { |
4278 |
26 Jan 23 |
peter |
146 |
next_range = 0; |
4278 |
26 Jan 23 |
peter |
147 |
if (iter->seek(fp, iter->nocoor_off, SEEK_SET) < 0) { |
4278 |
26 Jan 23 |
peter |
148 |
hts_log_error("Seek at offset %" PRIu64 " failed.", iter->nocoor_off); |
4278 |
26 Jan 23 |
peter |
149 |
return -1; |
4278 |
26 Jan 23 |
peter |
150 |
} |
4278 |
26 Jan 23 |
peter |
151 |
|
4278 |
26 Jan 23 |
peter |
// The first slice covering the unmapped reads might |
4278 |
26 Jan 23 |
peter |
// contain a few mapped reads, so scroll |
4278 |
26 Jan 23 |
peter |
// forward until finding the first unmapped read. |
4278 |
26 Jan 23 |
peter |
155 |
do { |
4278 |
26 Jan 23 |
peter |
156 |
ret = iter->readrec(fp, fd, r, &tid, &beg, &end); |
4278 |
26 Jan 23 |
peter |
157 |
} while (tid >= 0 && ret >=0); |
4278 |
26 Jan 23 |
peter |
158 |
|
4278 |
26 Jan 23 |
peter |
159 |
if (ret < 0) |
4278 |
26 Jan 23 |
peter |
160 |
iter->finished = 1; |
4278 |
26 Jan 23 |
peter |
161 |
else |
4278 |
26 Jan 23 |
peter |
162 |
iter->read_rest = 1; |
4278 |
26 Jan 23 |
peter |
163 |
|
4278 |
26 Jan 23 |
peter |
164 |
iter->curr_off = 0; // don't seek any more |
4278 |
26 Jan 23 |
peter |
165 |
iter->curr_tid = tid; |
4278 |
26 Jan 23 |
peter |
166 |
iter->curr_beg = beg; |
4278 |
26 Jan 23 |
peter |
167 |
iter->curr_end = end; |
4278 |
26 Jan 23 |
peter |
168 |
|
4278 |
26 Jan 23 |
peter |
169 |
return ret; |
4278 |
26 Jan 23 |
peter |
170 |
} else { |
4278 |
26 Jan 23 |
peter |
171 |
ret = -1; break; |
4278 |
26 Jan 23 |
peter |
172 |
} |
4278 |
26 Jan 23 |
peter |
173 |
} else if (iter->i < iter->n_off) { |
4278 |
26 Jan 23 |
peter |
// New chunk may overlap the last one, so ensure we |
4278 |
26 Jan 23 |
peter |
// only seek forwards. |
4278 |
26 Jan 23 |
peter |
176 |
if (iter->curr_off < iter->off[iter->i].u || next_range) { |
4278 |
26 Jan 23 |
peter |
177 |
iter->curr_off = iter->off[iter->i].u; |
4278 |
26 Jan 23 |
peter |
178 |
|
4278 |
26 Jan 23 |
peter |
179 |
if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { |
4278 |
26 Jan 23 |
peter |
180 |
return -1; |
4278 |
26 Jan 23 |
peter |
181 |
} |
4278 |
26 Jan 23 |
peter |
182 |
} |
4278 |
26 Jan 23 |
peter |
183 |
} |
4278 |
26 Jan 23 |
peter |
184 |
} |
4278 |
26 Jan 23 |
peter |
185 |
|
4278 |
26 Jan 23 |
peter |
186 |
ret = iter->readrec(fp, fd, r, &tid, &beg, &end); |
4278 |
26 Jan 23 |
peter |
187 |
if (ret < 0) { |
4278 |
26 Jan 23 |
peter |
188 |
break; |
4278 |
26 Jan 23 |
peter |
189 |
} |
4278 |
26 Jan 23 |
peter |
190 |
|
4278 |
26 Jan 23 |
peter |
191 |
iter->curr_off = iter->tell(fp); |
4278 |
26 Jan 23 |
peter |
192 |
|
4278 |
26 Jan 23 |
peter |
193 |
if (tid != iter->curr_tid) { |
4278 |
26 Jan 23 |
peter |
194 |
hts_reglist_t key; |
4278 |
26 Jan 23 |
peter |
195 |
key.tid = tid; |
4278 |
26 Jan 23 |
peter |
196 |
|
4278 |
26 Jan 23 |
peter |
197 |
found_reg = (hts_reglist_t *)bsearch(&key, iter->reg_list, |
4278 |
26 Jan 23 |
peter |
198 |
iter->n_reg, |
4278 |
26 Jan 23 |
peter |
199 |
sizeof(hts_reglist_t), |
4278 |
26 Jan 23 |
peter |
200 |
compare_regions); |
4278 |
26 Jan 23 |
peter |
201 |
if (!found_reg) |
4278 |
26 Jan 23 |
peter |
202 |
continue; |
4278 |
26 Jan 23 |
peter |
203 |
|
4278 |
26 Jan 23 |
peter |
204 |
iter->curr_reg = (found_reg - iter->reg_list); |
4278 |
26 Jan 23 |
peter |
205 |
iter->curr_tid = tid; |
4278 |
26 Jan 23 |
peter |
206 |
iter->curr_intv = 0; |
4278 |
26 Jan 23 |
peter |
207 |
} |
4278 |
26 Jan 23 |
peter |
208 |
|
4278 |
26 Jan 23 |
peter |
209 |
int cr = iter->curr_reg; |
4278 |
26 Jan 23 |
peter |
210 |
int ci = iter->curr_intv; |
4278 |
26 Jan 23 |
peter |
211 |
|
4278 |
26 Jan 23 |
peter |
212 |
for (size_t i = ci; i < iter->reg_list[cr].count; i++) { |
4278 |
26 Jan 23 |
peter |
213 |
if (end > iter->reg_list[cr].intervals[i].beg && |
4278 |
26 Jan 23 |
peter |
214 |
iter->reg_list[cr].intervals[i].end > beg) { |
4278 |
26 Jan 23 |
peter |
215 |
iter->curr_beg = beg; |
4278 |
26 Jan 23 |
peter |
216 |
iter->curr_end = end; |
4278 |
26 Jan 23 |
peter |
217 |
iter->curr_intv = i; |
4278 |
26 Jan 23 |
peter |
218 |
|
4278 |
26 Jan 23 |
peter |
219 |
return ret; |
4278 |
26 Jan 23 |
peter |
220 |
} |
4278 |
26 Jan 23 |
peter |
221 |
|
4278 |
26 Jan 23 |
peter |
// Check if the read starts beyond intervals[i].end |
4278 |
26 Jan 23 |
peter |
// If so, the interval is finished so move on to the next. |
4278 |
26 Jan 23 |
peter |
224 |
if (beg > iter->reg_list[cr].intervals[i].end) |
4278 |
26 Jan 23 |
peter |
225 |
iter->curr_intv = i + 1; |
4278 |
26 Jan 23 |
peter |
226 |
|
4278 |
26 Jan 23 |
peter |
// No need to keep searching if the read ends before intervals[i].beg |
4278 |
26 Jan 23 |
peter |
228 |
if (end < iter->reg_list[cr].intervals[i].beg) |
4278 |
26 Jan 23 |
peter |
229 |
break; |
4278 |
26 Jan 23 |
peter |
230 |
} |
4278 |
26 Jan 23 |
peter |
231 |
} |
4278 |
26 Jan 23 |
peter |
232 |
iter->finished = 1; |
4278 |
26 Jan 23 |
peter |
233 |
|
4278 |
26 Jan 23 |
peter |
234 |
return ret; |
4278 |
26 Jan 23 |
peter |
235 |
} |
4278 |
26 Jan 23 |
peter |
236 |
|
4278 |
26 Jan 23 |
peter |
237 |
|
4278 |
26 Jan 23 |
peter |
// Taken from sam.h in htslib v1.16 |
4278 |
26 Jan 23 |
peter |
// Author: Heng Li |
4278 |
26 Jan 23 |
peter |
240 |
int sam_itr_next(htsFile *htsfp, hts_itr_t *itr, bam1_t *r) |
4278 |
26 Jan 23 |
peter |
241 |
{ |
4278 |
26 Jan 23 |
peter |
242 |
if (itr->multi) |
4278 |
26 Jan 23 |
peter |
243 |
return detail::hts_itr_multi_next(htsfp, itr, r); |
4278 |
26 Jan 23 |
peter |
// single region iterator works in htslib 1.10 so we can use it |
4278 |
26 Jan 23 |
peter |
245 |
return hts_itr_next(htsfp->fp.bgzf, itr, r, htsfp); |
4278 |
26 Jan 23 |
peter |
246 |
} |
4278 |
26 Jan 23 |
peter |
247 |
|
4278 |
26 Jan 23 |
peter |
248 |
}}}} |