yat  0.21pre
qQuantileNormalizer.h
1 #ifndef _theplu_yat_normalizer_qquantile_normalizer_
2 #define _theplu_yat_normalizer_qquantile_normalizer_
3 
4 /*
5  Copyright (C) 2009 Jari Häkkinen, Peter Johansson
6  Copyright (C) 2010, 2016, 2017 Peter Johansson
7 
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9 
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14 
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22 */
23 
24 #include "utility.h"
25 
26 #include "yat/regression/CSplineInterpolation.h"
27 #include "yat/utility/concept_check.h"
28 #include "yat/utility/DataIterator.h"
29 #include "yat/utility/DataWeight.h"
30 #include "yat/utility/iterator_traits.h"
31 #include "yat/utility/sort_index.h"
32 #include "yat/utility/Vector.h"
33 #include "yat/utility/WeightIterator.h"
34 #include "yat/utility/yat_assert.h"
35 
36 #include <boost/concept_check.hpp>
37 
38 #include <algorithm>
39 #include <cmath>
40 #include <iterator>
41 #include <limits>
42 #include <numeric>
43 #include <stdexcept>
44 #include <vector>
45 
46 namespace theplu {
47 namespace yat {
48 namespace utility {
49  class VectorBase;
50 }
51 namespace normalizer {
52 
88  {
89  public:
116  template<typename ForwardIterator>
117  qQuantileNormalizer(ForwardIterator first, ForwardIterator last,
118  unsigned int Q);
119 
142  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
143  void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last,
144  RandomAccessIterator2 result) const;
145 
146  private:
147 
157  class Partitioner
158  {
159  public:
163  template<typename ForwardIterator>
164  Partitioner(ForwardIterator first, ForwardIterator last,
165  unsigned int N);
166 
172  const utility::Vector& averages(void) const;
173 
187  const utility::Vector& quantiles(void) const;
188 
192  size_t size(void) const;
193 
194  private:
195  // unweighted "constructor"
196  template<typename ForwardIterator>
197  void build(ForwardIterator first, ForwardIterator last, unsigned int N,
199  // weighted "constructor"
200  template<typename ForwardIterator>
201  void build(ForwardIterator first, ForwardIterator last, unsigned int N,
203  void init(const utility::VectorBase&, unsigned int N);
204  void init(const std::vector<utility::DataWeight>&, unsigned int N);
205 
206  utility::Vector average_;
207  utility::Vector quantiles_;
208  };
209 
210  Partitioner target_;
211 
212  // unweighted version
213  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
214  void normalize(const Partitioner& source,RandomAccessIterator1 first,
215  RandomAccessIterator1 last, RandomAccessIterator2 result,
217 
218  // weighted version
219  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
220  void normalize(const Partitioner& source,RandomAccessIterator1 first,
221  RandomAccessIterator1 last, RandomAccessIterator2 result,
223  };
224 
225 
226  // template implementations
227 
228  template<typename ForwardIterator>
230  ForwardIterator last,
231  unsigned int Q)
232  : target_(Partitioner(first, last, Q))
233  {
234  YAT_ASSERT(Q>2);
235  }
236 
237 
238  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
239  void qQuantileNormalizer::operator()(RandomAccessIterator1 first,
240  RandomAccessIterator1 last,
241  RandomAccessIterator2 result) const
242  {
243  BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator1>));
244  using boost_concepts::RandomAccessTraversal;
245  BOOST_CONCEPT_ASSERT((RandomAccessTraversal<RandomAccessIterator1>));
246  BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator2>));
247  BOOST_CONCEPT_ASSERT((RandomAccessTraversal<RandomAccessIterator2>));
248  using boost_concepts::WritableIterator;
249  BOOST_CONCEPT_ASSERT((WritableIterator<RandomAccessIterator2>));
250 
251  Partitioner source(first, last, target_.size());
253  normalize(source, first, last, result, tag);
254  }
255 
256 
257  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
258  void
259  qQuantileNormalizer::normalize(const qQuantileNormalizer::Partitioner& source,
260  RandomAccessIterator1 first,
261  RandomAccessIterator1 last,
262  RandomAccessIterator2 result,
264  {
266  // copy the weights if needed
267  detail::copy_weight_if_weighted(first, last, result);
268 
269  size_t N = last-first;
270  YAT_ASSERT(N >= target_.size());
271 
272  std::vector<size_t> sorted_index(last-first);
273  utility::sort_index(first, last, sorted_index);
274 
275  utility::Vector diff(source.averages());
276  diff-=target_.averages();
277  const utility::Vector& idx=target_.quantiles();
278  regression::CSplineInterpolation cspline(idx,diff);
279 
280  // linear extrapolation for first part, i.e., use first diff for
281  // all points in the first part.
282  size_t start=0;
283  size_t end = static_cast<size_t>(std::ceil(N*idx(0) - 0.5));
284  // take care of limiting case number of parts approximately equal
285  // to the number of elements in range.
288  if (end==0)
289  ++end;
290  for (size_t i=start; i<end; ++i) {
291  size_t si = sorted_index[i];
292  traits2.data(result+si) = traits1.data(first+si) - diff(0);
293  }
294 
295  using utility::yat_assert;
296 
297  // cspline interpolation for all data between the mid points of
298  // the first and last part
299  start=end;
300  end = static_cast<size_t>(std::ceil(N*idx(idx.size()-1) - 0.5));
301  if (end>=N)
302  end = N-1;
303  for ( size_t i=start; i<end; ++i) {
304  size_t si = sorted_index[i];
305 
306  YAT_ASSERT((i+0.5)/N>idx(0));
307  traits2.data(result+si) =
308  traits1.data(first+si) - cspline.evaluate((i+0.5)/N);
309  }
310 
311  // linear extrapolation for last part, i.e., use last diff for
312  // all points in the last part.
313  for (size_t i=end ; i<N; ++i) {
314  size_t si = sorted_index[i];
315  traits2.data(result+si) = traits1.data(first+si) - diff(diff.size()-1);
316  }
317  }
318 
319 
320  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
321  void qQuantileNormalizer::normalize(const Partitioner& source,
322  RandomAccessIterator1 first,
323  RandomAccessIterator1 last,
324  RandomAccessIterator2 result,
325  utility::weighted_iterator_tag tag) const
326  {
327  // copy the weights
328  detail::copy_weight_if_weighted(first, last, result);
329 
330  double total_w = std::accumulate(utility::weight_iterator(first),
331  utility::weight_iterator(last), 0.0);
332 
333  std::vector<size_t> sorted_index(last-first);
334  utility::sort_index(first, last, sorted_index);
335 
336  utility::Vector diff(source.averages());
337  diff-=target_.averages();
338  const utility::Vector& idx=target_.quantiles();
339  regression::CSplineInterpolation cspline(idx,diff);
340 
341  double sum_w=0;
342  utility::iterator_traits<RandomAccessIterator1> traits1;
343  utility::iterator_traits<RandomAccessIterator2> traits2;
344  for (size_t i=0; i<sorted_index.size(); ++i) {
345  size_t si = sorted_index[i];
346  double w = (sum_w + 0.5*traits1.weight(first+si))/total_w;
347  double correction = 0;
348  if (w <= idx(0)) {
349  // linear extrapolation for first part, i.e., use first diff for
350  // all points in the first part.
351  correction = diff(0);
352  }
353  else if (w < idx(idx.size()-1) ) {
354  // cspline interpolation for all data between the mid points of
355  // the first and last part
356  correction = cspline.evaluate(w);
357  }
358  else {
359  // linear extrapolation for last part, i.e., use last diff for
360  // all points in the last part.
361  correction = diff(diff.size()-1);
362  }
363  traits2.data(result+si) = traits1.data(first+si) - correction;
364  sum_w += traits1.weight(first+si);
365  }
366  }
367 
368 
369  template<typename ForwardIterator>
370  qQuantileNormalizer::Partitioner::Partitioner(ForwardIterator first,
371  ForwardIterator last,
372  unsigned int N)
373  : average_(utility::Vector(N)), quantiles_(utility::Vector(N))
374  {
375  BOOST_CONCEPT_ASSERT((boost_concepts::ForwardTraversal<ForwardIterator>));
376  BOOST_CONCEPT_ASSERT((utility::DataIterator<ForwardIterator>));
377  build(first, last, N,
379  }
380 
381 
382  template<typename ForwardIterator>
383  void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
384  ForwardIterator last,
385  unsigned int N,
386  utility::unweighted_iterator_tag)
387  {
388  utility::Vector vec(std::distance(first, last));
389  std::copy(first, last, vec.begin());
390  std::sort(vec.begin(), vec.end());
391  init(vec, N);
392  }
393 
394 
395  template<typename ForwardIterator>
396  void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
397  ForwardIterator last,
398  unsigned int N,
399  utility::weighted_iterator_tag)
400  {
401  using namespace utility;
402  std::vector<DataWeight> vec(first, last);
403  std::sort(vec.begin(), vec.end());
404  init(vec, N);
405  }
406 
407 
408 }}} // end of namespace normalizer, yat and thep
409 
410 #endif
detail::weighted_iterator_traits_detail< value >::type type
Definition: iterator_traits.h:114
Perform Q-quantile normalization.
Definition: qQuantileNormalizer.h:87
Cubic spline with natural boundary conditions.
Definition: CSplineInterpolation.h:47
data_reference data(Iter iter) const
Definition: iterator_traits.h:440
Definition: iterator_traits.h:412
The Department of Theoretical Physics namespace as we define it.
Definition: iterator_traits.h:47
Definition: iterator_traits.h:55
DataIterator.
Definition: DataIterator.h:63
void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last, RandomAccessIterator2 result) const
Perform the Q-quantile normalization.
Definition: qQuantileNormalizer.h:239
This is the yat interface to GSL vector.
Definition: Vector.h:59
qQuantileNormalizer(ForwardIterator first, ForwardIterator last, unsigned int Q)
Constructor.
Definition: qQuantileNormalizer.h:229
This is the yat interface to GSL vector.
Definition: VectorBase.h:55
void check_iterator_is_unweighted(Iter iter)
check (at compile time) that iterator is unweighted.
Definition: iterator_traits.h:200
void sort_index(InputIterator first, InputIterator last, std::vector< size_t > &sort_index)
Definition: sort_index.h:146

Generated on Wed Jan 25 2023 03:34:29 for yat by  doxygen 1.8.14