yat  0.12.3pre
qQuantileNormalizer.h
1 #ifndef _theplu_yat_normalizer_qquantile_normalizer_
2 #define _theplu_yat_normalizer_qquantile_normalizer_
3 
4 /*
5  Copyright (C) 2009 Jari Häkkinen, Peter Johansson
6  Copyright (C) 2010 Peter Johansson
7 
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9 
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14 
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22 */
23 
24 #include "utility.h"
25 
26 #include "yat/regression/CSplineInterpolation.h"
27 #include "yat/utility/concept_check.h"
28 #include "yat/utility/DataIterator.h"
29 #include "yat/utility/DataWeight.h"
30 #include "yat/utility/Exception.h"
31 #include "yat/utility/iterator_traits.h"
32 #include "yat/utility/sort_index.h"
33 #include "yat/utility/Vector.h"
34 #include "yat/utility/WeightIterator.h"
35 #include "yat/utility/yat_assert.h"
36 
37 #include <boost/concept_check.hpp>
38 
39 #include <algorithm>
40 #include <cmath>
41 #include <iterator>
42 #include <limits>
43 #include <numeric>
44 #include <stdexcept>
45 #include <vector>
46 
47 namespace theplu {
48 namespace yat {
49 namespace utility {
50  class VectorBase;
51 }
52 namespace normalizer {
53 
89  {
90  public:
117  template<typename ForwardIterator>
118  qQuantileNormalizer(ForwardIterator first, ForwardIterator last,
119  unsigned int Q);
120 
144  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
145  void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last,
146  RandomAccessIterator2 result) const;
147 
148  private:
149 
159  class Partitioner
160  {
161  public:
165  template<typename ForwardIterator>
166  Partitioner(ForwardIterator first, ForwardIterator last,
167  unsigned int N);
168 
174  const utility::Vector& averages(void) const;
175 
189  const utility::Vector& quantiles(void) const;
190 
194  size_t size(void) const;
195 
196  private:
197  // unweighted "constructor"
198  template<typename ForwardIterator>
199  void build(ForwardIterator first, ForwardIterator last, unsigned int N,
201  // weighted "constructor"
202  template<typename ForwardIterator>
203  void build(ForwardIterator first, ForwardIterator last, unsigned int N,
205  void init(const utility::VectorBase&, unsigned int N);
206  void init(const std::vector<utility::DataWeight>&, unsigned int N);
207 
208  utility::Vector average_;
209  utility::Vector quantiles_;
210  };
211 
212  Partitioner target_;
213 
214  // unweighted version
215  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
216  void normalize(const Partitioner& source,RandomAccessIterator1 first,
217  RandomAccessIterator1 last, RandomAccessIterator2 result,
219 
220  // weighted version
221  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
222  void normalize(const Partitioner& source,RandomAccessIterator1 first,
223  RandomAccessIterator1 last, RandomAccessIterator2 result,
225  };
226 
227 
228  // template implementations
229 
230  template<typename ForwardIterator>
232  ForwardIterator last,
233  unsigned int Q)
234  : target_(Partitioner(first, last, Q))
235  {
236  YAT_ASSERT(Q>2);
237  }
238 
239 
240  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
241  void qQuantileNormalizer::operator()(RandomAccessIterator1 first,
242  RandomAccessIterator1 last,
243  RandomAccessIterator2 result) const
244  {
245  Partitioner source(first, last, target_.size());
247  normalize(source, first, last, result, tag);
248  }
249 
250 
251  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
252  void
253  qQuantileNormalizer::normalize(const qQuantileNormalizer::Partitioner& source,
254  RandomAccessIterator1 first,
255  RandomAccessIterator1 last,
256  RandomAccessIterator2 result,
258  {
259  BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
260  BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
265  size_t N = last-first;
266  YAT_ASSERT(N >= target_.size());
267 
268  std::vector<size_t> sorted_index(last-first);
269  utility::sort_index(first, last, sorted_index);
270 
271  utility::Vector diff(source.averages());
272  diff-=target_.averages();
273  const utility::Vector& idx=target_.quantiles();
274  regression::CSplineInterpolation cspline(idx,diff);
275 
276  // linear extrapolation for first part, i.e., use first diff for
277  // all points in the first part.
278  size_t start=0;
279  size_t end = static_cast<size_t>(std::ceil(N*idx(0) - 0.5));
280  // take care of limiting case number of parts approximately equal
281  // to the number of elements in range.
282  if (end==0)
283  ++end;
284  for (size_t i=start; i<end; ++i) {
285  size_t si = sorted_index[i];
286  result[si] = first[si] - diff(0);
287  }
288 
289  using utility::yat_assert;
290 
291  // cspline interpolation for all data between the mid points of
292  // the first and last part
293  start=end;
294  end = static_cast<size_t>(std::ceil(N*idx(idx.size()-1) - 0.5));
295  if (end>=N)
296  end = N-1;
297  for ( size_t i=start; i<end; ++i) {
298  size_t si = sorted_index[i];
299 
300  YAT_ASSERT((i+0.5)/N>idx(0));
301  result[si] = first[si] - cspline.evaluate((i+0.5)/N);
302  }
303 
304  // linear extrapolation for last part, i.e., use last diff for
305  // all points in the last part.
306  for (size_t i=end ; i<N; ++i) {
307  size_t si = sorted_index[i];
308  result[si] = first[si] - diff(diff.size()-1);
309  }
310  }
311 
312 
313  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
314  void qQuantileNormalizer::normalize(const Partitioner& source,
315  RandomAccessIterator1 first,
316  RandomAccessIterator1 last,
317  RandomAccessIterator2 result,
318  utility::weighted_iterator_tag tag) const
319  {
320  BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
321  BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
322  BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator1>));
323  BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator2>));
324  // copy the weights
325  detail::copy_weight_if_weighted(first, last, result);
326 
327  double total_w = std::accumulate(utility::weight_iterator(first),
328  utility::weight_iterator(last), 0.0);
329 
330  std::vector<size_t> sorted_index(last-first);
331  // Code to avoid problems with NaN (ticket:535)
332  // utility::sort_index(utility::data_iterator(first),
333  // utility::data_iterator(last), sorted_index);
334  // ... above statement replaced below code block
335  {
336  using namespace utility;
337  std::vector<double> vec;
338  vec.reserve(std::distance(first, last));
339  std::copy(utility::data_iterator(first), utility::data_iterator(last),
340  std::back_inserter(vec));
341  for (std::vector<double>::iterator i(vec.begin()); i!=vec.end(); ++i)
342  if (std::isnan(*i))
343  *i = std::numeric_limits<double>::infinity();
344  utility::sort_index(vec.begin(), vec.end(), sorted_index);
345  }
346  // end Code to avoid problems with NaN (ticket:535)
347 
348  utility::Vector diff(source.averages());
349  diff-=target_.averages();
350  const utility::Vector& idx=target_.quantiles();
351  regression::CSplineInterpolation cspline(idx,diff);
352 
353  double sum_w=0;
354  utility::iterator_traits<RandomAccessIterator1> traits1;
355  utility::iterator_traits<RandomAccessIterator2> traits2;
356  for (size_t i=0; i<sorted_index.size(); ++i) {
357  size_t si = sorted_index[i];
358  double w = (sum_w + 0.5*traits1.weight(first+si))/total_w;
359  double correction = 0;
360  if (w <= idx(0)) {
361  // linear extrapolation for first part, i.e., use first diff for
362  // all points in the first part.
363  correction = diff(0);
364  }
365  else if (w < idx(idx.size()-1) ) {
366  // cspline interpolation for all data between the mid points of
367  // the first and last part
368  correction = cspline.evaluate(w);
369  }
370  else {
371  // linear extrapolation for last part, i.e., use last diff for
372  // all points in the last part.
373  correction = diff(diff.size()-1);
374  }
375  traits2.data(result+si) = traits1.data(first+si) - correction;
376  sum_w += traits1.weight(first+si);
377  }
378  }
379 
380 
381  template<typename ForwardIterator>
382  qQuantileNormalizer::Partitioner::Partitioner(ForwardIterator first,
383  ForwardIterator last,
384  unsigned int N)
385  : average_(utility::Vector(N)), quantiles_(utility::Vector(N))
386  {
387  BOOST_CONCEPT_ASSERT((boost::ForwardIterator<ForwardIterator>));
388  BOOST_CONCEPT_ASSERT((utility::DataIterator<ForwardIterator>));
389  build(first, last, N,
390  typename utility::weighted_iterator_traits<ForwardIterator>::type());
391  }
392 
393 
394  template<typename ForwardIterator>
395  void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
396  ForwardIterator last,
397  unsigned int N,
398  utility::unweighted_iterator_tag)
399  {
400  utility::Vector vec(std::distance(first, last));
401  std::copy(first, last, vec.begin());
402  std::sort(vec.begin(), vec.end());
403  init(vec, N);
404  }
405 
406 
407  template<typename ForwardIterator>
408  void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
409  ForwardIterator last,
410  unsigned int N,
411  utility::weighted_iterator_tag)
412  {
413  using namespace utility;
414  std::vector<DataWeight> vec;
415  vec.reserve(std::distance(first, last));
416  std::back_insert_iterator<std::vector<DataWeight> > inserter(vec);
417  std::copy(first, last, inserter);
418  std::sort(vec.begin(), vec.end());
419  init(vec, N);
420  }
421 
422 
423 }}} // end of namespace normalizer, yat and thep
424 
425 #endif
Perform Q-quantile normalization.
Definition: qQuantileNormalizer.h:88
Cubic spline with natural boundary conditions.
Definition: CSplineInterpolation.h:47
Concept check for Data Iterator.
Definition: concept_check.h:224
Definition: iterator_traits.h:46
void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last, RandomAccessIterator2 result) const
Perform the Q-quantile normalization.
Definition: qQuantileNormalizer.h:241
Definition: iterator_traits.h:54
This is the yat interface to GSL vector.
Definition: Vector.h:57
qQuantileNormalizer(ForwardIterator first, ForwardIterator last, unsigned int Q)
Constructor.
Definition: qQuantileNormalizer.h:231
This is the yat interface to GSL vector.
Definition: VectorBase.h:52
void check_iterator_is_unweighted(Iter iter)
check (at compile time) that iterator is unweighted.
Definition: iterator_traits.h:194
void sort_index(ForwardIterator first, ForwardIterator last, std::vector< size_t > &sort_index)
Definition: sort_index.h:94
detail::unweighted_type_and< w_type1, w_type2 >::type type
return unweighted if both are unweighted
Definition: iterator_traits.h:159

Generated on Mon Jun 1 2015 12:29:51 for yat by  doxygen 1.8.5