1 #ifndef _theplu_yat_normalizer_qquantile_normalizer_
2 #define _theplu_yat_normalizer_qquantile_normalizer_
26 #include "yat/regression/CSplineInterpolation.h"
27 #include "yat/utility/concept_check.h"
28 #include "yat/utility/DataIterator.h"
29 #include "yat/utility/DataWeight.h"
30 #include "yat/utility/Exception.h"
31 #include "yat/utility/iterator_traits.h"
32 #include "yat/utility/sort_index.h"
33 #include "yat/utility/Vector.h"
34 #include "yat/utility/WeightIterator.h"
35 #include "yat/utility/yat_assert.h"
37 #include <boost/concept_check.hpp>
52 namespace normalizer {
117 template<
typename ForwardIterator>
144 template<
typename RandomAccessIterator1,
typename RandomAccessIterator2>
145 void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last,
146 RandomAccessIterator2 result)
const;
165 template<
typename ForwardIterator>
166 Partitioner(ForwardIterator first, ForwardIterator last,
194 size_t size(
void)
const;
198 template<
typename ForwardIterator>
199 void build(ForwardIterator first, ForwardIterator last,
unsigned int N,
202 template<
typename ForwardIterator>
203 void build(ForwardIterator first, ForwardIterator last,
unsigned int N,
206 void init(
const std::vector<utility::DataWeight>&,
unsigned int N);
215 template<
typename RandomAccessIterator1,
typename RandomAccessIterator2>
216 void normalize(
const Partitioner& source,RandomAccessIterator1 first,
217 RandomAccessIterator1 last, RandomAccessIterator2 result,
221 template<
typename RandomAccessIterator1,
typename RandomAccessIterator2>
222 void normalize(
const Partitioner& source,RandomAccessIterator1 first,
223 RandomAccessIterator1 last, RandomAccessIterator2 result,
230 template<
typename ForwardIterator>
232 ForwardIterator last,
234 : target_(Partitioner(first, last, Q))
240 template<
typename RandomAccessIterator1,
typename RandomAccessIterator2>
242 RandomAccessIterator1 last,
243 RandomAccessIterator2 result)
const
245 Partitioner source(first, last, target_.size());
247 normalize(source, first, last, result, tag);
251 template<
typename RandomAccessIterator1,
typename RandomAccessIterator2>
253 qQuantileNormalizer::normalize(
const qQuantileNormalizer::Partitioner& source,
254 RandomAccessIterator1 first,
255 RandomAccessIterator1 last,
256 RandomAccessIterator2 result,
259 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
260 BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
265 size_t N = last-first;
266 YAT_ASSERT(N >= target_.size());
268 std::vector<size_t> sorted_index(last-first);
272 diff-=target_.averages();
279 size_t end =
static_cast<size_t>(std::ceil(N*idx(0) - 0.5));
284 for (
size_t i=start; i<end; ++i) {
285 size_t si = sorted_index[i];
286 result[si] = first[si] - diff(0);
289 using utility::yat_assert;
294 end =
static_cast<size_t>(std::ceil(N*idx(idx.
size()-1) - 0.5));
297 for (
size_t i=start; i<end; ++i) {
298 size_t si = sorted_index[i];
300 YAT_ASSERT((i+0.5)/N>idx(0));
301 result[si] = first[si] - cspline.evaluate((i+0.5)/N);
306 for (
size_t i=end ; i<N; ++i) {
307 size_t si = sorted_index[i];
308 result[si] = first[si] - diff(diff.size()-1);
313 template<
typename RandomAccessIterator1,
typename RandomAccessIterator2>
314 void qQuantileNormalizer::normalize(
const Partitioner& source,
315 RandomAccessIterator1 first,
316 RandomAccessIterator1 last,
317 RandomAccessIterator2 result,
318 utility::weighted_iterator_tag tag)
const
320 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
321 BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
322 BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator1>));
323 BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator2>));
325 detail::copy_weight_if_weighted(first, last, result);
327 double total_w = std::accumulate(utility::weight_iterator(first),
328 utility::weight_iterator(last), 0.0);
330 std::vector<size_t> sorted_index(last-first);
336 using namespace utility;
337 std::vector<double> vec;
338 vec.reserve(std::distance(first, last));
339 std::copy(utility::data_iterator(first), utility::data_iterator(last),
340 std::back_inserter(vec));
341 for (std::vector<double>::iterator i(vec.begin()); i!=vec.end(); ++i)
343 *i = std::numeric_limits<double>::infinity();
348 utility::Vector diff(source.averages());
349 diff-=target_.averages();
350 const utility::Vector& idx=target_.quantiles();
351 regression::CSplineInterpolation cspline(idx,diff);
354 utility::iterator_traits<RandomAccessIterator1> traits1;
355 utility::iterator_traits<RandomAccessIterator2> traits2;
356 for (
size_t i=0; i<sorted_index.size(); ++i) {
357 size_t si = sorted_index[i];
358 double w = (sum_w + 0.5*traits1.weight(first+si))/total_w;
359 double correction = 0;
363 correction = diff(0);
365 else if (w < idx(idx.size()-1) ) {
368 correction = cspline.evaluate(w);
373 correction = diff(diff.size()-1);
375 traits2.data(result+si) = traits1.data(first+si) - correction;
376 sum_w += traits1.weight(first+si);
381 template<
typename ForwardIterator>
382 qQuantileNormalizer::Partitioner::Partitioner(ForwardIterator first,
383 ForwardIterator last,
385 : average_(utility::Vector(N)), quantiles_(utility::Vector(N))
387 BOOST_CONCEPT_ASSERT((boost::ForwardIterator<ForwardIterator>));
388 BOOST_CONCEPT_ASSERT((utility::DataIterator<ForwardIterator>));
389 build(first, last, N,
390 typename utility::weighted_iterator_traits<ForwardIterator>::type());
394 template<
typename ForwardIterator>
395 void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
396 ForwardIterator last,
398 utility::unweighted_iterator_tag)
400 utility::Vector vec(std::distance(first, last));
401 std::copy(first, last, vec.begin());
402 std::sort(vec.begin(), vec.end());
407 template<
typename ForwardIterator>
408 void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
409 ForwardIterator last,
411 utility::weighted_iterator_tag)
413 using namespace utility;
414 std::vector<DataWeight> vec;
415 vec.reserve(std::distance(first, last));
416 std::back_insert_iterator<std::vector<DataWeight> > inserter(vec);
417 std::copy(first, last, inserter);
418 std::sort(vec.begin(), vec.end());