00001 #ifndef _theplu_yat_normalizer_qquantile_normalizer_
00002 #define _theplu_yat_normalizer_qquantile_normalizer_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "utility.h"
00025
00026 #include "yat/regression/CSplineInterpolation.h"
00027 #include "yat/utility/concept_check.h"
00028 #include "yat/utility/DataIterator.h"
00029 #include "yat/utility/DataWeight.h"
00030 #include "yat/utility/Exception.h"
00031 #include "yat/utility/iterator_traits.h"
00032 #include "yat/utility/sort_index.h"
00033 #include "yat/utility/Vector.h"
00034 #include "yat/utility/WeightIterator.h"
00035 #include "yat/utility/yat_assert.h"
00036
00037 #include <boost/concept_check.hpp>
00038
00039 #include <algorithm>
00040 #include <cmath>
00041 #include <iterator>
00042 #include <limits>
00043 #include <numeric>
00044 #include <stdexcept>
00045 #include <vector>
00046
00047 namespace theplu {
00048 namespace yat {
00049 namespace utility {
00050 class VectorBase;
00051 }
00052 namespace normalizer {
00053
00088 class qQuantileNormalizer
00089 {
00090 public:
00117 template<typename ForwardIterator>
00118 qQuantileNormalizer(ForwardIterator first, ForwardIterator last,
00119 unsigned int Q);
00120
00144 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00145 void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last,
00146 RandomAccessIterator2 result) const;
00147
00148 private:
00149
00159 class Partitioner
00160 {
00161 public:
00165 template<typename ForwardIterator>
00166 Partitioner(ForwardIterator first, ForwardIterator last,
00167 unsigned int N);
00168
00174 const utility::Vector& averages(void) const;
00175
00189 const utility::Vector& quantiles(void) const;
00190
00194 size_t size(void) const;
00195
00196 private:
00197
00198 template<typename ForwardIterator>
00199 void build(ForwardIterator first, ForwardIterator last, unsigned int N,
00200 utility::unweighted_iterator_tag);
00201
00202 template<typename ForwardIterator>
00203 void build(ForwardIterator first, ForwardIterator last, unsigned int N,
00204 utility::weighted_iterator_tag);
00205 void init(const utility::VectorBase&, unsigned int N);
00206 void init(const std::vector<utility::DataWeight>&, unsigned int N);
00207
00208 utility::Vector average_;
00209 utility::Vector quantiles_;
00210 };
00211
00212 Partitioner target_;
00213
00214
00215 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00216 void normalize(const Partitioner& source,RandomAccessIterator1 first,
00217 RandomAccessIterator1 last, RandomAccessIterator2 result,
00218 utility::unweighted_iterator_tag tag) const;
00219
00220
00221 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00222 void normalize(const Partitioner& source,RandomAccessIterator1 first,
00223 RandomAccessIterator1 last, RandomAccessIterator2 result,
00224 utility::weighted_iterator_tag tag) const;
00225 };
00226
00227
00228
00229
00230 template<typename ForwardIterator>
00231 qQuantileNormalizer::qQuantileNormalizer(ForwardIterator first,
00232 ForwardIterator last,
00233 unsigned int Q)
00234 : target_(Partitioner(first, last, Q))
00235 {
00236 YAT_ASSERT(Q>2);
00237 }
00238
00239
00240 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00241 void qQuantileNormalizer::operator()(RandomAccessIterator1 first,
00242 RandomAccessIterator1 last,
00243 RandomAccessIterator2 result) const
00244 {
00245 Partitioner source(first, last, target_.size());
00246 typename utility::weighted_if_any2<RandomAccessIterator1, RandomAccessIterator2>::type tag;
00247 normalize(source, first, last, result, tag);
00248 }
00249
00250
00251 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00252 void
00253 qQuantileNormalizer::normalize(const qQuantileNormalizer::Partitioner& source,
00254 RandomAccessIterator1 first,
00255 RandomAccessIterator1 last,
00256 RandomAccessIterator2 result,
00257 utility::unweighted_iterator_tag tag) const
00258 {
00259 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
00260 BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
00261 BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator1>));
00262 BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator2>));
00263 utility::check_iterator_is_unweighted(first);
00264 utility::check_iterator_is_unweighted(result);
00265 size_t N = last-first;
00266 YAT_ASSERT(N >= target_.size());
00267
00268 std::vector<size_t> sorted_index(last-first);
00269 utility::sort_index(first, last, sorted_index);
00270
00271 utility::Vector diff(source.averages());
00272 diff-=target_.averages();
00273 const utility::Vector& idx=target_.quantiles();
00274 regression::CSplineInterpolation cspline(idx,diff);
00275
00276
00277
00278 size_t start=0;
00279 size_t end = static_cast<size_t>(std::ceil(N*idx(0) - 0.5));
00280
00281
00282 if (end==0)
00283 ++end;
00284 for (size_t i=start; i<end; ++i) {
00285 size_t si = sorted_index[i];
00286 result[si] = first[si] - diff(0);
00287 }
00288
00289 using utility::yat_assert;
00290
00291
00292
00293 start=end;
00294 end = static_cast<size_t>(std::ceil(N*idx(idx.size()-1) - 0.5));
00295 if (end>=N)
00296 end = N-1;
00297 for ( size_t i=start; i<end; ++i) {
00298 size_t si = sorted_index[i];
00299
00300 YAT_ASSERT((i+0.5)/N>idx(0));
00301 result[si] = first[si] - cspline.evaluate((i+0.5)/N);
00302 }
00303
00304
00305
00306 for (size_t i=end ; i<N; ++i) {
00307 size_t si = sorted_index[i];
00308 result[si] = first[si] - diff(diff.size()-1);
00309 }
00310 }
00311
00312
00313 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00314 void qQuantileNormalizer::normalize(const Partitioner& source,
00315 RandomAccessIterator1 first,
00316 RandomAccessIterator1 last,
00317 RandomAccessIterator2 result,
00318 utility::weighted_iterator_tag tag) const
00319 {
00320 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
00321 BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
00322 BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator1>));
00323 BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator2>));
00324
00325 detail::copy_weight_if_weighted(first, last, result);
00326
00327 double total_w = std::accumulate(utility::weight_iterator(first),
00328 utility::weight_iterator(last), 0.0);
00329
00330 std::vector<size_t> sorted_index(last-first);
00331
00332
00333
00334
00335 {
00336 using namespace utility;
00337 std::vector<double> vec;
00338 vec.reserve(std::distance(first, last));
00339 std::copy(utility::data_iterator(first), utility::data_iterator(last),
00340 std::back_inserter(vec));
00341 for (std::vector<double>::iterator i(vec.begin()); i!=vec.end(); ++i)
00342 if (std::isnan(*i))
00343 *i = std::numeric_limits<double>::infinity();
00344 utility::sort_index(vec.begin(), vec.end(), sorted_index);
00345 }
00346
00347
00348 utility::Vector diff(source.averages());
00349 diff-=target_.averages();
00350 const utility::Vector& idx=target_.quantiles();
00351 regression::CSplineInterpolation cspline(idx,diff);
00352
00353 double sum_w=0;
00354 utility::iterator_traits<RandomAccessIterator1> traits1;
00355 utility::iterator_traits<RandomAccessIterator2> traits2;
00356 for (size_t i=0; i<sorted_index.size(); ++i) {
00357 size_t si = sorted_index[i];
00358 double w = (sum_w + 0.5*traits1.weight(first+si))/total_w;
00359 double correction = 0;
00360 if (w <= idx(0)) {
00361
00362
00363 correction = diff(0);
00364 }
00365 else if (w < idx(idx.size()-1) ) {
00366
00367
00368 correction = cspline.evaluate(w);
00369 }
00370 else {
00371
00372
00373 correction = diff(diff.size()-1);
00374 }
00375 traits2.data(result+si) = traits1.data(first+si) - correction;
00376 sum_w += traits1.weight(first+si);
00377 }
00378 }
00379
00380
00381 template<typename ForwardIterator>
00382 qQuantileNormalizer::Partitioner::Partitioner(ForwardIterator first,
00383 ForwardIterator last,
00384 unsigned int N)
00385 : average_(utility::Vector(N)), quantiles_(utility::Vector(N))
00386 {
00387 BOOST_CONCEPT_ASSERT((boost::ForwardIterator<ForwardIterator>));
00388 BOOST_CONCEPT_ASSERT((utility::DataIterator<ForwardIterator>));
00389 build(first, last, N,
00390 typename utility::weighted_iterator_traits<ForwardIterator>::type());
00391 }
00392
00393
00394 template<typename ForwardIterator>
00395 void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
00396 ForwardIterator last,
00397 unsigned int N,
00398 utility::unweighted_iterator_tag)
00399 {
00400 utility::Vector vec(std::distance(first, last));
00401 std::copy(first, last, vec.begin());
00402 std::sort(vec.begin(), vec.end());
00403 init(vec, N);
00404 }
00405
00406
00407 template<typename ForwardIterator>
00408 void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
00409 ForwardIterator last,
00410 unsigned int N,
00411 utility::weighted_iterator_tag)
00412 {
00413 using namespace utility;
00414 std::vector<DataWeight> vec;
00415 vec.reserve(std::distance(first, last));
00416 std::back_insert_iterator<std::vector<DataWeight> > inserter(vec);
00417 std::copy(first, last, inserter);
00418 std::sort(vec.begin(), vec.end());
00419 init(vec, N);
00420 }
00421
00422
00423 }}}
00424
00425 #endif