00001 #ifndef _theplu_yat_normalizer_qquantile_normalizer_
00002 #define _theplu_yat_normalizer_qquantile_normalizer_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "utility.h"
00025
00026 #include "yat/regression/CSplineInterpolation.h"
00027 #include "yat/utility/DataIterator.h"
00028 #include "yat/utility/DataWeight.h"
00029 #include "yat/utility/iterator_traits.h"
00030 #include "yat/utility/sort_index.h"
00031 #include "yat/utility/Vector.h"
00032 #include "yat/utility/WeightIterator.h"
00033 #include "yat/utility/yat_assert.h"
00034
00035 #include <boost/concept_check.hpp>
00036
00037 #include <algorithm>
00038 #include <cmath>
00039 #include <iterator>
00040 #include <limits>
00041 #include <numeric>
00042 #include <stdexcept>
00043 #include <vector>
00044
00045 namespace theplu {
00046 namespace yat {
00047 namespace utility {
00048 class VectorBase;
00049 }
00050 namespace normalizer {
00051
00086 class qQuantileNormalizer
00087 {
00088 public:
00115 template<typename ForwardIterator>
00116 qQuantileNormalizer(ForwardIterator first, ForwardIterator last,
00117 unsigned int Q);
00118
00142 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00143 void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last,
00144 RandomAccessIterator2 result) const;
00145
00146 private:
00147
00157 class Partitioner
00158 {
00159 public:
00163 template<typename ForwardIterator>
00164 Partitioner(ForwardIterator first, ForwardIterator last,
00165 unsigned int N);
00166
00172 const utility::Vector& averages(void) const;
00173
00187 const utility::Vector& quantiles(void) const;
00188
00192 size_t size(void) const;
00193
00194 private:
00195
00196 template<typename ForwardIterator>
00197 void build(ForwardIterator first, ForwardIterator last, unsigned int N,
00198 utility::unweighted_iterator_tag);
00199
00200 template<typename ForwardIterator>
00201 void build(ForwardIterator first, ForwardIterator last, unsigned int N,
00202 utility::weighted_iterator_tag);
00203 void init(const utility::VectorBase&, unsigned int N);
00204 void init(const std::vector<utility::DataWeight>&, unsigned int N);
00205
00206 utility::Vector average_;
00207 utility::Vector quantiles_;
00208 };
00209
00210 Partitioner target_;
00211
00212
00213 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00214 void normalize(const Partitioner& source,RandomAccessIterator1 first,
00215 RandomAccessIterator1 last, RandomAccessIterator2 result,
00216 utility::unweighted_iterator_tag tag) const;
00217
00218
00219 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00220 void normalize(const Partitioner& source,RandomAccessIterator1 first,
00221 RandomAccessIterator1 last, RandomAccessIterator2 result,
00222 utility::weighted_iterator_tag tag) const;
00223 };
00224
00225
00226
00227
00228 template<typename ForwardIterator>
00229 qQuantileNormalizer::qQuantileNormalizer(ForwardIterator first,
00230 ForwardIterator last,
00231 unsigned int Q)
00232 : target_(Partitioner(first, last, Q))
00233 {
00234 utility::yat_assert<std::runtime_error>(Q>2,
00235 "qQuantileNormalizer: Q too small");
00236 }
00237
00238
00239 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00240 void qQuantileNormalizer::operator()(RandomAccessIterator1 first,
00241 RandomAccessIterator1 last,
00242 RandomAccessIterator2 result) const
00243 {
00244 Partitioner source(first, last, target_.size());
00245 typename utility::weighted_if_any2<RandomAccessIterator1, RandomAccessIterator2>::type tag;
00246 normalize(source, first, last, result, tag);
00247 }
00248
00249
00250 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00251 void
00252 qQuantileNormalizer::normalize(const qQuantileNormalizer::Partitioner& source,
00253 RandomAccessIterator1 first,
00254 RandomAccessIterator1 last,
00255 RandomAccessIterator2 result,
00256 utility::unweighted_iterator_tag tag) const
00257 {
00258 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
00259 BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
00260 utility::check_iterator_is_unweighted(first);
00261 utility::check_iterator_is_unweighted(result);
00262 size_t N = last-first;
00263 utility::yat_assert<std::runtime_error>
00264 (N >= target_.size(), "qQuantileNormalizer: Input range too small");
00265
00266 std::vector<size_t> sorted_index(last-first);
00267 utility::sort_index(first, last, sorted_index);
00268
00269 utility::Vector diff(source.averages());
00270 diff-=target_.averages();
00271 const utility::Vector& idx=target_.quantiles();
00272 regression::CSplineInterpolation cspline(idx,diff);
00273
00274
00275
00276 size_t start=0;
00277 size_t end = static_cast<size_t>(std::ceil(N*idx(0) - 0.5));
00278
00279
00280 if (end==0)
00281 ++end;
00282 for (size_t i=start; i<end; ++i) {
00283 size_t si = sorted_index[i];
00284 result[si] = first[si] - diff(0);
00285 }
00286
00287 using utility::yat_assert;
00288
00289
00290
00291 start=end;
00292 end = static_cast<size_t>(std::ceil(N*idx(idx.size()-1) - 0.5));
00293 if (end>=N)
00294 end = N-1;
00295 for ( size_t i=start; i<end; ++i) {
00296 size_t si = sorted_index[i];
00297
00298 yat_assert<std::runtime_error>((i+0.5)/N>idx(0),
00299 "qQuantileNormalizer: invalid input to cspline");
00300 result[si] = first[si] - cspline.evaluate((i+0.5)/N);
00301 }
00302
00303
00304
00305 for (size_t i=end ; i<N; ++i) {
00306 size_t si = sorted_index[i];
00307 result[si] = first[si] - diff(diff.size()-1);
00308 }
00309 }
00310
00311
00312 template<typename RandomAccessIterator1, typename RandomAccessIterator2>
00313 void qQuantileNormalizer::normalize(const Partitioner& source,
00314 RandomAccessIterator1 first,
00315 RandomAccessIterator1 last,
00316 RandomAccessIterator2 result,
00317 utility::weighted_iterator_tag tag) const
00318 {
00319 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator1>));
00320 BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIterator2>));
00321
00322 detail::copy_weight_if_weighted(first, last, result);
00323
00324 double total_w = std::accumulate(utility::weight_iterator(first),
00325 utility::weight_iterator(last), 0.0);
00326
00327 std::vector<size_t> sorted_index(last-first);
00328
00329
00330
00331
00332 {
00333 using namespace utility;
00334 std::vector<double> vec;
00335 vec.reserve(std::distance(first, last));
00336 std::copy(utility::data_iterator(first), utility::data_iterator(last),
00337 std::back_inserter(vec));
00338 for (std::vector<double>::iterator i(vec.begin()); i!=vec.end(); ++i)
00339 if (std::isnan(*i))
00340 *i = std::numeric_limits<double>::infinity();
00341 utility::sort_index(vec.begin(), vec.end(), sorted_index);
00342 }
00343
00344
00345 utility::Vector diff(source.averages());
00346 diff-=target_.averages();
00347 const utility::Vector& idx=target_.quantiles();
00348 regression::CSplineInterpolation cspline(idx,diff);
00349
00350 double sum_w=0;
00351 utility::iterator_traits<RandomAccessIterator1> traits1;
00352 utility::iterator_traits<RandomAccessIterator2> traits2;
00353 for (size_t i=0; i<sorted_index.size(); ++i) {
00354 size_t si = sorted_index[i];
00355 double w = (sum_w + 0.5*traits1.weight(first+si))/total_w;
00356 double correction = 0;
00357 if (w <= idx(0)) {
00358
00359
00360 correction = diff(0);
00361 }
00362 else if (w < idx(idx.size()-1) ) {
00363
00364
00365 correction = cspline.evaluate(w);
00366 }
00367 else {
00368
00369
00370 correction = diff(diff.size()-1);
00371 }
00372 traits2.data(result+si) = traits1.data(first+si) - correction;
00373 sum_w += traits1.weight(first+si);
00374 }
00375 }
00376
00377
00378 template<typename ForwardIterator>
00379 qQuantileNormalizer::Partitioner::Partitioner(ForwardIterator first,
00380 ForwardIterator last,
00381 unsigned int N)
00382 : average_(utility::Vector(N)), quantiles_(utility::Vector(N))
00383 {
00384 BOOST_CONCEPT_ASSERT((boost::ForwardIterator<ForwardIterator>));
00385 build(first, last, N,
00386 typename utility::weighted_iterator_traits<ForwardIterator>::type());
00387 }
00388
00389
00390 template<typename ForwardIterator>
00391 void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
00392 ForwardIterator last,
00393 unsigned int N,
00394 utility::unweighted_iterator_tag)
00395 {
00396 utility::Vector vec(std::distance(first, last));
00397 std::copy(first, last, vec.begin());
00398 std::sort(vec.begin(), vec.end());
00399 init(vec, N);
00400 }
00401
00402
00403 template<typename ForwardIterator>
00404 void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
00405 ForwardIterator last,
00406 unsigned int N,
00407 utility::weighted_iterator_tag)
00408 {
00409 using namespace utility;
00410 std::vector<DataWeight> vec;
00411 vec.reserve(std::distance(first, last));
00412 std::back_insert_iterator<std::vector<DataWeight> > inserter(vec);
00413 std::copy(first, last, inserter);
00414 std::sort(vec.begin(), vec.end());
00415 init(vec, N);
00416 }
00417
00418
00419 }}}
00420
00421 #endif