yat  0.8.3pre
Spearman.h
00001 #ifndef _theplu_yat_normalizer_spearman_
00002 #define _theplu_yat_normalizer_spearman_
00003 
00004 // $Id: Spearman.h 2161 2010-01-19 23:59:48Z peter $
00005 
00006 /*
00007   Copyright (C) 2008 Jari Häkkinen, Peter Johansson
00008   Copyright (C) 2009, 2010 Peter Johansson
00009 
00010   This file is part of the yat library, http://dev.thep.lu.se/yat
00011 
00012   The yat library is free software; you can redistribute it and/or
00013   modify it under the terms of the GNU General Public License as
00014   published by the Free Software Foundation; either version 3 of the
00015   License, or (at your option) any later version.
00016 
00017   The yat library is distributed in the hope that it will be useful,
00018   but WITHOUT ANY WARRANTY; without even the implied warranty of
00019   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00020   General Public License for more details.
00021 
00022   You should have received a copy of the GNU General Public License
00023   along with yat. If not, see <http://www.gnu.org/licenses/>.
00024 */
00025 
00026 #include "utility.h"
00027 
00028 #include "yat/utility/DataIterator.h"
00029 #include "yat/utility/iterator_traits.h"
00030 #include "yat/utility/sort_index.h"
00031 #include "yat/utility/WeightIterator.h"
00032 
00033 #include <boost/concept_check.hpp>
00034 
00035 #include <algorithm>
00036 #include <functional>
00037 #include <vector>
00038 
00039 namespace theplu {
00040 namespace yat {
00041 namespace normalizer {
00042 
00052   class Spearman
00053   {
00054   public:
00058     Spearman(void){}
00059 
00070     template<typename RandomAccessIter1, typename RandomAccessIter2>
00071     void operator()(RandomAccessIter1 first, RandomAccessIter1 last,
00072                     RandomAccessIter2 result) const
00073     {
00074       BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIter1>));
00075       BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIter2>));
00076       using utility::weighted_if_any2;
00077       typename weighted_if_any2<RandomAccessIter1,RandomAccessIter2>::type tag;
00078       normalize(first, last, result, tag);
00079     }
00080 
00081 
00082   private:
00083     // unweighted version
00084     template<typename RandomAccessIter1, typename RandomAccessIter2>
00085     void normalize(RandomAccessIter1 first, RandomAccessIter1 last,
00086                    RandomAccessIter2 result, 
00087                    utility::unweighted_iterator_tag) const
00088     {
00089       std::vector<size_t> perm;
00090       utility::sort_index(first, last, perm);
00091       double n = perm.size();
00092       size_t i=0; 
00093       while ( i<perm.size() ) {
00094         size_t min_i = i;
00095         while (i<perm.size() && first[perm[i]]<=first[perm[min_i]])
00096           ++i;
00097         double res = static_cast<double>(i + min_i)/(2*n);
00098         for ( ; min_i < i; ++min_i)
00099           result[perm[min_i]] = res;
00100       }
00101     }
00102 
00103 
00104     // weighted version
00105     template<typename RandomAccessIter1, typename RandomAccessIter2>
00106     void normalize(RandomAccessIter1 first, RandomAccessIter1 last,
00107                    RandomAccessIter2 result, 
00108                    utility::weighted_iterator_tag) const
00109     {
00110       detail::copy_weight_if_weighted(first, last, result);
00111       std::vector<double> input_vec;
00112       input_vec.reserve(last-first);
00113       // set values with w=0 to 0 to avoid problems with NaNs
00114       utility::iterator_traits<RandomAccessIter1> trait;
00115       for (RandomAccessIter1 i=first; i!=last; ++i) {
00116         if (trait.weight(i)==0)
00117           input_vec.push_back(0.0);
00118         else
00119           input_vec.push_back(trait.data(i));
00120       }
00121 
00122       std::vector<size_t> perm(input_vec.size());
00123       utility::sort_index(input_vec.begin(), input_vec.end(), perm);
00124       utility::iterator_traits<RandomAccessIter2> rtrait;
00125 
00126       double sum_w=0;
00127       size_t i=0; 
00128       while ( i<perm.size() ) {
00129         double w=0;
00130         size_t min_i = i;
00131         while (i<perm.size() && (trait.weight(first+perm[i])==0 ||
00132                                  trait.data(first+perm[i]) <= 
00133                                  trait.data(first+perm[min_i])) ) {
00134           w += trait.weight(first+perm[i]);
00135           ++i;
00136         }
00137         double res=sum_w + 0.5*w;
00138         for ( size_t j=min_i; j<i; ++j)
00139           rtrait.data(result+perm[j]) = res;
00140         sum_w += w;
00141       }       
00142 
00143       size_t n = std::distance(first, last);
00144       std::transform(utility::data_iterator(result),
00145                      utility::data_iterator(result+n),
00146                      utility::data_iterator(result),
00147                      std::bind2nd(std::divides<double>(), sum_w));
00148     }
00149 
00150   };
00151 
00152 }}} // end of namespace normalizer, yat and thep
00153 #endif

Generated on Thu Dec 20 2012 03:12:57 for yat by  doxygen 1.8.0-20120409