yat
0.8.3pre
|
00001 #ifndef _theplu_yat_normalizer_spearman_ 00002 #define _theplu_yat_normalizer_spearman_ 00003 00004 // $Id: Spearman.h 2161 2010-01-19 23:59:48Z peter $ 00005 00006 /* 00007 Copyright (C) 2008 Jari Häkkinen, Peter Johansson 00008 Copyright (C) 2009, 2010 Peter Johansson 00009 00010 This file is part of the yat library, http://dev.thep.lu.se/yat 00011 00012 The yat library is free software; you can redistribute it and/or 00013 modify it under the terms of the GNU General Public License as 00014 published by the Free Software Foundation; either version 3 of the 00015 License, or (at your option) any later version. 00016 00017 The yat library is distributed in the hope that it will be useful, 00018 but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00020 General Public License for more details. 00021 00022 You should have received a copy of the GNU General Public License 00023 along with yat. If not, see <http://www.gnu.org/licenses/>. 00024 */ 00025 00026 #include "utility.h" 00027 00028 #include "yat/utility/DataIterator.h" 00029 #include "yat/utility/iterator_traits.h" 00030 #include "yat/utility/sort_index.h" 00031 #include "yat/utility/WeightIterator.h" 00032 00033 #include <boost/concept_check.hpp> 00034 00035 #include <algorithm> 00036 #include <functional> 00037 #include <vector> 00038 00039 namespace theplu { 00040 namespace yat { 00041 namespace normalizer { 00042 00052 class Spearman 00053 { 00054 public: 00058 Spearman(void){} 00059 00070 template<typename RandomAccessIter1, typename RandomAccessIter2> 00071 void operator()(RandomAccessIter1 first, RandomAccessIter1 last, 00072 RandomAccessIter2 result) const 00073 { 00074 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIter1>)); 00075 BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIter2>)); 00076 using utility::weighted_if_any2; 00077 typename weighted_if_any2<RandomAccessIter1,RandomAccessIter2>::type tag; 00078 normalize(first, last, result, tag); 00079 } 00080 00081 00082 private: 00083 // unweighted version 00084 template<typename RandomAccessIter1, typename RandomAccessIter2> 00085 void normalize(RandomAccessIter1 first, RandomAccessIter1 last, 00086 RandomAccessIter2 result, 00087 utility::unweighted_iterator_tag) const 00088 { 00089 std::vector<size_t> perm; 00090 utility::sort_index(first, last, perm); 00091 double n = perm.size(); 00092 size_t i=0; 00093 while ( i<perm.size() ) { 00094 size_t min_i = i; 00095 while (i<perm.size() && first[perm[i]]<=first[perm[min_i]]) 00096 ++i; 00097 double res = static_cast<double>(i + min_i)/(2*n); 00098 for ( ; min_i < i; ++min_i) 00099 result[perm[min_i]] = res; 00100 } 00101 } 00102 00103 00104 // weighted version 00105 template<typename RandomAccessIter1, typename RandomAccessIter2> 00106 void normalize(RandomAccessIter1 first, RandomAccessIter1 last, 00107 RandomAccessIter2 result, 00108 utility::weighted_iterator_tag) const 00109 { 00110 detail::copy_weight_if_weighted(first, last, result); 00111 std::vector<double> input_vec; 00112 input_vec.reserve(last-first); 00113 // set values with w=0 to 0 to avoid problems with NaNs 00114 utility::iterator_traits<RandomAccessIter1> trait; 00115 for (RandomAccessIter1 i=first; i!=last; ++i) { 00116 if (trait.weight(i)==0) 00117 input_vec.push_back(0.0); 00118 else 00119 input_vec.push_back(trait.data(i)); 00120 } 00121 00122 std::vector<size_t> perm(input_vec.size()); 00123 utility::sort_index(input_vec.begin(), input_vec.end(), perm); 00124 utility::iterator_traits<RandomAccessIter2> rtrait; 00125 00126 double sum_w=0; 00127 size_t i=0; 00128 while ( i<perm.size() ) { 00129 double w=0; 00130 size_t min_i = i; 00131 while (i<perm.size() && (trait.weight(first+perm[i])==0 || 00132 trait.data(first+perm[i]) <= 00133 trait.data(first+perm[min_i])) ) { 00134 w += trait.weight(first+perm[i]); 00135 ++i; 00136 } 00137 double res=sum_w + 0.5*w; 00138 for ( size_t j=min_i; j<i; ++j) 00139 rtrait.data(result+perm[j]) = res; 00140 sum_w += w; 00141 } 00142 00143 size_t n = std::distance(first, last); 00144 std::transform(utility::data_iterator(result), 00145 utility::data_iterator(result+n), 00146 utility::data_iterator(result), 00147 std::bind2nd(std::divides<double>(), sum_w)); 00148 } 00149 00150 }; 00151 00152 }}} // end of namespace normalizer, yat and thep 00153 #endif