00001 #ifndef _theplu_yat_statistics_utility_
00002 #define _theplu_yat_statistics_utility_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "Percentiler.h"
00030
00031 #include "yat/classifier/DataLookupWeighted1D.h"
00032 #include "yat/classifier/Target.h"
00033 #include "yat/utility/deprecate.h"
00034 #include "yat/utility/VectorBase.h"
00035 #include "yat/utility/yat_assert.h"
00036
00037 #include <algorithm>
00038 #include <cmath>
00039 #include <stdexcept>
00040 #include <vector>
00041
00042 #include <gsl/gsl_statistics_double.h>
00043
00044 namespace theplu {
00045 namespace yat {
00046 namespace statistics {
00047
00053 template <typename T, typename ForwardIterator>
00054 void add(T& o, ForwardIterator first, ForwardIterator last,
00055 const classifier::Target& target);
00056
/**
   Declaration only; the definition is not part of this header.

   NOTE(review): judging by the name and the argument list this
   presumably evaluates the lower-tail cumulative hypergeometric
   probability with the same parameter meaning as GSL's
   gsl_cdf_hypergeometric_P(k, n1, n2, t) -- confirm against the
   implementation file before relying on it.
*/
double cdf_hypergeometric_P(unsigned int k,
                            unsigned int n1,
                            unsigned int n2,
                            unsigned int t);
00070
00071
/**
   Declaration only; the definition is not part of this header.

   NOTE(review): presumably returns the p-value associated with a
   Pearson correlation coefficient \a r obtained from \a n samples.
   Whether the test is one- or two-sided cannot be told from here --
   confirm against the implementation file.
*/
double pearson_p_value(double r, unsigned int n);
00088
00096 double kurtosis(const utility::VectorBase&);
00097
00098
/**
   \brief Median absolute deviation of the range [first, last).

   Forward declaration; the definition is found further down in this
   header. The result is the median of |x - m| taken over all elements
   x, where m is the median of the range.

   \param first   start of the range
   \param last    one-past-the-end of the range
   \param sorted  forwarded to median() when the median of the input
                  range is computed

   \return the median of the absolute deviations from the median
*/
template <class Iterator>
double mad(Iterator first, Iterator last, const bool sorted = false);
00106
00107
/**
   \brief Median of the range [first, last).

   Forward declaration; the definition is found further down in this
   header, where the median is obtained as the 50th percentile through
   percentile2().

   \param first   start of the range
   \param last    one-past-the-end of the range
   \param sorted  forwarded unchanged to percentile2()

   \return the value percentile2() reports for p = 50
*/
template <class Iterator>
double median(Iterator first, Iterator last, const bool sorted = false);
00126
00127
00153 template <class RandomAccessIterator>
00154 double percentile(RandomAccessIterator first, RandomAccessIterator last,
00155 double p, bool sorted=false) YAT_DEPRECATE;
00156
00157
00163 template <class RandomAccessIterator>
00164 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
00165 double p, bool sorted=false)
00166 {
00167 Percentiler percentiler(p, sorted);
00168 return percentiler(first, last);
00169 }
00170
00177 double skewness(const utility::VectorBase&);
00178
00179
00181
00182 template <typename T, typename ForwardIterator>
00183 void add(T& o, ForwardIterator first, ForwardIterator last,
00184 const classifier::Target& target)
00185 {
00186 for (size_t i=0; first!=last; ++i, ++first)
00187 o.add(utility::iterator_traits<ForwardIterator>().data(first),
00188 target.binary(i),
00189 utility::iterator_traits<ForwardIterator>().weight(first));
00190 }
00191
00192
00193 template <class T>
00194 double mad(T first, T last, const bool sorted=false)
00195 {
00196 double m = median(first, last, sorted);
00197 std::vector<double> ad;
00198 ad.reserve(std::distance(first, last));
00199 for( ; first!=last; ++first)
00200 ad.push_back(fabs(*first-m));
00201 std::sort(ad.begin(), ad.end());
00202 return median(ad.begin(), ad.end(), true);
00203 }
00204
00205
/**
   \brief Median of the range [first, last).

   The median is obtained as the 50th percentile through percentile2().

   The default value of \a sorted (false) is supplied by the forward
   declaration earlier in this header. It must not be repeated here:
   C++ does not allow a default argument to be given again in a later
   declaration of the same function.

   \param first   start of the range
   \param last    one-past-the-end of the range
   \param sorted  forwarded unchanged to percentile2()

   \return the value percentile2() reports for p = 50
*/
template <class T>
double median(T first, T last, const bool sorted)
{
  return percentile2(first, last, 50.0, sorted);
}
00211
00212
00213 template <class RandomAccessIterator>
00214 double percentile(RandomAccessIterator first, RandomAccessIterator last,
00215 double p, bool sorted=false)
00216 {
00217
00218 if (first+1 == last)
00219 return utility::iterator_traits<RandomAccessIterator>().data(first);
00220 if (sorted) {
00221
00222 if (p>=100)
00223 return utility::iterator_traits<RandomAccessIterator>().data(--last);
00224 double j = p/100 * (std::distance(first,last)-1);
00225 int i = static_cast<int>(j);
00226 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
00227 }
00228
00229 std::vector<typename std::iterator_traits<RandomAccessIterator>::value_type>
00230 v_copy;
00231 v_copy.reserve(std::distance(first,last));
00232 std::copy(first, last, std::back_inserter(v_copy));
00233 size_t i = static_cast<size_t>(p/100 * (v_copy.size()-1));
00234 if (i+2 < v_copy.size()) {
00235 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
00236 }
00237 else
00238 std::sort(v_copy.begin(), v_copy.end());
00239 return percentile(v_copy.begin(), v_copy.end(), p, true);
00240 }
00241
00242
00243 }}}
00244
00245 #endif