00001 #ifndef _theplu_yat_statistics_utility_
00002 #define _theplu_yat_statistics_utility_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "Percentiler.h"
00030
00031 #include "yat/classifier/DataLookupWeighted1D.h"
00032 #include "yat/classifier/Target.h"
00033 #include "yat/utility/deprecate.h"
00034 #include "yat/utility/iterator_traits.h"
00035 #include "yat/utility/VectorBase.h"
00036 #include "yat/utility/yat_assert.h"
00037
00038 #include <algorithm>
00039 #include <cmath>
00040 #include <stdexcept>
00041 #include <vector>
00042
00043 #include <gsl/gsl_statistics_double.h>
00044
00045 namespace theplu {
00046 namespace yat {
00047 namespace statistics {
00048
00054 template <typename T, typename ForwardIterator>
00055 void add(T& o, ForwardIterator first, ForwardIterator last,
00056 const classifier::Target& target);
00057
/// Declaration only; the definition is not part of this header.
///
/// NOTE(review): the name and the (k, n1, n2, t) argument list mirror GSL's
/// gsl_cdf_hypergeometric_P, so this presumably returns the cumulative
/// hypergeometric probability of observing at most @a k -- confirm against
/// the implementation file before relying on it.
///
/// @param k   NOTE(review): presumably the observed count -- confirm
/// @param n1  NOTE(review): presumably size of the first group -- confirm
/// @param n2  NOTE(review): presumably size of the second group -- confirm
/// @param t   NOTE(review): presumably number of draws -- confirm
double cdf_hypergeometric_P(unsigned k, unsigned n1, unsigned n2, unsigned t);
00071
00072
/// Declaration only; the definition is not part of this header.
///
/// NOTE(review): judging by the name, this presumably converts a Pearson
/// correlation coefficient into a p-value -- confirm sidedness and the exact
/// test used against the implementation file.
///
/// @param r  NOTE(review): presumably the correlation coefficient -- confirm
/// @param n  NOTE(review): presumably the number of samples -- confirm
double pearson_p_value(double r, unsigned n);
00089
00097 double kurtosis(const utility::VectorBase&);
00098
00099
/// Median absolute deviation from the median of [first, last).
///
/// The implementation takes the median m of the range, replaces a copy of
/// every element's data by |data - m|, sorts that copy and returns its
/// median.  The input range itself is not modified.
///
/// @param first   start of the range
/// @param last    one past the end of the range
/// @param sorted  forwarded to median() for the first pass over the input
/// @return the median of the absolute deviations
template <typename RandomAccessIterator>
double mad(RandomAccessIterator first,
           RandomAccessIterator last,
           bool sorted = false);
00114
00115
/// Median of [first, last), implemented as percentile2() with p = 50.
///
/// @param first   start of the range
/// @param last    one past the end of the range
/// @param sorted  forwarded unchanged to percentile2()
/// @return the value percentile2(first, last, 50.0, sorted) yields
template <typename RandomAccessIterator>
double median(RandomAccessIterator first,
              RandomAccessIterator last,
              bool sorted = false);
00134
00135
00161 template <class RandomAccessIterator>
00162 double percentile(RandomAccessIterator first, RandomAccessIterator last,
00163 double p, bool sorted=false) YAT_DEPRECATE;
00164
00165
00171 template <class RandomAccessIterator>
00172 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
00173 double p, bool sorted=false)
00174 {
00175 Percentiler percentiler(p, sorted);
00176 return percentiler(first, last);
00177 }
00178
00185 double skewness(const utility::VectorBase&);
00186
00187
00189
00190 template <typename T, typename ForwardIterator>
00191 void add(T& o, ForwardIterator first, ForwardIterator last,
00192 const classifier::Target& target)
00193 {
00194 utility::iterator_traits<ForwardIterator> traits;
00195 for (size_t i=0; first!=last; ++i, ++first)
00196 o.add(traits.data(first), target.binary(i), traits.weight(first));
00197 }
00198
00199
00200 template <class RandomAccessIterator>
00201 double mad(RandomAccessIterator first, RandomAccessIterator last,
00202 bool sorted)
00203 {
00204 double m = median(first, last, sorted);
00205 typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
00206 typedef std::vector<T> Vec;
00207 Vec ad;
00208 ad.reserve(std::distance(first, last));
00209
00210 std::copy(first, last, std::back_insert_iterator<Vec>(ad));
00211 utility::iterator_traits<typename Vec::iterator> traits;
00212 for (typename Vec::iterator i=ad.begin() ; i!=ad.end(); ++i) {
00213
00214 traits.data(i) = std::abs(traits.data(i)-m);
00215 }
00216 std::sort(ad.begin(), ad.end());
00217 return median(ad.begin(), ad.end(), true);
00218 }
00219
00220
/// Median of [first, last): the 50th percentile as computed by percentile2().
///
/// @param first   start of the range
/// @param last    one past the end of the range
/// @param sorted  forwarded unchanged to percentile2()
/// @return percentile2(first, last, 50.0, sorted)
template <class RandomAccessIterator>
double median(RandomAccessIterator first, RandomAccessIterator last,
              bool sorted)
{
  const double fiftieth = 50.0;
  return percentile2(first, last, fiftieth, sorted);
}
00227
00228
00229 template <class RandomAccessIterator>
00230 double percentile(RandomAccessIterator first, RandomAccessIterator last,
00231 double p, bool sorted=false)
00232 {
00233 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
00234
00235 if (first+1 == last)
00236 return utility::iterator_traits<RandomAccessIterator>().data(first);
00237 if (sorted) {
00238
00239 if (p>=100)
00240 return utility::iterator_traits<RandomAccessIterator>().data(--last);
00241 double j = p/100 * (std::distance(first,last)-1);
00242 int i = static_cast<int>(j);
00243 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
00244 }
00245
00246 std::vector<typename std::iterator_traits<RandomAccessIterator>::value_type>
00247 v_copy;
00248 v_copy.reserve(std::distance(first,last));
00249 std::copy(first, last, std::back_inserter(v_copy));
00250 size_t i = static_cast<size_t>(p/100 * (v_copy.size()-1));
00251 if (i+2 < v_copy.size()) {
00252 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
00253 }
00254 else
00255 std::sort(v_copy.begin(), v_copy.end());
00256 return percentile(v_copy.begin(), v_copy.end(), p, true);
00257 }
00258
00259
00260 }}}
00261
00262 #endif