00001 #ifndef _theplu_yat_statistics_utility_
00002 #define _theplu_yat_statistics_utility_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "Percentiler.h"
00030
00031 #include "yat/classifier/DataLookupWeighted1D.h"
00032 #include "yat/classifier/Target.h"
00033 #include "yat/utility/concept_check.h"
00034 #include "yat/utility/deprecate.h"
00035 #include "yat/utility/iterator_traits.h"
00036 #include "yat/utility/VectorBase.h"
00037 #include "yat/utility/yat_assert.h"
00038
00039 #include <boost/concept_check.hpp>
00040
00041 #include <algorithm>
00042 #include <cmath>
00043 #include <stdexcept>
00044 #include <vector>
00045
00046 #include <gsl/gsl_statistics_double.h>
00047
00048 namespace theplu {
00049 namespace yat {
00050 namespace statistics {
00051
/// Adds every element of the range [first, last) to the accumulator \a o.
///
/// As implemented by the definition in this header, the element at offset
/// i from \a first is forwarded as
/// o.add(data, target.binary(i), weight), where data and weight are read
/// through utility::iterator_traits<ForwardIterator>.
///
/// \param o      object receiving the values; must offer a three-argument
///               add() member callable as shown above
/// \param first  start of the data range
/// \param last   one past the end of the data range
/// \param target queried with binary(i) for each offset i in the range.
///               NOTE(review): presumably must hold at least
///               distance(first, last) entries - confirm against
///               classifier::Target.
template <typename T, typename ForwardIterator>
void add(T& o, ForwardIterator first, ForwardIterator last,
const classifier::Target& target);
00060
/// Declaration only; the definition is not part of this header.
///
/// NOTE(review): judging by the name and parameter list this is presumably
/// the lower-tail cumulative hypergeometric probability P(X <= k), with the
/// (k, n1, n2, t) parameterisation used by GSL's gsl_cdf_hypergeometric_P -
/// confirm against the implementation file.
double cdf_hypergeometric_P(unsigned int k, unsigned int n1,
unsigned int n2, unsigned int t);
00074
00075
/// Declaration only; the definition is not part of this header.
///
/// NOTE(review): presumably returns the p-value associated with a Pearson
/// correlation coefficient \a r obtained from \a n samples - the exact
/// test (one- or two-sided) must be confirmed against the implementation.
double pearson_p_value(double r, unsigned int n);
00092
/// Declaration only; the definition is not part of this header.
///
/// NOTE(review): presumably the kurtosis of the elements of the vector,
/// likely computed with GSL (gsl_statistics_double.h is included above) -
/// confirm normalisation (excess vs. raw) against the implementation.
double kurtosis(const utility::VectorBase&);
00101
00102
/// Median absolute deviation of the range [first, last).
///
/// The definition in this header computes m = median(first, last, sorted),
/// copies the range, replaces each data value x by |x - m|, sorts the copy
/// and returns its median. The input range itself is not modified.
///
/// \param first  start of the range
/// \param last   one past the end of the range
/// \param sorted forwarded to median() for the first pass over the input
/// \return median of the absolute deviations from the median
template <class RandomAccessIterator>
double mad(RandomAccessIterator first, RandomAccessIterator last,
bool sorted=false);
00117
00118
/// Median of the range [first, last).
///
/// The definition in this header returns
/// percentile2(first, last, 50.0, sorted).
///
/// \param first  start of the range
/// \param last   one past the end of the range
/// \param sorted forwarded unchanged to percentile2().
///               NOTE(review): presumably tells the callee that the range
///               is already sorted - confirm against Percentiler.
template <class RandomAccessIterator>
double median(RandomAccessIterator first, RandomAccessIterator last,
bool sorted=false);
00137
00138
/// Percentile \a p of the range [first, last); tagged with YAT_DEPRECATE.
///
/// Behaviour of the definition in this header:
///  - a one-element range returns that element;
///  - with \a sorted true, p >= 100 returns the last element, otherwise the
///    result is a linear interpolation between the two neighbours of
///    position p/100 * (n-1), n being the number of elements;
///  - with \a sorted false, the range is copied, the copy is (partially)
///    sorted and the sorted rule is applied to the copy.
///
/// NOTE(review): percentile2() / Percentiler look like the intended
/// replacement - confirm before migrating callers.
///
/// \param first  start of the range
/// \param last   one past the end of the range
/// \param p      requested percentile, on a 0-100 scale
/// \param sorted true if the range is already sorted in ascending order
template <class RandomAccessIterator>
double percentile(RandomAccessIterator first, RandomAccessIterator last,
double p, bool sorted=false) YAT_DEPRECATE;
00167
00168
00174 template <class RandomAccessIterator>
00175 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
00176 double p, bool sorted=false)
00177 {
00178 Percentiler percentiler(p, sorted);
00179 return percentiler(first, last);
00180 }
00181
/// Declaration only; the definition is not part of this header.
///
/// NOTE(review): presumably the skewness of the elements of the vector,
/// likely computed with GSL (gsl_statistics_double.h is included above) -
/// confirm against the implementation.
double skewness(const utility::VectorBase&);
00189
00190
00192
00193 template <typename T, typename ForwardIterator>
00194 void add(T& o, ForwardIterator first, ForwardIterator last,
00195 const classifier::Target& target)
00196 {
00197 utility::iterator_traits<ForwardIterator> traits;
00198 for (size_t i=0; first!=last; ++i, ++first)
00199 o.add(traits.data(first), target.binary(i), traits.weight(first));
00200 }
00201
00202
00203 template <class RandomAccessIterator>
00204 double mad(RandomAccessIterator first, RandomAccessIterator last,
00205 bool sorted)
00206 {
00207 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
00208 BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator>));
00209 double m = median(first, last, sorted);
00210 typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
00211 typedef std::vector<T> Vec;
00212 Vec ad;
00213 ad.reserve(std::distance(first, last));
00214
00215 std::copy(first, last, std::back_insert_iterator<Vec>(ad));
00216 utility::iterator_traits<typename Vec::iterator> traits;
00217 for (typename Vec::iterator i=ad.begin() ; i!=ad.end(); ++i) {
00218
00219 traits.data(i) = std::abs(traits.data(i)-m);
00220 }
00221 std::sort(ad.begin(), ad.end());
00222 return median(ad.begin(), ad.end(), true);
00223 }
00224
00225
/// Median of the range [first, last), obtained as the 50th percentile
/// from percentile2().
///
/// \param sorted forwarded unchanged to percentile2()
template <class RandomAccessIterator>
double median(RandomAccessIterator first, RandomAccessIterator last,
              bool sorted)
{
  // The median is the value at the 50 % position of the ordered data.
  const double median_percentile = 50.0;
  return percentile2(first, last, median_percentile, sorted);
}
00232
00233
00234 template <class RandomAccessIterator>
00235 double percentile(RandomAccessIterator first, RandomAccessIterator last,
00236 double p, bool sorted=false)
00237 {
00238 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
00239 BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator>));
00240
00241 if (first+1 == last)
00242 return utility::iterator_traits<RandomAccessIterator>().data(first);
00243 if (sorted) {
00244
00245 if (p>=100)
00246 return utility::iterator_traits<RandomAccessIterator>().data(--last);
00247 double j = p/100 * (std::distance(first,last)-1);
00248 int i = static_cast<int>(j);
00249 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
00250 }
00251
00252 std::vector<typename std::iterator_traits<RandomAccessIterator>::value_type>
00253 v_copy;
00254 v_copy.reserve(std::distance(first,last));
00255 std::copy(first, last, std::back_inserter(v_copy));
00256 size_t i = static_cast<size_t>(p/100 * (v_copy.size()-1));
00257 if (i+2 < v_copy.size()) {
00258 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
00259 }
00260 else
00261 std::sort(v_copy.begin(), v_copy.end());
00262 return percentile(v_copy.begin(), v_copy.end(), p, true);
00263 }
00264
00265
00266 }}}
00267
00268 #endif