1 #ifndef _theplu_yat_statistics_utility_
2 #define _theplu_yat_statistics_utility_
29 #include "Percentiler.h"
31 #include "yat/classifier/DataLookupWeighted1D.h"
32 #include "yat/classifier/Target.h"
33 #include "yat/normalizer/utility.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/DataIterator.h"
37 #include "yat/utility/iterator_traits.h"
38 #include "yat/utility/VectorBase.h"
39 #include "yat/utility/yat_assert.h"
41 #include <boost/concept_check.hpp>
49 #include <gsl/gsl_statistics_double.h>
53 namespace statistics {
/// Declaration of add(): operates on the range [first, last) together with
/// \a target, and receives \a o by non-const reference so that it can be
/// updated. The definition appears later in this header.
/// NOTE(review): presumably \a target supplies a per-element class label
/// that is forwarded to \a o - confirm against the definition.
60 template <
typename T,
typename ForwardIterator>
61 void add(T& o, ForwardIterator first, ForwardIterator last,
62 const classifier::Target& target);
/// Declaration of a function template over two bidirectional iterator
/// types: an input range that ends at \a last and an output iterator
/// \a result. The definition later in this header requires the output
/// iterator to be mutable and writes its results through it.
/// NOTE(review): the formula in the definition (value * n / rank, capped by
/// the previously written value) looks like a Benjamini-Hochberg style
/// adjustment - confirm.
79 template<
typename B
idirectionalIterator1,
typename B
idirectionalIterator2>
81 BidirectionalIterator1 last,
82 BidirectionalIterator2 result);
97 unsigned int n2,
unsigned int t);
/// Returns a double computed from the given vector; going by its name, the
/// kurtosis of the vector's elements.
/// NOTE(review): only the declaration is visible here - which kurtosis
/// convention is used (e.g. excess vs. plain) must be checked in the
/// implementation.
124 double kurtosis(
const utility::VectorBase&);
/// Median absolute deviation of the range [first, last).
/// The definition later in this header takes the median m of the range,
/// forms |x - m| for each element x, sorts those deviations and returns
/// their median. RandomAccessIterator must model a random access iterator
/// (checked with a Boost concept assert in the definition).
138 template <
class RandomAccessIterator>
139 double mad(RandomAccessIterator first, RandomAccessIterator last,
/// Declaration of median() for the range [first, last); returns a double.
/// mad() in this header passes \c true as the third argument once it has
/// sorted its data, i.e. that argument tells median() that the range is
/// already sorted.
158 template <
class RandomAccessIterator>
159 double median(RandomAccessIterator first, RandomAccessIterator last,
/// Deprecated (marked YAT_DEPRECATE): percentile of the range [first, last).
/// \param p percentile expressed in percent; the definition divides it by
/// 100 before use.
/// \param sorted defaults to \c false. For unsorted input the definition
/// works on a sorted copy, so the caller's range is not reordered.
/// \return a value linearly interpolated between two neighbouring elements
/// of the sorted data.
188 template <
class RandomAccessIterator>
189 double percentile(RandomAccessIterator first, RandomAccessIterator last,
190 double p,
bool sorted=
false) YAT_DEPRECATE;
/// Returns the value obtained by applying \c percentiler to [first, last).
/// \a sorted defaults to \c false.
/// NOTE(review): \c percentiler is presumably a Percentiler (see
/// Percentiler.h) set up from \a p and \a sorted - confirm.
197 template <class RandomAccessIterator>
198 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
199 double p,
bool sorted=false)
202 return percentiler(first, last);
/// Definition of add(): walks [first, last) one element at a time while a
/// zero-based counter \c i is advanced in step with the iterator.
/// NOTE(review): the counter is presumably used to look up the entry in the
/// target that belongs to the current element - confirm.
216 template <
typename T,
typename ForwardIterator>
217 void add(T& o, ForwardIterator first, ForwardIterator last,
221 for (
size_t i=0; first!=last; ++i, ++first)
/// Definition of the two-iterator template declared earlier in this header.
/// Reads values from the input range and writes adjusted values through
/// \a result.
/// NOTE(review): the value * n / rank formula capped by the previous result
/// looks like a Benjamini-Hochberg style adjustment - confirm.
226 template<
typename B
idirectionalIterator1,
typename B
idirectionalIterator2>
228 BidirectionalIterator1 last,
229 BidirectionalIterator2 result)
231 using boost::Mutable_BidirectionalIterator;
// Compile-time requirements: the input iterator must be bidirectional and
// the output iterator must be a mutable bidirectional iterator.
232 BOOST_CONCEPT_ASSERT((boost::BidirectionalIterator<BidirectionalIterator1>));
233 BOOST_CONCEPT_ASSERT((Mutable_BidirectionalIterator<BidirectionalIterator2>));
// Number of elements in the input range.
234 size_t n = std::distance(first, last);
// Move the output iterator to the position of the last element.
237 std::advance(result, n-1);
// Scale the input value by n/rank (floating-point division) and cap it at
// prev, so the value written never exceeds prev.
244 *result = std::min(*first * n/static_cast<double>(rank), prev);
/// Definition of mad(): median of the absolute deviations from the median
/// of [first, last).
253 template <
class RandomAccessIterator>
254 double mad(RandomAccessIterator first, RandomAccessIterator last,
257 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
// Centre of the data: the median of the input range.
259 double m =
median(first, last, sorted);
// Work buffer with one slot per input element, using the iterator's own
// value type.
260 typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
261 std::vector<T> ad(std::distance(first, last));
// NOTE(review): by its name this carries the weights over into ad when the
// value type is weighted - confirm in normalizer/utility.h.
263 normalizer::detail::copy_weight_if_weighted(first, last, ad.begin());
267 while (first!=last) {
// Absolute deviation of the current data value from the median m.
268 *first2 = std::abs(traits.data(first)-m);
// Sort the deviations so that median() can be told the range is sorted.
272 std::sort(ad.begin(), ad.end());
273 return median(ad.begin(), ad.end(),
true);
/// Definition of the median() template declared earlier in this header.
277 template <
class RandomAccessIterator>
278 double median(RandomAccessIterator first, RandomAccessIterator last,
/// Definition of the deprecated percentile(): \a p is given in percent.
/// Sorted data is interpolated directly; otherwise a copy is sorted (only
/// as far as needed) and the function calls itself on that copy with
/// sorted == true.
285 template <
class RandomAccessIterator>
286 double percentile(RandomAccessIterator first, RandomAccessIterator last,
287 double p,
bool sorted)
289 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
// Fractional position of the percentile within the index range [0, n-1].
298 double j = p/100 * (std::distance(first,last)-1);
299 int i =
static_cast<int>(j);
// Linear interpolation between elements i and i+1, weighted by the
// fractional part of j.
300 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
// Unsorted input: copy the values so that the caller's range is untouched.
302 using std::iterator_traits;
303 typedef typename iterator_traits<RandomAccessIterator>::value_type value_t;
304 std::vector<value_t> v_copy;
305 v_copy.reserve(std::distance(first,last));
306 std::copy(first, last, std::back_inserter(v_copy));
// Integer part of the percentile position inside the copy.
307 size_t i =
static_cast<size_t>(p/100 * (v_copy.size()-1));
308 if (i+2 < v_copy.size()) {
// Only elements 0..i+1 have to be in order for the interpolation.
309 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
// Full sort of the copy (presumably the alternative to the partial sort
// above - confirm).
312 std::sort(v_copy.begin(), v_copy.end());
// Re-enter with the sorted copy so the interpolation branch is used.
313 return percentile(v_copy.begin(), v_copy.end(), p,
true);