1 #ifndef _theplu_yat_statistics_utility_
2 #define _theplu_yat_statistics_utility_
29 #include "Percentiler.h"
31 #include "yat/classifier/DataLookupWeighted1D.h"
32 #include "yat/classifier/Target.h"
33 #include "yat/normalizer/utility.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/DataIterator.h"
37 #include "yat/utility/iterator_traits.h"
38 #include "yat/utility/sort_index.h"
39 #include "yat/utility/Vector.h"
40 #include "yat/utility/VectorBase.h"
41 #include "yat/utility/yat_assert.h"
43 #include <boost/concept_check.hpp>
44 #include <boost/iterator/permutation_iterator.hpp>
46 #include <gsl/gsl_math.h>
47 #include <gsl/gsl_statistics_double.h>
57 namespace statistics {
64 template <
typename T,
typename ForwardIterator>
65 void add(T& o, ForwardIterator first, ForwardIterator last,
66 const classifier::Target& target);
85 template<
typename BidirectionalIterator1,
typename BidirectionalIterator2>
87 BidirectionalIterator1 last,
88 BidirectionalIterator2 result);
107 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
109 RandomAccessIterator last,
110 MutableRandomAccessIterator result);
125 unsigned int n2,
unsigned int t);
138 template<
typename InputIterator>
139 double entropy(InputIterator first, InputIterator last);
165 double kurtosis(
const utility::VectorBase&);
179 template <
class RandomAccessIterator>
180 double mad(RandomAccessIterator first, RandomAccessIterator last,
199 template <
class RandomAccessIterator>
200 double median(RandomAccessIterator first, RandomAccessIterator last,
253 template <
class RandomAccessIterator>
254 double percentile(RandomAccessIterator first, RandomAccessIterator last,
255 double p,
bool sorted=
false) YAT_DEPRECATE;
262 template <class RandomAccessIterator>
263 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
264 double p,
bool sorted=false)
267 return percentiler(first, last);
281 template <
typename T,
typename ForwardIterator>
282 void add(T& o, ForwardIterator first, ForwardIterator last,
286 for (
size_t i=0; first!=last; ++i, ++first)
291 template<
typename BidirectionalIterator1,
typename BidirectionalIterator2>
293 BidirectionalIterator1 last,
294 BidirectionalIterator2 result)
296 using boost::Mutable_BidirectionalIterator;
297 using boost::BidirectionalIterator;
298 BOOST_CONCEPT_ASSERT((BidirectionalIterator<BidirectionalIterator1>));
299 BOOST_CONCEPT_ASSERT((Mutable_BidirectionalIterator<BidirectionalIterator2>));
300 typedef typename std::iterator_traits<BidirectionalIterator1> traits;
301 typename traits::difference_type n = std::distance(first, last);
304 std::advance(result, n-1);
307 typename traits::difference_type rank = n;
311 *result = std::min(*first * n/static_cast<double>(rank), prev);
320 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
322 RandomAccessIterator last,
323 MutableRandomAccessIterator result)
325 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
326 using boost::Mutable_RandomAccessIterator;
327 BOOST_CONCEPT_ASSERT((Mutable_RandomAccessIterator<MutableRandomAccessIterator>));
329 std::vector<size_t> idx;
332 boost::make_permutation_iterator(first, idx.end()),
333 boost::make_permutation_iterator(result, idx.begin()));
337 template<
typename InputIterator>
338 double entropy(InputIterator first, InputIterator last)
340 BOOST_CONCEPT_ASSERT((boost::InputIterator<InputIterator>));
341 using boost::Convertible;
342 typedef typename InputIterator::value_type T;
343 BOOST_CONCEPT_ASSERT((Convertible<T,double>));
346 for (; first != last; ++first) {
349 sum += *first * std::log(static_cast<double>(*first));
352 return -sum / N + log(N);
356 template <
class RandomAccessIterator>
357 double mad(RandomAccessIterator first, RandomAccessIterator last,
360 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
362 double m =
median(first, last, sorted);
363 typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
364 std::vector<T> ad(std::distance(first, last));
366 normalizer::detail::copy_weight_if_weighted(first, last, ad.begin());
370 while (first!=last) {
371 *first2 = std::abs(traits.data(first)-m);
375 return median(ad.begin(), ad.end(),
false);
379 template <
class RandomAccessIterator>
380 double median(RandomAccessIterator first, RandomAccessIterator last,
391 using boost::Convertible;
392 BOOST_CONCEPT_ASSERT((Convertible<typename T::value_type,double>));
404 for (
size_t c = 0; c<n.columns(); ++c)
405 rowsum(c) = std::accumulate(n.begin_column(c), n.end_column(c), 0);
408 for (
size_t r = 0; r<n.rows(); ++r)
409 colsum(r) = std::accumulate(n.begin_row(r), n.end_row(r), 0);
411 double mi = -
entropy(n.begin(), n.end());
412 mi +=
entropy(rowsum.begin(), rowsum.end());
413 mi +=
entropy(colsum.begin(), colsum.end());
419 template <
class RandomAccessIterator>
420 double percentile(RandomAccessIterator first, RandomAccessIterator last,
421 double p,
bool sorted)
423 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
432 double j = p/100 * (std::distance(first,last)-1);
433 int i =
static_cast<int>(j);
434 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
436 using std::iterator_traits;
437 typedef typename iterator_traits<RandomAccessIterator>::value_type value_t;
438 std::vector<value_t> v_copy(first, last);
439 size_t i =
static_cast<size_t>(p/100 * (v_copy.size()-1));
440 if (i+2 < v_copy.size()) {
441 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
444 std::sort(v_copy.begin(), v_copy.end());
445 return percentile(v_copy.begin(), v_copy.end(), p,
true);
void benjamini_hochberg(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:292
double mad(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Median absolute deviation from median.
Definition: utility.h:357
data_reference data(Iter iter) const
Definition: iterator_traits.h:440
Class for containing sample labels.
Definition: Target.h:47
double mutual_information(const T &A)
Calculates the mutual information of A.
Definition: utility.h:388
Concept check for Data Iterator.
Definition: concept_check.h:226
Definition: iterator_traits.h:412
Concept check for Container2D.
Definition: concept_check.h:57
double skewness(const utility::VectorBase &)
Computes the skewness of the data in a vector.
double pearson_p_value(double r, unsigned int n)
one-sided p-value
double median(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Definition: utility.h:380
double entropy(InputIterator first, InputIterator last)
Definition: utility.h:338
DataIterator.
Definition: DataIterator.h:62
This is the yat interface to GSL vector.
Definition: Vector.h:57
This is the yat interface to GSL vector.
Definition: VectorBase.h:52
bool binary(size_t i) const
Default binary is set to false for all classes except class 0.
void sort_index(InputIterator first, InputIterator last, std::vector< size_t > &sort_index)
Definition: sort_index.h:147
weight_reference weight(Iter iter) const
Definition: iterator_traits.h:446
double kurtosis(const utility::VectorBase &)
Computes the kurtosis of the data in a vector.
void benjamini_hochberg_unsorted(RandomAccessIterator first, RandomAccessIterator last, MutableRandomAccessIterator result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:321
void add(T &o, ForwardIterator first, ForwardIterator last, const classifier::Target &target)
Definition: utility.h:282
double cdf_hypergeometric_P(unsigned int k, unsigned int n1, unsigned int n2, unsigned int t)
double percentile2(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:263
double percentile(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:420
Functor to calculate percentile of a range.
Definition: Percentiler.h:50