1 #ifndef _theplu_yat_statistics_utility_
2 #define _theplu_yat_statistics_utility_
29 #include "Percentiler.h"
31 #include "yat/classifier/DataLookupWeighted1D.h"
32 #include "yat/classifier/Target.h"
33 #include "yat/normalizer/utility.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/DataIterator.h"
37 #include "yat/utility/iterator_traits.h"
38 #include "yat/utility/Vector.h"
39 #include "yat/utility/VectorBase.h"
40 #include "yat/utility/yat_assert.h"
42 #include <boost/concept_check.hpp>
43 #include <gsl/gsl_math.h>
44 #include <gsl/gsl_statistics_double.h>
54 namespace statistics {
/// Declaration of add(); the definition appears further down in this header.
/// NOTE(review): presumably feeds every element of [first, last) into `o`,
/// with `target` supplying a per-element label -- confirm against the
/// definition, whose loop body is not shown here.
61 template <
typename T,
typename ForwardIterator>
62 void add(T& o, ForwardIterator first, ForwardIterator last,
63 const classifier::Target& target);
/// Benjamini Hochberg multiple test correction (declaration).
/// Full signature: benjamini_hochberg(first, last, result), where `first` and
/// `last` are BidirectionalIterator1 and `result` is BidirectionalIterator2.
/// NOTE(review): the line carrying the function name and `first` is missing
/// at this point; only the trailing parameters are shown below.
// The template parameter names were split into `B` + `idirectionalIteratorN`;
// they are rejoined so they match the types used in the parameter list.
80 template<
typename BidirectionalIterator1,
typename BidirectionalIterator2>
82 BidirectionalIterator1 last,
83 BidirectionalIterator2 result);
// Trailing parameters of the cdf_hypergeometric_P(k, n1, n2, t) declaration;
// the line carrying the return type, name and first two parameters is missing
// at this point.
98 unsigned int n2,
unsigned int t);
/// Declaration of entropy() over the range [first, last).
/// The definition further down accumulates x*log(x) (natural log) over the
/// elements and returns `-sum / N + log(N)` as a double.
/// NOTE(review): N is presumably the sum of the elements -- its update is on
/// a line not shown; confirm.
111 template<
typename InputIterator>
112 double entropy(InputIterator first, InputIterator last);
/// Computes the kurtosis of the data in a vector.
138 double kurtosis(
const utility::VectorBase&);
/// Median absolute deviation from median (declaration).
/// Full signature: mad(first, last, bool sorted=false); the line with the
/// trailing `sorted` parameter is missing at this point.
152 template <
class RandomAccessIterator>
153 double mad(RandomAccessIterator first, RandomAccessIterator last,
/// Declaration of median() over the range [first, last).
/// Full signature: median(first, last, bool sorted=false); the line with the
/// trailing `sorted` parameter is missing at this point.
172 template <
class RandomAccessIterator>
173 double median(RandomAccessIterator first, RandomAccessIterator last,
/// Declaration of percentile(); marked deprecated through YAT_DEPRECATE.
/// \param first,last  range to evaluate
/// \param p           percentile on a 0-100 scale (the definition divides it
///                    by 100)
/// \param sorted      pass true when the range is already sorted; defaults to
///                    false, in which case the definition works on a sorted
///                    copy of the range
/// NOTE(review): percentile2(), declared right after this, looks like the
/// intended replacement -- confirm.
226 template <
class RandomAccessIterator>
227 double percentile(RandomAccessIterator first, RandomAccessIterator last,
228 double p,
bool sorted=
false) YAT_DEPRECATE;
/// Returns the value produced by calling `percentiler` on [first, last).
/// NOTE(review): `percentiler` is created on a line not shown here;
/// presumably a Percentiler functor (Percentiler.h) configured from `p` and
/// `sorted` -- confirm. The enclosing braces are likewise not shown.
235 template <class RandomAccessIterator>
236 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
237 double p,
bool sorted=false)
240 return percentiler(first, last);
/// Definition of add(): walks [first, last) once.
/// NOTE(review): the final parameter line (`target`) and the loop body are
/// not shown here.
254 template <
typename T,
typename ForwardIterator>
255 void add(T& o, ForwardIterator first, ForwardIterator last,
// `i` is a running position kept in step with `first`.
// NOTE(review): presumably used to look up the label of element i in
// `target` -- confirm against the loop body.
259 for (
size_t i=0; first!=last; ++i, ++first)
/// Benjamini Hochberg multiple test correction (definition).
/// Reads values from [first, last) and writes adjusted values through `result`.
/// NOTE(review): several lines of this definition (the name line, braces,
/// the loop and the declarations of `rank` and `prev`) are not shown here.
// The template parameter names were split into `B` + `idirectionalIteratorN`;
// they are rejoined so they match the types used below.
264 template<
typename BidirectionalIterator1,
typename BidirectionalIterator2>
266 BidirectionalIterator1 last,
267 BidirectionalIterator2 result)
269 using boost::Mutable_BidirectionalIterator;
// Input must be readable bidirectionally; output must also be writable.
270 BOOST_CONCEPT_ASSERT((boost::BidirectionalIterator<BidirectionalIterator1>));
271 BOOST_CONCEPT_ASSERT((Mutable_BidirectionalIterator<BidirectionalIterator2>));
272 size_t n = std::distance(first, last);
// Move `result` to the slot matching the last input element.
// NOTE(review): `n-1` wraps around for an empty range unless an early return
// sits on the lines not shown between 272 and 275 -- confirm.
275 std::advance(result, n-1);
// Scale the current value by n/rank and cap it at `prev`.
// NOTE(review): `prev` is presumably the adjusted value written in the
// previous step, which would keep the output monotone -- confirm.
282 *result = std::min(*first * n/static_cast<double>(rank), prev);
/// Definition of entropy() over [first, last).
/// Accumulates x*log(x) (natural log) into `sum` and returns
/// `-sum / N + log(N)`.
/// NOTE(review): the declarations of `sum` and `N`, the update of `N`, and the
/// braces are on lines not shown here; N is presumably the sum of the
/// elements -- confirm. If N ends up 0 the return expression is not finite.
291 template<
typename InputIterator>
292 double entropy(InputIterator first, InputIterator last)
294 BOOST_CONCEPT_ASSERT((boost::InputIterator<InputIterator>));
295 using boost::Convertible;
// Go through std::iterator_traits (as the other functions in this header do)
// so that raw pointers, which have no nested value_type, are accepted too.
296 typedef typename std::iterator_traits<InputIterator>::value_type T;
297 BOOST_CONCEPT_ASSERT((Convertible<T,double>));
300 for (; first != last; ++first) {
303 sum += *first * std::log(static_cast<double>(*first));
// std::log for consistency with the loop above.
306 return -sum / N + std::log(N);
/// Median absolute deviation from median (definition).
/// Computes the median m of [first, last), fills a scratch vector with the
/// absolute deviations |x - m|, sorts it and returns its median.
/// NOTE(review): the trailing `sorted` parameter, the braces, and the
/// declarations of `traits` and `first2` are on lines not shown here.
310 template <
class RandomAccessIterator>
311 double mad(RandomAccessIterator first, RandomAccessIterator last,
314 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
316 double m =
median(first, last, sorted);
317 typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
// Scratch buffer with one slot per input element.
318 std::vector<T> ad(std::distance(first, last));
// Carries weights over to the scratch buffer when the input is weighted.
320 normalizer::detail::copy_weight_if_weighted(first, last, ad.begin());
324 while (first!=last) {
// NOTE(review): `first2` presumably iterates over the data part of `ad`,
// and both iterators advance on lines not shown -- confirm.
325 *first2 = std::abs(traits.data(first)-m);
// Sorted here, so the final median call can be told the range is sorted.
329 std::sort(ad.begin(), ad.end());
330 return median(ad.begin(), ad.end(),
true);
// Head of the median() definition; the trailing `sorted` parameter and the
// whole body are on lines not shown here.
334 template <
class RandomAccessIterator>
335 double median(RandomAccessIterator first, RandomAccessIterator last,
// Body of mutual_information(): calculates the mutual information of the
// 2D container `n` as -entropy(all cells) + entropy of the two marginals.
// NOTE(review): the signature, the declarations of `rowsum` and `colsum`,
// and the return statement are on lines not shown here.
346 using boost::Convertible;
347 BOOST_CONCEPT_ASSERT((Convertible<typename T::value_type,double>));
// Marginal sums. The init value must be 0.0: with an int 0, std::accumulate
// keeps the running sum as int and truncates fractional entries at each step.
// Note that rowsum(c) holds the sum of column c.
359 for (
size_t c = 0; c<n.columns(); ++c)
360 rowsum(c) = std::accumulate(n.begin_column(c), n.end_column(c), 0.0);
// Likewise colsum(r) holds the sum of row r.
363 for (
size_t r = 0; r<n.rows(); ++r)
364 colsum(r) = std::accumulate(n.begin_row(r), n.end_row(r), 0.0);
// Joint entropy enters negatively, the two marginal entropies positively.
366 double mi = -
entropy(n.begin(), n.end());
367 mi +=
entropy(rowsum.begin(), rowsum.end());
368 mi +=
entropy(colsum.begin(), colsum.end());
/// Definition of percentile(); `p` is on a 0-100 scale.
/// Two paths are visible: one interpolates directly in [first, last), the
/// other copies the range, sorts the copy and calls percentile() again with
/// sorted=true.
/// NOTE(review): the braces and the conditions selecting between the paths
/// are on lines not shown here; presumably the first path runs when `sorted`
/// is true -- confirm.
374 template <
class RandomAccessIterator>
375 double percentile(RandomAccessIterator first, RandomAccessIterator last,
376 double p,
bool sorted)
378 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
// Fractional position of the percentile inside the range.
387 double j = p/100 * (std::distance(first,last)-1);
388 int i =
static_cast<int>(j);
// Linear interpolation between the two neighbouring elements.
// NOTE(review): first[i+1] is past the end when j is the last index (e.g.
// p == 100 or a one-element range) unless guarded on lines not shown --
// confirm.
389 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
391 using std::iterator_traits;
392 typedef typename iterator_traits<RandomAccessIterator>::value_type value_t;
// Work on a copy so the caller's range is left untouched.
393 std::vector<value_t> v_copy;
394 v_copy.reserve(std::distance(first,last));
395 std::copy(first, last, std::back_inserter(v_copy));
// Index of the lower neighbour; only elements up to i+1 are read afterwards.
396 size_t i =
static_cast<size_t>(p/100 * (v_copy.size()-1));
397 if (i+2 < v_copy.size()) {
// Sorting just the first i+2 elements is enough for the interpolation.
398 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
// NOTE(review): presumably the else branch (its `else` line is not shown).
401 std::sort(v_copy.begin(), v_copy.end());
402 return percentile(v_copy.begin(), v_copy.end(), p,
true);
void benjamini_hochberg(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:265
double mad(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Median absolute deviation from median.
Definition: utility.h:311
data_reference data(Iter iter) const
Definition: iterator_traits.h:434
Class for containing sample labels.
Definition: Target.h:47
double mutual_information(const T &A)
Calculates the mutual information of A.
Definition: utility.h:343
Concept check for Data Iterator.
Definition: concept_check.h:224
Definition: iterator_traits.h:406
Concept check for Container2D.
Definition: concept_check.h:56
double skewness(const utility::VectorBase &)
Computes the skewness of the data in a vector.
double pearson_p_value(double r, unsigned int n)
one-sided p-value
double median(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Definition: utility.h:335
double entropy(InputIterator first, InputIterator last)
Definition: utility.h:292
DataIterator.
Definition: DataIterator.h:61
This is the yat interface to GSL vector.
Definition: Vector.h:57
This is the yat interface to GSL vector.
Definition: VectorBase.h:52
bool binary(size_t i) const
Default binary is set to false for all classes except class 0.
weight_reference weight(Iter iter) const
Definition: iterator_traits.h:440
double kurtosis(const utility::VectorBase &)
Computes the kurtosis of the data in a vector.
void add(T &o, ForwardIterator first, ForwardIterator last, const classifier::Target &target)
Definition: utility.h:255
double cdf_hypergeometric_P(unsigned int k, unsigned int n1, unsigned int n2, unsigned int t)
double percentile2(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:236
double percentile(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:375
Functor to calculate percentile of a range.
Definition: Percentiler.h:50