1 #ifndef _theplu_yat_statistics_utility_ 2 #define _theplu_yat_statistics_utility_ 29 #include "Percentiler.h" 31 #include "yat/classifier/DataLookupWeighted1D.h" 32 #include "yat/classifier/Target.h" 33 #include "yat/normalizer/utility.h" 34 #include "yat/utility/concept_check.h" 35 #include "yat/utility/DataIterator.h" 37 #include "yat/utility/iterator_traits.h" 38 #include "yat/utility/Matrix.h" 39 #include "yat/utility/sort_index.h" 40 #include "yat/utility/Vector.h" 41 #include "yat/utility/VectorBase.h" 42 #include "yat/utility/yat_assert.h" 44 #include <boost/concept_check.hpp> 45 #include <boost/iterator/iterator_concepts.hpp> 46 #include <boost/iterator/iterator_traits.hpp> 47 #include <boost/iterator/permutation_iterator.hpp> 49 #include <gsl/gsl_math.h> 50 #include <gsl/gsl_statistics_double.h> 60 namespace statistics {
71 template <
typename T,
typename ForwardIterator>
72 void add(T& o, ForwardIterator first, ForwardIterator last,
73 const classifier::Target& target);
95 template<
typename BidirectionalIterator1,
typename BidirectionalIterator2>
97 BidirectionalIterator1 last,
98 BidirectionalIterator2 result);
120 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
122 RandomAccessIterator last,
123 MutableRandomAccessIterator result);
138 unsigned int n2,
unsigned int t);
161 template<
typename InputIterator>
162 double entropy(InputIterator first, InputIterator last);
188 double kurtosis(
const utility::VectorBase&);
204 template <
class RandomAccessIterator>
205 double mad(RandomAccessIterator first, RandomAccessIterator last,
226 template <
class RandomAccessIterator>
227 double median(RandomAccessIterator first, RandomAccessIterator last,
280 template <
class RandomAccessIterator>
281 double percentile(RandomAccessIterator first, RandomAccessIterator last,
282 double p,
bool sorted=
false) YAT_DEPRECATE;
294 template <class RandomAccessIterator>
295 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
296 double p,
bool sorted=false)
299 return percentiler(first, last);
313 template <
typename T,
typename ForwardIterator>
314 void add(T& o, ForwardIterator first, ForwardIterator last,
317 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<ForwardIterator>));
318 BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<ForwardIterator>));
320 for (
size_t i=0; first!=last; ++i, ++first)
325 template<
typename BidirectionalIterator1,
typename BidirectionalIterator2>
327 BidirectionalIterator1 last,
328 BidirectionalIterator2 result)
330 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<BidirectionalIterator1>));
331 BOOST_CONCEPT_ASSERT((boost_concepts::BidirectionalTraversal<BidirectionalIterator1>));
333 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<BidirectionalIterator2>));
334 BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<BidirectionalIterator2>));
335 BOOST_CONCEPT_ASSERT((boost_concepts::BidirectionalTraversal<BidirectionalIterator2>));
337 typedef typename boost::iterator_difference<BidirectionalIterator1>::type
340 diff_type n = std::distance(first, last);
343 std::advance(result, n-1);
350 x = std::min(*first * n/static_cast<double>(rank), x);
359 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
361 RandomAccessIterator last,
362 MutableRandomAccessIterator result)
364 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<RandomAccessIterator>));
365 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
367 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<MutableRandomAccessIterator>));
368 BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<MutableRandomAccessIterator>));
369 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<MutableRandomAccessIterator>));
371 std::vector<size_t> idx;
374 boost::make_permutation_iterator(first, idx.end()),
375 boost::make_permutation_iterator(result, idx.begin()));
379 template<
typename InputIterator>
380 double entropy(InputIterator first, InputIterator last)
382 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<InputIterator>));
383 BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<InputIterator>));
384 using boost::Convertible;
385 typedef typename boost::iterator_value<InputIterator>::type T;
386 BOOST_CONCEPT_ASSERT((Convertible<T,double>));
389 for (; first != last; ++first) {
392 sum += *first * std::log(static_cast<double>(*first));
395 return -sum / N + log(N);
399 template <
class RandomAccessIterator>
400 double mad(RandomAccessIterator first, RandomAccessIterator last,
404 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
405 double m =
median(first, last, sorted);
406 typedef typename boost::iterator_value<RandomAccessIterator>::type T;
407 std::vector<T> ad(std::distance(first, last));
409 normalizer::detail::copy_weight_if_weighted(first, last, ad.begin());
413 while (first!=last) {
414 *first2 = std::abs(traits.data(first)-m);
418 return median(ad.begin(), ad.end(),
false);
422 template <
class RandomAccessIterator>
423 double median(RandomAccessIterator first, RandomAccessIterator last,
427 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
436 using boost::Convertible;
437 BOOST_CONCEPT_ASSERT((Convertible<typename T::value_type,double>));
449 for (
size_t c = 0; c<n.columns(); ++c)
450 rowsum(c) = std::accumulate(n.begin_column(c), n.end_column(c), 0);
453 for (
size_t r = 0; r<n.rows(); ++r)
454 colsum(r) = std::accumulate(n.begin_row(r), n.end_row(r), 0);
456 double mi = -
entropy(n.begin(), n.end());
457 mi +=
entropy(rowsum.begin(), rowsum.end());
458 mi +=
entropy(colsum.begin(), colsum.end());
464 template <
class RandomAccessIterator>
465 double percentile(RandomAccessIterator first, RandomAccessIterator last,
466 double p,
bool sorted)
468 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
477 double j = p/100 * (std::distance(first,last)-1);
478 int i =
static_cast<int>(j);
479 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
481 using std::iterator_traits;
482 typedef typename iterator_traits<RandomAccessIterator>::value_type value_t;
483 std::vector<value_t> v_copy(first, last);
484 size_t i =
static_cast<size_t>(p/100 * (v_copy.size()-1));
485 if (i+2 < v_copy.size()) {
486 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
489 std::sort(v_copy.begin(), v_copy.end());
490 return percentile(v_copy.begin(), v_copy.end(), p,
true);
void benjamini_hochberg(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:326
double mad(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Median absolute deviation from median.
Definition: utility.h:400
data_reference data(Iter iter) const
Definition: iterator_traits.h:440
Class for containing sample labels.
Definition: Target.h:47
double mutual_information(const T &A)
Calculates the mutual information of A.
Definition: utility.h:433
Concept check for Data Iterator.
Definition: concept_check.h:240
Definition: iterator_traits.h:412
The Department of Theoretical Physics namespace as we define it.
Concept check for Container2D.
Definition: concept_check.h:65
double skewness(const utility::VectorBase &)
Computes the skewness of the data in a vector.
double pearson_p_value(double r, unsigned int n)
one-sided p-value
double median(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Definition: utility.h:423
double entropy(InputIterator first, InputIterator last)
Definition: utility.h:380
DataIterator.
Definition: DataIterator.h:63
This is the yat interface to GSL vector.
Definition: Vector.h:59
This is the yat interface to GSL vector.
Definition: VectorBase.h:55
bool binary(size_t i) const
Default binary is set to false for all classes except class 0.
void sort_index(InputIterator first, InputIterator last, std::vector< size_t > &sort_index)
Definition: sort_index.h:146
weight_reference weight(Iter iter) const
Definition: iterator_traits.h:446
double kurtosis(const utility::VectorBase &)
Computes the kurtosis of the data in a vector.
void benjamini_hochberg_unsorted(RandomAccessIterator first, RandomAccessIterator last, MutableRandomAccessIterator result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:360
void add(T &o, ForwardIterator first, ForwardIterator last, const classifier::Target &target)
Definition: utility.h:314
utility::Matrix correlation(utility::Matrix x)
double cdf_hypergeometric_P(unsigned int k, unsigned int n1, unsigned int n2, unsigned int t)
double percentile2(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:295
double percentile(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:465
Functor to calculate percentile of a range.
Definition: Percentiler.h:51