1 #ifndef _theplu_yat_statistics_utility_ 2 #define _theplu_yat_statistics_utility_ 29 #include "Percentiler.h" 31 #include "yat/classifier/DataLookupWeighted1D.h" 32 #include "yat/classifier/Target.h" 33 #include "yat/normalizer/utility.h" 34 #include "yat/utility/concept_check.h" 35 #include "yat/utility/DataIterator.h" 37 #include "yat/utility/iterator_traits.h" 38 #include "yat/utility/Matrix.h" 39 #include "yat/utility/sort_index.h" 40 #include "yat/utility/Vector.h" 41 #include "yat/utility/VectorBase.h" 42 #include "yat/utility/yat_assert.h" 44 #include <boost/concept_check.hpp> 45 #include <boost/iterator/iterator_concepts.hpp> 46 #include <boost/iterator/iterator_traits.hpp> 47 #include <boost/iterator/permutation_iterator.hpp> 49 #include <gsl/gsl_math.h> 50 #include <gsl/gsl_statistics_double.h> 60 namespace statistics {
71 template <
typename T,
typename ForwardIterator>
72 void add(T& o, ForwardIterator first, ForwardIterator last,
73 const classifier::Target& target);
95 template<
typename B
idirectionalIterator1,
typename B
idirectionalIterator2>
97 BidirectionalIterator1 last,
98 BidirectionalIterator2 result);
120 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
122 RandomAccessIterator last,
123 MutableRandomAccessIterator result);
138 unsigned int n2,
unsigned int t);
161 template<
typename InputIterator>
162 double entropy(InputIterator first, InputIterator last);
188 double kurtosis(
const utility::VectorBase&);
206 template <
class RandomAccessIterator>
207 double mad(RandomAccessIterator first, RandomAccessIterator last,
230 template <
class RandomAccessIterator>
231 double median(RandomAccessIterator first, RandomAccessIterator last,
284 template <
class RandomAccessIterator>
285 double percentile(RandomAccessIterator first, RandomAccessIterator last,
286 double p,
bool sorted=
false) YAT_DEPRECATE;
300 template <class RandomAccessIterator>
301 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
302 double p,
bool sorted=false)
305 return percentiler(first, last);
319 template <
typename T,
typename ForwardIterator>
320 void add(T& o, ForwardIterator first, ForwardIterator last,
323 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<ForwardIterator>));
324 BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<ForwardIterator>));
326 for (
size_t i=0; first!=last; ++i, ++first)
331 template<
typename B
idirectionalIterator1,
typename B
idirectionalIterator2>
333 BidirectionalIterator1 last,
334 BidirectionalIterator2 result)
336 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<BidirectionalIterator1>));
337 BOOST_CONCEPT_ASSERT((boost_concepts::BidirectionalTraversal<BidirectionalIterator1>));
339 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<BidirectionalIterator2>));
340 BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<BidirectionalIterator2>));
341 BOOST_CONCEPT_ASSERT((boost_concepts::BidirectionalTraversal<BidirectionalIterator2>));
343 typedef typename boost::iterator_difference<BidirectionalIterator1>::type
346 diff_type n = std::distance(first, last);
349 std::advance(result, n-1);
356 x = std::min(*first * n/static_cast<double>(rank), x);
365 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
367 RandomAccessIterator last,
368 MutableRandomAccessIterator result)
370 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<RandomAccessIterator>));
371 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
373 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<MutableRandomAccessIterator>));
374 BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<MutableRandomAccessIterator>));
375 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<MutableRandomAccessIterator>));
377 std::vector<size_t> idx;
380 boost::make_permutation_iterator(first, idx.end()),
381 boost::make_permutation_iterator(result, idx.begin()));
385 template<
typename InputIterator>
386 double entropy(InputIterator first, InputIterator last)
388 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<InputIterator>));
389 BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<InputIterator>));
390 using boost::Convertible;
391 typedef typename boost::iterator_value<InputIterator>::type T;
392 BOOST_CONCEPT_ASSERT((Convertible<T,double>));
395 for (; first != last; ++first) {
398 sum += *first * std::log(static_cast<double>(*first));
401 return -sum / N + log(N);
405 template <
class RandomAccessIterator>
406 double mad(RandomAccessIterator first, RandomAccessIterator last,
410 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
411 double m =
median(first, last, sorted);
412 typedef typename boost::iterator_value<RandomAccessIterator>::type T;
413 std::vector<T> ad(std::distance(first, last));
415 normalizer::detail::copy_weight_if_weighted(first, last, ad.begin());
419 while (first!=last) {
420 *first2 = std::abs(traits.data(first)-m);
424 return median(ad.begin(), ad.end(),
false);
428 template <
class RandomAccessIterator>
429 double median(RandomAccessIterator first, RandomAccessIterator last,
433 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
442 using boost::Convertible;
443 BOOST_CONCEPT_ASSERT((Convertible<typename T::value_type,double>));
455 for (
size_t c = 0; c<n.columns(); ++c)
456 rowsum(c) = std::accumulate(n.begin_column(c), n.end_column(c), 0);
459 for (
size_t r = 0; r<n.rows(); ++r)
460 colsum(r) = std::accumulate(n.begin_row(r), n.end_row(r), 0);
462 double mi = -
entropy(n.begin(), n.end());
463 mi +=
entropy(rowsum.begin(), rowsum.end());
464 mi +=
entropy(colsum.begin(), colsum.end());
470 template <
class RandomAccessIterator>
471 double percentile(RandomAccessIterator first, RandomAccessIterator last,
472 double p,
bool sorted)
474 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
483 double j = p/100 * (std::distance(first,last)-1);
484 int i =
static_cast<int>(j);
485 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
487 using std::iterator_traits;
488 typedef typename iterator_traits<RandomAccessIterator>::value_type value_t;
489 std::vector<value_t> v_copy(first, last);
490 size_t i =
static_cast<size_t>(p/100 * (v_copy.size()-1));
491 if (i+2 < v_copy.size()) {
492 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
495 std::sort(v_copy.begin(), v_copy.end());
496 return percentile(v_copy.begin(), v_copy.end(), p,
true);
void benjamini_hochberg(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:332
double mad(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Median absolute deviation from median.
Definition: utility.h:406
data_reference data(Iter iter) const
Definition: iterator_traits.h:440
Class for containing sample labels.
Definition: Target.h:47
double mutual_information(const T &A)
Calculates the mutual information of A.
Definition: utility.h:439
Concept check for Data Iterator.
Definition: concept_check.h:240
Definition: iterator_traits.h:412
The Department of Theoretical Physics namespace as we define it.
Concept check for Container2D.
Definition: concept_check.h:65
double skewness(const utility::VectorBase &)
Computes the skewness of the data in a vector.
double pearson_p_value(double r, unsigned int n)
one-sided p-value
double median(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Definition: utility.h:429
double entropy(InputIterator first, InputIterator last)
Definition: utility.h:386
DataIterator.
Definition: DataIterator.h:63
This is the yat interface to GSL vector.
Definition: Vector.h:59
This is the yat interface to GSL vector.
Definition: VectorBase.h:55
bool binary(size_t i) const
Default binary is set to false for all classes except class 0.
void sort_index(InputIterator first, InputIterator last, std::vector< size_t > &sort_index)
Definition: sort_index.h:146
weight_reference weight(Iter iter) const
Definition: iterator_traits.h:446
double kurtosis(const utility::VectorBase &)
Computes the kurtosis of the data in a vector.
void benjamini_hochberg_unsorted(RandomAccessIterator first, RandomAccessIterator last, MutableRandomAccessIterator result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:366
void add(T &o, ForwardIterator first, ForwardIterator last, const classifier::Target &target)
Definition: utility.h:320
utility::Matrix correlation(utility::Matrix x)
double cdf_hypergeometric_P(unsigned int k, unsigned int n1, unsigned int n2, unsigned int t)
double percentile2(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:301
double percentile(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:471
Functor to calculate percentile of a range.
Definition: Percentiler.h:51