yat  0.10.4pre
utility.h
1 #ifndef _theplu_yat_statistics_utility_
2 #define _theplu_yat_statistics_utility_
3 
4 // $Id: utility.h 2673 2011-12-03 00:30:12Z peter $
5 
6 /*
7  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2005 Peter Johansson
9  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
10  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
11  Copyright (C) 2009, 2010, 2011 Peter Johansson
12 
13  This file is part of the yat library, http://dev.thep.lu.se/yat
14 
15  The yat library is free software; you can redistribute it and/or
16  modify it under the terms of the GNU General Public License as
17  published by the Free Software Foundation; either version 3 of the
18  License, or (at your option) any later version.
19 
20  The yat library is distributed in the hope that it will be useful,
21  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23  General Public License for more details.
24 
25  You should have received a copy of the GNU General Public License
26  along with yat. If not, see <http://www.gnu.org/licenses/>.
27 */
28 
29 #include "Percentiler.h"
30 
31 #include "yat/classifier/DataLookupWeighted1D.h"
32 #include "yat/classifier/Target.h"
33 #include "yat/normalizer/utility.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/DataIterator.h"
36 #include "yat/utility/deprecate.h"
37 #include "yat/utility/iterator_traits.h"
38 #include "yat/utility/VectorBase.h"
39 #include "yat/utility/yat_assert.h"
40 
41 #include <boost/concept_check.hpp>
42 
43 #include <algorithm>
44 #include <cmath>
45 #include <iterator>
46 #include <stdexcept>
47 #include <vector>
48 
49 #include <gsl/gsl_statistics_double.h>
50 
51 namespace theplu {
52 namespace yat {
53 namespace statistics {
54 
60  template <typename T, typename ForwardIterator>
61  void add(T& o, ForwardIterator first, ForwardIterator last,
62  const classifier::Target& target);
63 
79  template<typename BidirectionalIterator1, typename BidirectionalIterator2>
80  void benjamini_hochberg(BidirectionalIterator1 first,
81  BidirectionalIterator1 last,
82  BidirectionalIterator2 result);
83 
84 
96  double cdf_hypergeometric_P(unsigned int k, unsigned int n1,
97  unsigned int n2, unsigned int t);
98 
99 
115  double pearson_p_value(double r, unsigned int n);
116 
124  double kurtosis(const utility::VectorBase&);
125 
126 
138  template <class RandomAccessIterator>
139  double mad(RandomAccessIterator first, RandomAccessIterator last,
140  bool sorted=false);
141 
142 
158  template <class RandomAccessIterator>
159  double median(RandomAccessIterator first, RandomAccessIterator last,
160  bool sorted=false);
161 
162 
188  template <class RandomAccessIterator>
189  double percentile(RandomAccessIterator first, RandomAccessIterator last,
190  double p, bool sorted=false) YAT_DEPRECATE;
191 
197  template <class RandomAccessIterator>
198  double percentile2(RandomAccessIterator first, RandomAccessIterator last,
199  double p, bool sorted=false)
200  {
201  Percentiler percentiler(p, sorted);
202  return percentiler(first, last);
203  }
204 
211  double skewness(const utility::VectorBase&);
212 
213 
215 
216  template <typename T, typename ForwardIterator>
217  void add(T& o, ForwardIterator first, ForwardIterator last,
218  const classifier::Target& target)
219  {
221  for (size_t i=0; first!=last; ++i, ++first)
222  o.add(traits.data(first), target.binary(i), traits.weight(first));
223  }
224 
225 
226  template<typename BidirectionalIterator1, typename BidirectionalIterator2>
227  void benjamini_hochberg(BidirectionalIterator1 first,
228  BidirectionalIterator1 last,
229  BidirectionalIterator2 result)
230  {
231  using boost::Mutable_BidirectionalIterator;
232  BOOST_CONCEPT_ASSERT((boost::BidirectionalIterator<BidirectionalIterator1>));
233  BOOST_CONCEPT_ASSERT((Mutable_BidirectionalIterator<BidirectionalIterator2>));
234  size_t n = std::distance(first, last);
235  if (!n)
236  return;
237  std::advance(result, n-1);
238  first = last;
239  --first;
240  size_t rank = n;
241 
242  double prev = 1.0;
243  while (rank) {
244  *result = std::min(*first * n/static_cast<double>(rank), prev);
245  prev = *result;
246  --rank;
247  --first;
248  --result;
249  }
250  }
251 
252 
253  template <class RandomAccessIterator>
254  double mad(RandomAccessIterator first, RandomAccessIterator last,
255  bool sorted)
256  {
257  BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
259  double m = median(first, last, sorted);
260  typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
261  std::vector<T> ad(std::distance(first, last));
262  // copy weights if needed
263  normalizer::detail::copy_weight_if_weighted(first, last, ad.begin());
264  // assign data (absolute deviation from median)
267  while (first!=last) {
268  *first2 = std::abs(traits.data(first)-m);
269  ++first;
270  ++first2;
271  }
272  std::sort(ad.begin(), ad.end());
273  return median(ad.begin(), ad.end(), true);
274  }
275 
276 
277  template <class RandomAccessIterator>
278  double median(RandomAccessIterator first, RandomAccessIterator last,
279  bool sorted)
280  {
281  return percentile2(first, last, 50.0, sorted);
282  }
283 
284 
285  template <class RandomAccessIterator>
286  double percentile(RandomAccessIterator first, RandomAccessIterator last,
287  double p, bool sorted)
288  {
289  BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
291  // range is one value only is a special case
292  if (first+1 == last)
294  if (sorted) {
295  // have to take care of this special case
296  if (p>=100)
298  double j = p/100 * (std::distance(first,last)-1);
299  int i = static_cast<int>(j);
300  return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
301  }
302  using std::iterator_traits;
303  typedef typename iterator_traits<RandomAccessIterator>::value_type value_t;
304  std::vector<value_t> v_copy;
305  v_copy.reserve(std::distance(first,last));
306  std::copy(first, last, std::back_inserter(v_copy));
307  size_t i = static_cast<size_t>(p/100 * (v_copy.size()-1));
308  if (i+2 < v_copy.size()) {
309  std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
310  }
311  else
312  std::sort(v_copy.begin(), v_copy.end());
313  return percentile(v_copy.begin(), v_copy.end(), p, true);
314  }
315 
316 
317 }}} // of namespace statistics, yat, and theplu
318 
319 #endif

Generated on Mon Nov 11 2013 09:41:44 for yat by  doxygen 1.8.1