yat  0.13.2pre
Percentiler.h
1 #ifndef _theplu_yat_statistics_percentiler_
2 #define _theplu_yat_statistics_percentiler_
3 
4 // $Id: Percentiler.h 3330 2014-10-14 08:03:25Z peter $
5 
6 /*
7  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2014 Peter Johansson
9 
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11 
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16 
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24 */
25 
26 #include "yat/utility/concept_check.h"
27 #include "yat/utility/DataWeight.h"
28 #include "yat/utility/iterator_traits.h"
29 #include "yat/utility/yat_assert.h"
30 #include "yat/utility/WeightIterator.h"
31 
32 #include <boost/concept_check.hpp>
33 
34 #include <algorithm>
35 #include <cmath>
36 #include <limits>
37 #include <numeric>
38 #include <stdexcept>
39 #include <vector>
40 
41 namespace theplu {
42 namespace yat {
43 namespace statistics {
44 
51  {
52  public:
60  Percentiler(double perc=50, bool sorted=false);
61 
96  template<typename RandomAccessIterator>
97  double operator()(RandomAccessIterator first,
98  RandomAccessIterator last) const
99  {
100  BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
102  if (first==last)
103  return std::numeric_limits<double>::quiet_NaN();
104  // just to avoid long line
106  typedef typename weighted_iterator_traits<RandomAccessIterator>::type tag;
107  return calculate(first, last, sorted_, tag());
108  }
109  private:
110  double perc_;
111  bool sorted_;
112 
113  // unweighted version
114  template<typename RandomAccessIterator>
115  double calculate(RandomAccessIterator first, RandomAccessIterator last,
116  bool sorted, utility::unweighted_iterator_tag tag) const;
117 
118  // weighted version
119  template<typename RandomAccessIterator>
120  double calculate(RandomAccessIterator first, RandomAccessIterator last,
121  bool sorted, utility::weighted_iterator_tag tag) const;
122 
123  // using compiler generated copy
124  //Percentiler(const Percentiler&);
125  //Percentiler& operator=(const Percentiler&);
126 
127  };
128 
129  // template implementations
130  // /////////////////////////
131 
132  // unweighted version
133  template<typename RandomAccessIterator>
134  double
135  Percentiler::calculate(RandomAccessIterator first,
136  RandomAccessIterator last,
137  bool sorted,
139  {
140  size_t n = last - first;
141  // range is one value only is a special case
142  if (n == 1)
143  *first;
144 
145  double j = n * perc_ / 100.0;
146 
147  // Some special cases
148  if (j > n-1) {
149  if (sorted)
150  return *(--last);
151  return *std::max_element(first, last);
152  }
153  if (j < 1.0) {
154  if (sorted)
155  return *first;
156  return *std::min_element(first, last);
157  }
158 
159  size_t i = static_cast<size_t>(j);
160  // for border case (j integer) we take the average of adjacent bins
161  size_t k = (i==j) ? i-1 : i;
162 
163  if (sorted) {
164  if (i==k)
165  return first[i];
166  return (first[i]+first[k])/2;
167  }
168 
169  std::vector<double> vec(first, last);
170  // find the ith element
171  std::nth_element(vec.begin(), vec.begin()+i, vec.end());
172  // if i==k simply return vec[i]
173  if (i==k)
174  return vec[i];
175  YAT_ASSERT(k==i-1);
176  // otherwise return average of vec[i] and vec[k].
177 
178  // We need to find the kth element. Since we have called
179  // nth_element above, we know that all elements in (begin+i, end)
180  // are >= vec[i] and all elements in [begin, begin+i) are <=
181  // vec[i]. Consequently, kth (k=i-1) element is the max element in
182  // range [begin, begin+i).
183  return (vec[i] + *std::max_element(vec.begin(), vec.begin()+i))/2;
184  }
185 
186 
187  // weighted version
188  template<typename RandomAccessIterator>
189  double Percentiler::calculate(RandomAccessIterator first,
190  RandomAccessIterator last,
191  bool sorted,
192  utility::weighted_iterator_tag tag) const
193  {
194  if (sorted) {
195  utility::iterator_traits<RandomAccessIterator> trait;
196  std::vector<double> accum_w;
197  accum_w.reserve(last-first);
198  std::partial_sum(weight_iterator(first),
199  weight_iterator(last),
200  std::back_inserter(accum_w));
201 
202  double w_bound=perc_/100.0*accum_w.back();
203  std::vector<double>::const_iterator upper(accum_w.begin());
204  double margin=1e-10;
205  while (upper!=accum_w.end() && *upper <= w_bound+margin)
206  ++upper;
207  while (upper!=accum_w.begin() &&
208  (upper==accum_w.end() ||
209  trait.weight(first+(upper-accum_w.begin()))==0.0))
210  --upper;
211  std::vector<double>::const_iterator lower(upper);
212  while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
213  --lower;
214 
215  return (trait.data(first+(upper-accum_w.begin()))+
216  trait.data(first+(lower-accum_w.begin())))/2;
217  }
218 
219  std::vector<utility::DataWeight> v_copy(first, last);
220  std::sort(v_copy.begin(), v_copy.end());
221  return calculate(v_copy.begin(), v_copy.end(), true, tag);
222  }
223 
224 
225 }}} // of namespace statistics, yat, and theplu
226 
227 #endif
Concept check for Data Iterator.
Definition: concept_check.h:226
Definition: iterator_traits.h:47
Definition: iterator_traits.h:55
Definition: iterator_traits.h:105
Percentiler(double perc=50, bool sorted=false)
Functor to calculate percentile of a range.
Definition: Percentiler.h:50
double operator()(RandomAccessIterator first, RandomAccessIterator last) const
Definition: Percentiler.h:97

Generated on Wed Jan 4 2017 02:23:07 for yat by  doxygen 1.8.5