yat  0.14.5pre
Percentiler.h
1 #ifndef _theplu_yat_statistics_percentiler_
2 #define _theplu_yat_statistics_percentiler_
3 
4 // $Id: Percentiler.h 3550 2017-01-03 05:41:02Z peter $
5 
6 /*
7  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2014, 2016 Peter Johansson
9 
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11 
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16 
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24 */
25 
26 #include "yat/utility/concept_check.h"
27 #include "yat/utility/DataWeight.h"
28 #include "yat/utility/iterator_traits.h"
29 #include "yat/utility/yat_assert.h"
30 #include "yat/utility/WeightIterator.h"
31 
32 #include <boost/concept_check.hpp>
33 #include <boost/iterator/iterator_concepts.hpp>
34 
35 #include <algorithm>
36 #include <cmath>
37 #include <limits>
38 #include <numeric>
39 #include <stdexcept>
40 #include <vector>
41 
42 namespace theplu {
43 namespace yat {
44 namespace statistics {
45 
52  {
53  public:
// Constructor.
//
// perc:   percentile to calculate, expressed in percent; the
//         implementations below divide perc_ by 100.0, so the default
//         of 50 corresponds to the median.
// sorted: when true, ranges handed to operator() are treated as
//         already sorted (no sorting/searching is done on them).
//
// NOTE(review): the definition is not visible in this header;
// presumably it stores the arguments in perc_ and sorted_ - confirm
// against Percentiler.cc.
61  Percentiler(double perc=50, bool sorted=false);
62 
99  template<typename RandomAccessIterator>
100  double operator()(RandomAccessIterator first,
101  RandomAccessIterator last) const
102  {
104  BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
105  if (first==last)
106  return std::numeric_limits<double>::quiet_NaN();
107  // just to avoid long line
109  typedef typename weighted_iterator_traits<RandomAccessIterator>::type tag;
110  return calculate(first, last, sorted_, tag());
111  }
112  private:
// requested percentile, in percent (divided by 100.0 where it is used)
113  double perc_;
// forwarded by operator() as the 'sorted' argument of calculate()
114  bool sorted_;
115 
116  // unweighted version
// Chosen by operator() when the iterator's tag is
// utility::unweighted_iterator_tag. If sorted is true the range is
// read positionally; otherwise the implementation works on a copy.
117  template<typename RandomAccessIterator>
118  double calculate(RandomAccessIterator first, RandomAccessIterator last,
119  bool sorted, utility::unweighted_iterator_tag tag) const;
120 
121  // weighted version
// Chosen by operator() when the iterator's tag is
// utility::weighted_iterator_tag. If sorted is false the
// implementation sorts a copy of the range and calls itself again
// with sorted set to true.
122  template<typename RandomAccessIterator>
123  double calculate(RandomAccessIterator first, RandomAccessIterator last,
124  bool sorted, utility::weighted_iterator_tag tag) const;
125 
126  // using compiler generated copy
127  //Percentiler(const Percentiler&);
128  //Percentiler& operator=(const Percentiler&);
129 
130  };
131 
132  // template implementations
133  // /////////////////////////
134 
135  // unweighted version
136  template<typename RandomAccessIterator>
137  double
138  Percentiler::calculate(RandomAccessIterator first,
139  RandomAccessIterator last,
140  bool sorted,
142  {
143  size_t n = last - first;
144  // range is one value only is a special case
145  if (n == 1)
146  *first;
147 
148  double j = n * perc_ / 100.0;
149 
150  // Some special cases
151  if (j > n-1) {
152  if (sorted)
153  return *(--last);
154  return *std::max_element(first, last);
155  }
156  if (j < 1.0) {
157  if (sorted)
158  return *first;
159  return *std::min_element(first, last);
160  }
161 
162  size_t i = static_cast<size_t>(j);
163  // for border case (j integer) we take the average of adjacent bins
164  size_t k = (i==j) ? i-1 : i;
165 
166  if (sorted) {
167  if (i==k)
168  return first[i];
169  return (first[i]+first[k])/2;
170  }
171 
172  std::vector<double> vec(first, last);
173  // find the ith element
174  std::nth_element(vec.begin(), vec.begin()+i, vec.end());
175  // if i==k simply return vec[i]
176  if (i==k)
177  return vec[i];
178  YAT_ASSERT(k==i-1);
179  // otherwise return average of vec[i] and vec[k].
180 
181  // We need to find the kth element. Since we have called
182  // nth_element above, we know that all elements in (begin+i, end)
183  // are >= vec[i] and all elements in [begin, begin+i) are <=
184  // vec[i]. Consequently, kth (k=i-1) element is the max element in
185  // range [begin, begin+i).
186  return (vec[i] + *std::max_element(vec.begin(), vec.begin()+i))/2;
187  }
188 
189 
190  // weighted version
191  template<typename RandomAccessIterator>
192  double Percentiler::calculate(RandomAccessIterator first,
193  RandomAccessIterator last,
194  bool sorted,
195  utility::weighted_iterator_tag tag) const
196  {
197  if (sorted) {
198  utility::iterator_traits<RandomAccessIterator> trait;
199  std::vector<double> accum_w;
200  accum_w.reserve(last-first);
201  std::partial_sum(weight_iterator(first),
202  weight_iterator(last),
203  std::back_inserter(accum_w));
204 
205  double w_bound=perc_/100.0*accum_w.back();
206  std::vector<double>::const_iterator upper(accum_w.begin());
207  double margin=1e-10;
208  while (upper!=accum_w.end() && *upper <= w_bound+margin)
209  ++upper;
210  while (upper!=accum_w.begin() &&
211  (upper==accum_w.end() ||
212  trait.weight(first+(upper-accum_w.begin()))==0.0))
213  --upper;
214  std::vector<double>::const_iterator lower(upper);
215  while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
216  --lower;
217 
218  return (trait.data(first+(upper-accum_w.begin()))+
219  trait.data(first+(lower-accum_w.begin())))/2;
220  }
221 
222  std::vector<utility::DataWeight> v_copy(first, last);
223  std::sort(v_copy.begin(), v_copy.end());
224  return calculate(v_copy.begin(), v_copy.end(), true, tag);
225  }
226 
227 
228 }}} // of namespace statistics, yat, and theplu
229 
230 #endif
Concept check for Data Iterator.
Definition: concept_check.h:228
Definition: iterator_traits.h:47
Definition: iterator_traits.h:55
Definition: iterator_traits.h:105
Percentiler(double perc=50, bool sorted=false)
Functor to calculate percentile of a range.
Definition: Percentiler.h:51
double operator()(RandomAccessIterator first, RandomAccessIterator last) const
Definition: Percentiler.h:100

Generated on Tue Sep 26 2017 02:33:29 for yat by  doxygen 1.8.5