yat  0.21pre
Percentiler.h
1 #ifndef _theplu_yat_statistics_percentiler_
2 #define _theplu_yat_statistics_percentiler_
3 
4 // $Id: Percentiler.h 4207 2022-08-26 04:36:28Z peter $
5 
6 /*
7  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2014, 2016, 2020, 2022 Peter Johansson
9 
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11 
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16 
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24 */
25 
26 #include "yat/utility/concept_check.h"
27 #include "yat/utility/DataWeight.h"
28 #include "yat/utility/iterator_traits.h"
29 #include "yat/utility/yat_assert.h"
30 #include "yat/utility/WeightIterator.h"
31 
32 #include <boost/concept_check.hpp>
33 #include <boost/iterator/iterator_concepts.hpp>
34 
35 #include <algorithm>
36 #include <cmath>
37 #include <limits>
38 #include <numeric>
39 #include <stdexcept>
40 #include <vector>
41 
42 namespace theplu {
43 namespace yat {
44 namespace statistics {
45 
52  {
53  public:
61  Percentiler(double perc=50, bool sorted=false);
62 
101  template<typename RandomAccessIterator>
102  double operator()(RandomAccessIterator first,
103  RandomAccessIterator last) const
104  {
106  BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
107  //
108  if (first==last)
109  return std::numeric_limits<double>::quiet_NaN();
110  // just to avoid long line
112  typedef typename weighted_iterator_traits<RandomAccessIterator>::type tag;
113  return calculate(first, last, sorted_, tag());
114  }
115  private:
116  double perc_;
117  bool sorted_;
118 
119  // unweighted version
120  template<typename RandomAccessIterator>
121  double calculate(RandomAccessIterator first, RandomAccessIterator last,
122  bool sorted, utility::unweighted_iterator_tag tag) const;
123 
124  // weighted version
125  template<typename RandomAccessIterator>
126  double calculate(RandomAccessIterator first, RandomAccessIterator last,
127  bool sorted, utility::weighted_iterator_tag tag) const;
128 
129  // using compiler generated copy
130  //Percentiler(const Percentiler&);
131  //Percentiler& operator=(const Percentiler&);
132 
133  };
134 
135  // template implementations
136  // /////////////////////////
137 
138  // unweighted version
139  template<typename RandomAccessIterator>
140  double
141  Percentiler::calculate(RandomAccessIterator first,
142  RandomAccessIterator last,
143  bool sorted,
145  {
146  BOOST_CONCEPT_ASSERT((boost_concepts::LvalueIterator<RandomAccessIterator>));
147  size_t n = last - first;
148  // range is one value only is a special case
149  if (n == 1)
150  *first;
151 
152  double j = n * perc_ / 100.0;
153 
154  // Some special cases
155  if (j > n-1) {
156  if (sorted)
157  return *(--last);
158  return *std::max_element(first, last);
159  }
160  if (j < 1.0) {
161  if (sorted)
162  return *first;
163  return *std::min_element(first, last);
164  }
165 
166  size_t i = static_cast<size_t>(j);
167  // for border case (j integer) we take the average of adjacent bins
168  size_t k = (i==j) ? i-1 : i;
169 
170  if (sorted) {
171  if (i==k)
172  return first[i];
173  return (first[i]+first[k])/2;
174  }
175 
176  std::vector<double> vec(first, last);
177  // find the ith element
178  std::nth_element(vec.begin(), vec.begin()+i, vec.end());
179  // if i==k simply return vec[i]
180  if (i==k)
181  return vec[i];
182  YAT_ASSERT(k==i-1);
183  // otherwise return average of vec[i] and vec[k].
184 
185  // We need to find the kth element. Since we have called
186  // nth_element above, we know that all elements in (begin+i, end)
187  // are >= vec[i] and all elements in [begin, begin+i) are <=
188  // vec[i]. Consequently, kth (k=i-1) element is the max element in
189  // range [begin, begin+i).
190  return (vec[i] + *std::max_element(vec.begin(), vec.begin()+i))/2;
191  }
192 
193 
194  // weighted version
195  template<typename RandomAccessIterator>
196  double Percentiler::calculate(RandomAccessIterator first,
197  RandomAccessIterator last,
198  bool sorted,
199  utility::weighted_iterator_tag tag) const
200  {
201  if (sorted) {
202  utility::iterator_traits<RandomAccessIterator> trait;
203  std::vector<double> accum_w;
204  accum_w.reserve(last-first);
205  std::partial_sum(weight_iterator(first),
206  weight_iterator(last),
207  std::back_inserter(accum_w));
208 
209  double w_bound=perc_/100.0*accum_w.back();
210  std::vector<double>::const_iterator upper(accum_w.begin());
211  double margin=1e-10;
212  while (upper!=accum_w.end() && *upper <= w_bound+margin)
213  ++upper;
214  while (upper!=accum_w.begin() &&
215  (upper==accum_w.end() ||
216  trait.weight(first+(upper-accum_w.begin()))==0.0))
217  --upper;
218  std::vector<double>::const_iterator lower(upper);
219  while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
220  --lower;
221 
222  return (trait.data(first+(upper-accum_w.begin()))+
223  trait.data(first+(lower-accum_w.begin())))/2;
224  }
225 
226  std::vector<utility::DataWeight> v_copy(first, last);
227  std::sort(v_copy.begin(), v_copy.end());
228  return calculate(v_copy.begin(), v_copy.end(), true, tag);
229  }
230 
231 
232 }}} // of namespace statistics, yat, and theplu
233 
234 #endif
Concept check for Data Iterator.
Definition: concept_check.h:240
The Department of Theoretical Physics namespace as we define it.
Definition: iterator_traits.h:47
Definition: iterator_traits.h:55
Definition: iterator_traits.h:105
Percentiler(double perc=50, bool sorted=false)
double operator()(RandomAccessIterator first, RandomAccessIterator last) const
Definition: Percentiler.h:102
Functor to calculate percentile of a range.
Definition: Percentiler.h:51

Generated on Wed Jan 25 2023 03:34:29 for yat by  doxygen 1.8.14