3512 |
22 Jul 16 |
peter |
1 |
#ifndef _theplu_yat_statistics_tukey_biweight_estimator |
2510 |
08 Jul 11 |
peter |
2 |
#define _theplu_yat_statistics_tukey_biweight_estimator |
2510 |
08 Jul 11 |
peter |
3 |
|
2510 |
08 Jul 11 |
peter |
// $Id$ |
2510 |
08 Jul 11 |
peter |
5 |
|
2510 |
08 Jul 11 |
peter |
6 |
/* |
3875 |
05 Mar 20 |
peter |
Copyright (C) 2011, 2012, 2014, 2016, 2020 Peter Johansson |
2510 |
08 Jul 11 |
peter |
8 |
|
2510 |
08 Jul 11 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2510 |
08 Jul 11 |
peter |
10 |
|
2510 |
08 Jul 11 |
peter |
The yat library is free software; you can redistribute it and/or |
2510 |
08 Jul 11 |
peter |
modify it under the terms of the GNU General Public License as |
2510 |
08 Jul 11 |
peter |
published by the Free Software Foundation; either version 3 of the |
2510 |
08 Jul 11 |
peter |
License, or (at your option) any later version. |
2510 |
08 Jul 11 |
peter |
15 |
|
2510 |
08 Jul 11 |
peter |
The yat library is distributed in the hope that it will be useful, |
2510 |
08 Jul 11 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2510 |
08 Jul 11 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2510 |
08 Jul 11 |
peter |
General Public License for more details. |
2510 |
08 Jul 11 |
peter |
20 |
|
2510 |
08 Jul 11 |
peter |
You should have received a copy of the GNU General Public License |
2510 |
08 Jul 11 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2510 |
08 Jul 11 |
peter |
23 |
*/ |
2510 |
08 Jul 11 |
peter |
24 |
|
2510 |
08 Jul 11 |
peter |
25 |
#include "AveragerWeighted.h" |
2510 |
08 Jul 11 |
peter |
26 |
#include "utility.h" |
2510 |
08 Jul 11 |
peter |
27 |
|
2510 |
08 Jul 11 |
peter |
28 |
#include "yat/regression/TukeyBiweight.h" |
2510 |
08 Jul 11 |
peter |
29 |
|
2510 |
08 Jul 11 |
peter |
30 |
#include "yat/utility/concept_check.h" |
2510 |
08 Jul 11 |
peter |
31 |
#include "yat/utility/Exception.h" |
2510 |
08 Jul 11 |
peter |
32 |
#include "yat/utility/iterator_traits.h" |
2510 |
08 Jul 11 |
peter |
33 |
|
2510 |
08 Jul 11 |
peter |
34 |
#include <boost/concept_check.hpp> |
3514 |
22 Jul 16 |
peter |
35 |
#include <boost/iterator/iterator_concepts.hpp> |
2510 |
08 Jul 11 |
peter |
36 |
|
2510 |
08 Jul 11 |
peter |
37 |
#include <algorithm> |
2510 |
08 Jul 11 |
peter |
38 |
#include <iterator> |
2510 |
08 Jul 11 |
peter |
39 |
#include <limits> |
2510 |
08 Jul 11 |
peter |
40 |
#include <vector> |
2510 |
08 Jul 11 |
peter |
41 |
|
2510 |
08 Jul 11 |
peter |
42 |
namespace theplu { |
2510 |
08 Jul 11 |
peter |
43 |
namespace yat { |
3512 |
22 Jul 16 |
peter |
44 |
namespace statistics { |
2510 |
08 Jul 11 |
peter |
45 |
|
2510 |
08 Jul 11 |
peter |
46 |
/** |
2510 |
08 Jul 11 |
peter |
\brief Tukey's Biweight Estimator |
2510 |
08 Jul 11 |
peter |
48 |
|
2510 |
08 Jul 11 |
peter |
Calculates the estimate as a weighted sum |
2510 |
08 Jul 11 |
peter |
\f$ m = \frac{\sum w_i(1-u_i^2)^2x_i}{\sum w_i(1-u_i^2)^2 } \f$ |
2510 |
08 Jul 11 |
peter |
51 |
|
2511 |
08 Jul 11 |
peter |
where the sums run over data points with |u|<1 and u is calculated as |
2510 |
08 Jul 11 |
peter |
53 |
|
2510 |
08 Jul 11 |
peter |
\f$ u_i = \frac{x_i-m}{k*\textrm{mad}} \f$ |
2510 |
08 Jul 11 |
peter |
55 |
|
2814 |
16 Aug 12 |
peter |
As the estimate \a m also appears on right-hand side, the estimate |
2510 |
08 Jul 11 |
peter |
is calculated in an iterative manner. |
2510 |
08 Jul 11 |
peter |
58 |
|
2510 |
08 Jul 11 |
peter |
\see regression::TukeyBiweight |
2510 |
08 Jul 11 |
peter |
\see mad |
2510 |
08 Jul 11 |
peter |
61 |
|
2510 |
08 Jul 11 |
peter |
\since New in yat 0.8 |
2510 |
08 Jul 11 |
peter |
63 |
*/ |
2510 |
08 Jul 11 |
peter |
64 |
class TukeyBiweightEstimator |
2510 |
08 Jul 11 |
peter |
65 |
{ |
2510 |
08 Jul 11 |
peter |
66 |
public: |
2510 |
08 Jul 11 |
peter |
67 |
/** |
3917 |
31 May 20 |
peter |
\since New in yat 0.18 |
3917 |
31 May 20 |
peter |
69 |
*/ |
3917 |
31 May 20 |
peter |
70 |
class Stopper |
3917 |
31 May 20 |
peter |
71 |
{ |
3917 |
31 May 20 |
peter |
72 |
public: |
3917 |
31 May 20 |
peter |
73 |
/** |
3917 |
31 May 20 |
peter |
Default constructor sets max epochs to 1000 |
3917 |
31 May 20 |
peter |
75 |
*/ |
3917 |
31 May 20 |
peter |
76 |
Stopper(size_t max_epochs=1000); |
3917 |
31 May 20 |
peter |
77 |
/** |
3917 |
31 May 20 |
peter |
\return true if \a current is sufficiently close to \a previous |
3917 |
31 May 20 |
peter |
79 |
*/ |
3917 |
31 May 20 |
peter |
80 |
virtual bool operator()(double current, double previous) const=0; |
3917 |
31 May 20 |
peter |
81 |
protected: |
3917 |
31 May 20 |
peter |
/// \return number of epochs |
3917 |
31 May 20 |
peter |
83 |
size_t epochs(void) const; |
3917 |
31 May 20 |
peter |
/// \return number of max epochs |
3917 |
31 May 20 |
peter |
85 |
size_t max_epochs(void) const; |
3917 |
31 May 20 |
peter |
86 |
private: |
3917 |
31 May 20 |
peter |
87 |
size_t epochs_; |
3917 |
31 May 20 |
peter |
88 |
size_t max_epochs_; |
3925 |
05 Jul 20 |
peter |
89 |
friend class TukeyBiweightEstimator; |
3917 |
31 May 20 |
peter |
90 |
bool stop(double current, double previous); |
3917 |
31 May 20 |
peter |
91 |
}; |
3917 |
31 May 20 |
peter |
92 |
|
3917 |
31 May 20 |
peter |
93 |
/** |
2510 |
08 Jul 11 |
peter |
\brief Constructor |
2510 |
08 Jul 11 |
peter |
95 |
|
2510 |
08 Jul 11 |
peter |
\param cutoff defines how close to the center (estimate) a data |
2510 |
08 Jul 11 |
peter |
point needs to be (in terms of MADs) to influence the estimate. |
3242 |
24 May 14 |
peter |
\param sorted if true object presumes input is a sorted range; |
3242 |
24 May 14 |
peter |
otherwise object creates a sorted copy of the range and base |
3242 |
24 May 14 |
peter |
the computation on that. |
2510 |
08 Jul 11 |
peter |
101 |
*/ |
2510 |
08 Jul 11 |
peter |
102 |
explicit TukeyBiweightEstimator(double cutoff=4.685, bool sorted=false) |
2510 |
08 Jul 11 |
peter |
103 |
: cutoff_(cutoff), sorted_(sorted) {} |
2510 |
08 Jul 11 |
peter |
104 |
|
2510 |
08 Jul 11 |
peter |
105 |
/** |
3512 |
22 Jul 16 |
peter |
Type Requirements: |
3512 |
22 Jul 16 |
peter |
- \c RandomAccessIterator must be a \ref |
3512 |
22 Jul 16 |
peter |
concept_data_iterator |
3512 |
22 Jul 16 |
peter |
- \c RandomAccessIterator must be a \random_access_traversal_iterator |
3870 |
24 Feb 20 |
peter |
- \c For unweighted iterator, RandomAccessIterator must be an |
3870 |
24 Feb 20 |
peter |
\lvalue_iterator as implied by \forward_iterator. |
3512 |
22 Jul 16 |
peter |
112 |
|
2510 |
08 Jul 11 |
peter |
\return Tukey's Biweight Estimate |
2510 |
08 Jul 11 |
peter |
114 |
*/ |
2510 |
08 Jul 11 |
peter |
115 |
template<typename RandomAccessIterator> |
3512 |
22 Jul 16 |
peter |
116 |
double operator()(RandomAccessIterator first, |
2510 |
08 Jul 11 |
peter |
117 |
RandomAccessIterator last) const; |
2510 |
08 Jul 11 |
peter |
118 |
|
3917 |
31 May 20 |
peter |
119 |
/** |
3917 |
31 May 20 |
peter |
\since New in yat 0.18 |
3917 |
31 May 20 |
peter |
121 |
*/ |
3917 |
31 May 20 |
peter |
122 |
template<typename RandomAccessIterator> |
3917 |
31 May 20 |
peter |
123 |
double operator()(RandomAccessIterator first, |
3917 |
31 May 20 |
peter |
124 |
RandomAccessIterator last, |
3917 |
31 May 20 |
peter |
125 |
Stopper& stopper) const; |
3917 |
31 May 20 |
peter |
126 |
|
2510 |
08 Jul 11 |
peter |
127 |
private: |
2510 |
08 Jul 11 |
peter |
128 |
double cutoff_; |
2510 |
08 Jul 11 |
peter |
129 |
bool sorted_; |
2510 |
08 Jul 11 |
peter |
130 |
|
2510 |
08 Jul 11 |
peter |
131 |
template<typename RandomAccessIterator> |
3512 |
22 Jul 16 |
peter |
132 |
double estimate(RandomAccessIterator first, |
3917 |
31 May 20 |
peter |
133 |
RandomAccessIterator last, |
3917 |
31 May 20 |
peter |
134 |
Stopper& stopper) const; |
2510 |
08 Jul 11 |
peter |
135 |
|
2510 |
08 Jul 11 |
peter |
136 |
template<typename InputIterator> |
3512 |
22 Jul 16 |
peter |
137 |
double estimate(InputIterator first, InputIterator last, |
2510 |
08 Jul 11 |
peter |
138 |
double center, double spread) const; |
3917 |
31 May 20 |
peter |
139 |
|
3917 |
31 May 20 |
peter |
140 |
class DefaultStopper : public Stopper |
3917 |
31 May 20 |
peter |
141 |
{ |
3917 |
31 May 20 |
peter |
142 |
public: |
3917 |
31 May 20 |
peter |
143 |
bool operator()(double current, double previous) const; |
3917 |
31 May 20 |
peter |
144 |
}; |
2510 |
08 Jul 11 |
peter |
145 |
}; |
2510 |
08 Jul 11 |
peter |
146 |
|
3917 |
31 May 20 |
peter |
147 |
|
2510 |
08 Jul 11 |
peter |
148 |
template<typename RandomAccessIterator> |
3512 |
22 Jul 16 |
peter |
149 |
double TukeyBiweightEstimator::operator()(RandomAccessIterator first, |
2510 |
08 Jul 11 |
peter |
150 |
RandomAccessIterator last) const |
2510 |
08 Jul 11 |
peter |
151 |
{ |
3917 |
31 May 20 |
peter |
152 |
TukeyBiweightEstimator::DefaultStopper stopper; |
3917 |
31 May 20 |
peter |
153 |
return (*this)(first, last, stopper); |
3917 |
31 May 20 |
peter |
154 |
} |
3917 |
31 May 20 |
peter |
155 |
|
3917 |
31 May 20 |
peter |
156 |
|
3917 |
31 May 20 |
peter |
157 |
template<typename RandomAccessIterator> |
3917 |
31 May 20 |
peter |
158 |
double |
3917 |
31 May 20 |
peter |
159 |
TukeyBiweightEstimator::operator()(RandomAccessIterator first, |
3917 |
31 May 20 |
peter |
160 |
RandomAccessIterator last, |
3917 |
31 May 20 |
peter |
161 |
TukeyBiweightEstimator::Stopper& stopper) const |
3917 |
31 May 20 |
peter |
162 |
{ |
3512 |
22 Jul 16 |
peter |
163 |
using boost_concepts::RandomAccessTraversal; |
3512 |
22 Jul 16 |
peter |
164 |
BOOST_CONCEPT_ASSERT((RandomAccessTraversal<RandomAccessIterator>)); |
2510 |
08 Jul 11 |
peter |
165 |
using utility::DataIteratorConcept; |
2510 |
08 Jul 11 |
peter |
166 |
BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>)); |
2510 |
08 Jul 11 |
peter |
167 |
|
2510 |
08 Jul 11 |
peter |
168 |
if (sorted_) |
3917 |
31 May 20 |
peter |
169 |
return estimate(first, last, stopper); |
2510 |
08 Jul 11 |
peter |
170 |
|
2510 |
08 Jul 11 |
peter |
// if not sorted, create a sorted copy |
2510 |
08 Jul 11 |
peter |
172 |
typedef typename std::iterator_traits<RandomAccessIterator> traits; |
3241 |
24 May 14 |
peter |
173 |
std::vector<typename traits::value_type> vec(first, last); |
2510 |
08 Jul 11 |
peter |
174 |
std::sort(vec.begin(), vec.end()); |
3917 |
31 May 20 |
peter |
175 |
return estimate(vec.begin(), vec.end(), stopper); |
2510 |
08 Jul 11 |
peter |
176 |
} |
2510 |
08 Jul 11 |
peter |
177 |
|
2510 |
08 Jul 11 |
peter |
178 |
|
2510 |
08 Jul 11 |
peter |
179 |
template<typename RandomAccessIterator> |
3917 |
31 May 20 |
peter |
180 |
double |
3917 |
31 May 20 |
peter |
181 |
TukeyBiweightEstimator::estimate(RandomAccessIterator first, |
3917 |
31 May 20 |
peter |
182 |
RandomAccessIterator last, |
3917 |
31 May 20 |
peter |
183 |
TukeyBiweightEstimator::Stopper& stopper) const |
2510 |
08 Jul 11 |
peter |
184 |
{ |
2510 |
08 Jul 11 |
peter |
185 |
const double scale = mad(first, last, true); |
2510 |
08 Jul 11 |
peter |
186 |
double m = median(first, last, true); |
2510 |
08 Jul 11 |
peter |
// if mad is zero all (non-zero weight) data points are equal and |
2510 |
08 Jul 11 |
peter |
// median is the only sensible estimate. Also, calculations below |
2510 |
08 Jul 11 |
peter |
// would "divide by zero" so we need to interupt here. |
2510 |
08 Jul 11 |
peter |
190 |
if (scale==0) |
2510 |
08 Jul 11 |
peter |
191 |
return m; |
2510 |
08 Jul 11 |
peter |
192 |
double m0 = m+1.0; |
3917 |
31 May 20 |
peter |
193 |
while (!stopper.stop(m, m0)) { |
2510 |
08 Jul 11 |
peter |
194 |
m0 = m; |
2510 |
08 Jul 11 |
peter |
195 |
m = estimate(first, last, m, scale); |
2510 |
08 Jul 11 |
peter |
196 |
} |
2510 |
08 Jul 11 |
peter |
197 |
return m; |
2510 |
08 Jul 11 |
peter |
198 |
} |
2510 |
08 Jul 11 |
peter |
199 |
|
2510 |
08 Jul 11 |
peter |
200 |
|
2510 |
08 Jul 11 |
peter |
201 |
template<typename InputIterator> |
3512 |
22 Jul 16 |
peter |
202 |
double TukeyBiweightEstimator::estimate(InputIterator first, |
3512 |
22 Jul 16 |
peter |
203 |
InputIterator last, |
2510 |
08 Jul 11 |
peter |
204 |
double center, double spread) const |
2510 |
08 Jul 11 |
peter |
205 |
{ |
2510 |
08 Jul 11 |
peter |
206 |
double scale = spread*cutoff_; |
2510 |
08 Jul 11 |
peter |
207 |
AveragerWeighted averager; |
2510 |
08 Jul 11 |
peter |
208 |
regression::TukeyBiweight biweight; |
2510 |
08 Jul 11 |
peter |
209 |
utility::iterator_traits<InputIterator> traits; |
2510 |
08 Jul 11 |
peter |
210 |
for ( ; first!=last; ++first) { |
2510 |
08 Jul 11 |
peter |
211 |
double x = traits.data(first); |
2510 |
08 Jul 11 |
peter |
212 |
double w = traits.weight(first) * biweight((x-center)/scale); |
3512 |
22 Jul 16 |
peter |
213 |
averager.add(x, w); |
2510 |
08 Jul 11 |
peter |
214 |
} |
2510 |
08 Jul 11 |
peter |
215 |
return averager.mean(); |
2510 |
08 Jul 11 |
peter |
216 |
} |
2510 |
08 Jul 11 |
peter |
217 |
|
2510 |
08 Jul 11 |
peter |
218 |
}}} // end of namespace theplu yat statistics |
2510 |
08 Jul 11 |
peter |
219 |
# endif |