yat
0.8.3pre
|
00001 #ifndef _theplu_yat_statistics_tukey_biweight_estimator 00002 #define _theplu_yat_statistics_tukey_biweight_estimator 00003 00004 // $Id: TukeyBiweightEstimator.h 2511 2011-07-08 23:44:33Z peter $ 00005 00006 /* 00007 Copyright (C) 2011 Peter Johansson 00008 00009 This file is part of the yat library, http://dev.thep.lu.se/yat 00010 00011 The yat library is free software; you can redistribute it and/or 00012 modify it under the terms of the GNU General Public License as 00013 published by the Free Software Foundation; either version 3 of the 00014 License, or (at your option) any later version. 00015 00016 The yat library is distributed in the hope that it will be useful, 00017 but WITHOUT ANY WARRANTY; without even the implied warranty of 00018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00019 General Public License for more details. 00020 00021 You should have received a copy of the GNU General Public License 00022 along with yat. If not, see <http://www.gnu.org/licenses/>. 00023 */ 00024 00025 #include "AveragerWeighted.h" 00026 #include "utility.h" 00027 00028 #include "yat/regression/TukeyBiweight.h" 00029 00030 #include "yat/utility/concept_check.h" 00031 #include "yat/utility/Exception.h" 00032 #include "yat/utility/iterator_traits.h" 00033 00034 #include <boost/concept_check.hpp> 00035 00036 #include <algorithm> 00037 #include <iterator> 00038 #include <limits> 00039 #include <vector> 00040 00041 namespace theplu { 00042 namespace yat { 00043 namespace statistics { 00044 00063 class TukeyBiweightEstimator 00064 { 00065 public: 00075 explicit TukeyBiweightEstimator(double cutoff=4.685, bool sorted=false) 00076 : cutoff_(cutoff), sorted_(sorted) {} 00077 00081 template<typename RandomAccessIterator> 00082 double operator()(RandomAccessIterator first, 00083 RandomAccessIterator last) const; 00084 00085 private: 00086 double cutoff_; 00087 bool sorted_; 00088 00089 template<typename RandomAccessIterator> 00090 double estimate(RandomAccessIterator first, 00091 RandomAccessIterator last) const; 00092 00093 template<typename InputIterator> 00094 double estimate(InputIterator first, InputIterator last, 00095 double center, double spread) const; 00096 }; 00097 00098 template<typename RandomAccessIterator> 00099 double TukeyBiweightEstimator::operator()(RandomAccessIterator first, 00100 RandomAccessIterator last) const 00101 { 00102 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>)); 00103 using utility::DataIteratorConcept; 00104 BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>)); 00105 00106 if (sorted_) 00107 return estimate(first, last); 00108 00109 // if not sorted, create a sorted copy 00110 typedef typename std::iterator_traits<RandomAccessIterator> traits; 00111 std::vector<typename traits::value_type> vec; 00112 vec.reserve(last-first); 00113 std::copy(first, last, std::back_inserter(vec)); 00114 std::sort(vec.begin(), vec.end()); 00115 return estimate(vec.begin(), vec.end()); 00116 00117 const double scale = mad(first, last, true); 00118 double m = median(first, last, true); 00119 if (scale==0) 00120 return m; 00121 // ensure m != m0 00122 double m0 = m+1.0; 00123 size_t epoch = 0; 00124 while (m!=m0) { 00125 m0 = m; 00126 m = estimate(first, last, m, scale); 00127 ++epoch; 00128 if (epoch>1000) 00129 utility::runtime_error("TukeyBiweightIterator: too many epochs"); 00130 } 00131 return m; 00132 } 00133 00134 00135 template<typename RandomAccessIterator> 00136 double TukeyBiweightEstimator::estimate(RandomAccessIterator first, 00137 RandomAccessIterator last) const 00138 { 00139 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>)); 00140 using utility::DataIteratorConcept; 00141 BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>)); 00142 00143 const double scale = mad(first, last, true); 00144 double m = median(first, last, true); 00145 // if mad is zero all (non-zero weight) data points are equal and 00146 // median is the only sensible estimate. Also, calculations below 00147 // would "divide by zero" so we need to interupt here. 00148 if (scale==0) 00149 return m; 00150 double m0 = m+1.0; 00151 size_t epoch = 0; 00152 // FIXME: let user define convergence requirement 00153 while (m!=m0) { 00154 m0 = m; 00155 m = estimate(first, last, m, scale); 00156 ++epoch; 00157 // FIXME: let user define maximal epochs 00158 if (epoch>1000) 00159 utility::runtime_error("TukeyBiweightIterator: too many epochs"); 00160 } 00161 return m; 00162 } 00163 00164 00165 template<typename InputIterator> 00166 double TukeyBiweightEstimator::estimate(InputIterator first, 00167 InputIterator last, 00168 double center, double spread) const 00169 { 00170 double scale = spread*cutoff_; 00171 AveragerWeighted averager; 00172 regression::TukeyBiweight biweight; 00173 utility::iterator_traits<InputIterator> traits; 00174 for ( ; first!=last; ++first) { 00175 double x = traits.data(first); 00176 double w = traits.weight(first) * biweight((x-center)/scale); 00177 averager.add(x, w); 00178 } 00179 return averager.mean(); 00180 } 00181 00182 }}} // end of namespace theplu yat statistics 00183 # endif