yat  0.8.3pre
TukeyBiweightEstimator.h
00001 #ifndef _theplu_yat_statistics_tukey_biweight_estimator 
00002 #define _theplu_yat_statistics_tukey_biweight_estimator
00003 
00004 // $Id: TukeyBiweightEstimator.h 2511 2011-07-08 23:44:33Z peter $
00005 
00006 /*
00007   Copyright (C) 2011 Peter Johansson
00008 
00009   This file is part of the yat library, http://dev.thep.lu.se/yat
00010 
00011   The yat library is free software; you can redistribute it and/or
00012   modify it under the terms of the GNU General Public License as
00013   published by the Free Software Foundation; either version 3 of the
00014   License, or (at your option) any later version.
00015 
00016   The yat library is distributed in the hope that it will be useful,
00017   but WITHOUT ANY WARRANTY; without even the implied warranty of
00018   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00019   General Public License for more details.
00020 
00021   You should have received a copy of the GNU General Public License
00022   along with yat. If not, see <http://www.gnu.org/licenses/>.
00023 */
00024 
00025 #include "AveragerWeighted.h"
00026 #include "utility.h"
00027 
00028 #include "yat/regression/TukeyBiweight.h"
00029 
00030 #include "yat/utility/concept_check.h"
00031 #include "yat/utility/Exception.h"
00032 #include "yat/utility/iterator_traits.h"
00033 
00034 #include <boost/concept_check.hpp>
00035 
00036 #include <algorithm>
00037 #include <iterator>
00038 #include <limits>
00039 #include <vector>
00040 
00041 namespace theplu {
00042 namespace yat {
00043 namespace statistics {  
00044 
00063   class TukeyBiweightEstimator
00064   {
00065   public:
00075     explicit TukeyBiweightEstimator(double cutoff=4.685, bool sorted=false)
00076       : cutoff_(cutoff), sorted_(sorted) {}
00077 
00081     template<typename RandomAccessIterator>
00082     double operator()(RandomAccessIterator first, 
00083                       RandomAccessIterator last) const;
00084 
00085   private:
00086     double cutoff_;
00087     bool sorted_;
00088 
00089     template<typename RandomAccessIterator>
00090     double estimate(RandomAccessIterator first, 
00091                     RandomAccessIterator last) const;
00092 
00093     template<typename InputIterator>
00094     double estimate(InputIterator first, InputIterator last, 
00095                     double center, double spread) const;
00096   };
00097 
00098   template<typename RandomAccessIterator>
00099   double TukeyBiweightEstimator::operator()(RandomAccessIterator first, 
00100                                             RandomAccessIterator last) const
00101   {
00102     BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
00103     using utility::DataIteratorConcept;
00104     BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>));
00105 
00106     if (sorted_)
00107       return estimate(first, last);
00108 
00109     // if not sorted, create a sorted copy
00110     typedef typename std::iterator_traits<RandomAccessIterator> traits;
00111     std::vector<typename traits::value_type> vec;
00112     vec.reserve(last-first);
00113     std::copy(first, last, std::back_inserter(vec));
00114     std::sort(vec.begin(), vec.end());
00115     return estimate(vec.begin(), vec.end());
00116 
00117     const double scale = mad(first, last, true);
00118     double m = median(first, last, true);
00119     if (scale==0)
00120       return m;
00121     // ensure m != m0
00122     double m0 = m+1.0;
00123     size_t epoch = 0;
00124     while (m!=m0) {
00125       m0 = m;
00126       m = estimate(first, last, m, scale);
00127       ++epoch;
00128       if (epoch>1000)
00129         utility::runtime_error("TukeyBiweightIterator: too many epochs");
00130     }
00131     return m;
00132   }
00133 
00134 
00135   template<typename RandomAccessIterator>
00136   double TukeyBiweightEstimator::estimate(RandomAccessIterator first, 
00137                                           RandomAccessIterator last) const
00138   {
00139     BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
00140     using utility::DataIteratorConcept;
00141     BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>));
00142 
00143     const double scale = mad(first, last, true);
00144     double m = median(first, last, true);
00145     // if mad is zero all (non-zero weight) data points are equal and
00146     // median is the only sensible estimate. Also, calculations below
00147     // would "divide by zero" so we need to interupt here.
00148     if (scale==0)
00149       return m;
00150     double m0 = m+1.0;
00151     size_t epoch = 0;
00152     // FIXME: let user define convergence requirement
00153     while (m!=m0) {
00154       m0 = m;
00155       m = estimate(first, last, m, scale);
00156       ++epoch;
00157       // FIXME: let user define maximal epochs
00158       if (epoch>1000)
00159         utility::runtime_error("TukeyBiweightIterator: too many epochs");
00160     }
00161     return m;
00162   }
00163 
00164 
00165   template<typename InputIterator>
00166   double TukeyBiweightEstimator::estimate(InputIterator first, 
00167                                           InputIterator last, 
00168                                           double center, double spread) const
00169   {
00170     double scale = spread*cutoff_;
00171     AveragerWeighted averager;
00172     regression::TukeyBiweight biweight;
00173     utility::iterator_traits<InputIterator> traits;
00174     for ( ; first!=last; ++first) {
00175       double x = traits.data(first);
00176       double w = traits.weight(first) * biweight((x-center)/scale);
00177       averager.add(x, w); 
00178     }
00179     return averager.mean();
00180   }
00181 
00182 }}} // end of namespace theplu yat statistics
00183 # endif

Generated on Thu Dec 20 2012 03:12:58 for yat by  doxygen 1.8.0-20120409