yat  0.8.3pre
EnsembleBuilder.h
00001 #ifndef _theplu_yat_classifier_ensemblebuilder_ 
00002 #define _theplu_yat_classifier_ensemblebuilder_ 
00003 
00004 // $Id: EnsembleBuilder.h 2226 2010-03-24 14:40:02Z peter $
00005 
00006 /*
00007   Copyright (C) 2005 Markus Ringnér
00008   Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
00009   Copyright (C) 2007 Jari Häkkinen, Peter Johansson
00010   Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
00011   Copyright (C) 2009 Jari Häkkinen, Peter Johansson
00012   Copyright (C) 2010 Peter Johansson
00013 
00014   This file is part of the yat library, http://dev.thep.lu.se/yat
00015 
00016   The yat library is free software; you can redistribute it and/or
00017   modify it under the terms of the GNU General Public License as
00018   published by the Free Software Foundation; either version 3 of the
00019   License, or (at your option) any later version.
00020 
00021   The yat library is distributed in the hope that it will be useful,
00022   but WITHOUT ANY WARRANTY; without even the implied warranty of
00023   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00024   General Public License for more details.
00025 
00026   You should have received a copy of the GNU General Public License
00027   along with yat. If not, see <http://www.gnu.org/licenses/>.
00028 */
00029 
00030 #include "FeatureSelector.h"
00031 #include "Sampler.h"
00032 #include "SubsetGenerator.h"
00033 #include "yat/statistics/Averager.h"
00034 #include "yat/utility/Matrix.h"
00035 #include "yat/utility/yat_assert.h"
00036 
00037 #include <vector>
00038 
00039 namespace theplu {
00040 namespace yat {
00041 namespace classifier {  
00042 
00046   template <class Classifier, class Data>
00047   class EnsembleBuilder
00048   {
00049   public:
00053     typedef Classifier classifier_type;
00054 
00059     typedef Data data_type;
00060 
00064     EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
00065 
00069     EnsembleBuilder(const Classifier&, const Data&, const Sampler&, 
00070                     FeatureSelector&);
00071 
00075     virtual ~EnsembleBuilder(void);
00076 
00082     void build(void);
00083 
00087     const Classifier& classifier(size_t i) const;
00088       
00093     unsigned long size(void) const;
00094 
00100     const std::vector<std::vector<statistics::Averager> >& validate(void);
00101     
00111     void predict(const Data& data, 
00112                  std::vector<std::vector<statistics::Averager> > &);
00113 
00114   private:
00115     // no copying
00116     EnsembleBuilder(const EnsembleBuilder&);
00117     const EnsembleBuilder& operator=(const EnsembleBuilder&);
00118     
00119 
00120     const Classifier& mother_;
00121     SubsetGenerator<Data>* subset_;
00122     std::vector<Classifier*> classifier_;
00123     KernelLookup test_data(const KernelLookup&, size_t k);
00124     MatrixLookup test_data(const MatrixLookup&, size_t k);
00125     MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
00126     std::vector<std::vector<statistics::Averager> > validation_result_;
00127 
00128   };
00129   
00130 
00131   // implementation
00132 
00133   template <class Classifier, class Data>
00134   EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00135                                                      const Data& data,
00136                                                      const Sampler& sampler)
00137     : mother_(sc),subset_(new SubsetGenerator<Data>(sampler,data))
00138   {
00139   }
00140 
00141 
00142   template <class Classifier, class Data>
00143   EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00144                                                      const Data& data,
00145                                                      const Sampler& sampler,
00146                                                      FeatureSelector& fs)
00147     : mother_(sc),
00148       subset_(new SubsetGenerator<Data>(sampler,data,fs))
00149   {
00150   }
00151 
00152 
00153   template <class Classifier, class Data>
00154   EnsembleBuilder<Classifier, Data>::~EnsembleBuilder(void)
00155   {
00156     for(size_t i=0; i<classifier_.size(); i++)
00157       delete classifier_[i];
00158     delete subset_;
00159   }
00160 
00161 
00162   template <class Classifier, class Data>
00163   void EnsembleBuilder<Classifier, Data>::build(void)
00164   {
00165     if (classifier_.empty()){
00166       for(unsigned long i=0; i<subset_->size();++i) {
00167         Classifier* classifier = mother_.make_classifier();
00168         classifier->train(subset_->training_data(i), 
00169                           subset_->training_target(i));
00170         classifier_.push_back(classifier);
00171       }   
00172     }
00173   }
00174 
00175 
00176   template <class Classifier, class Data>
00177   const Classifier& EnsembleBuilder<Classifier, Data>::classifier(size_t i) const
00178   {
00179     return *(classifier_[i]);
00180   }
00181 
00182 
00183   template <class Classifier, class Data>
00184   void EnsembleBuilder<Classifier, Data>::predict
00185   (const Data& data, std::vector<std::vector<statistics::Averager> >& result)
00186   {
00187     result = std::vector<std::vector<statistics::Averager> >
00188       (subset_->target().nof_classes(), 
00189        std::vector<statistics::Averager>(data.columns()));
00190     
00191     utility::Matrix prediction;  
00192 
00193     for(unsigned long k=0;k<size();++k) {       
00194       Data sub_data = test_data(data, k);
00195       classifier(k).predict(sub_data,prediction);
00196       for(size_t i=0; i<prediction.rows();i++) 
00197         for(size_t j=0; j<prediction.columns();j++) 
00198           result[i][j].add(prediction(i,j));    
00199     }
00200   }
00201 
00202   
00203   template <class Classifier, class Data>
00204   unsigned long EnsembleBuilder<Classifier, Data>::size(void) const
00205   {
00206     return classifier_.size();
00207   }
00208 
00209 
00210   template <class Classifier, class Data>
00211   MatrixLookup EnsembleBuilder<Classifier,
00212                                Data>::test_data(const MatrixLookup& data,
00213                                                 size_t k)
00214   {
00215     return MatrixLookup(data, subset_->training_features(k), true);
00216   }
00217   
00218 
00219   template <class Classifier, class Data>
00220   MatrixLookupWeighted 
00221   EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookupWeighted& data,
00222                                                size_t k)
00223   {
00224     return MatrixLookupWeighted(data, subset_->training_features(k), 
00225                                 utility::Index(data.columns()));
00226   }
00227   
00228 
00229   template <class Classifier, class Data>
00230   KernelLookup
00231   EnsembleBuilder<Classifier, Data>::test_data(const KernelLookup& kernel,
00232                                                size_t k)
00233   {
00234     // weighted case
00235     if (kernel.weighted()){
00236       // no feature selection
00237       if (kernel.data_weighted().rows()==subset_->training_features(k).size())
00238         return KernelLookup(kernel, subset_->training_index(k), true);
00239       MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
00240       return subset_->training_data(k).test_kernel(mlw);
00241 
00242     }
00243     // unweighted case
00244 
00245     // no feature selection
00246     if (kernel.data().rows()==subset_->training_features(k).size())
00247       return KernelLookup(kernel, subset_->training_index(k), true);
00248     
00249     // feature selection
00250     MatrixLookup ml = test_data(kernel.data(),k);
00251     return subset_->training_data(k).test_kernel(ml);
00252   }
00253   
00254 
00255   template <class Classifier, class Data>
00256   const std::vector<std::vector<statistics::Averager> >& 
00257   EnsembleBuilder<Classifier, Data>::validate(void)
00258   {
00259     // Don't recalculate validation_result_
00260     if (!validation_result_.empty())
00261       return validation_result_;
00262 
00263     validation_result_ = std::vector<std::vector<statistics::Averager> >
00264       (subset_->target().nof_classes(), 
00265        std::vector<statistics::Averager>(subset_->target().size()));
00266 
00267     utility::Matrix prediction;  
00268     for(unsigned long k=0;k<size();k++) {
00269       classifier(k).predict(subset_->validation_data(k),prediction);
00270       
00271       // map results to indices of samples in training + validation data set
00272       for(size_t i=0; i<prediction.rows();i++) 
00273         for(size_t j=0; j<prediction.columns();j++) {
00274           validation_result_[i][subset_->validation_index(k)[j]].
00275             add(prediction(i,j));
00276         }           
00277     }
00278     return validation_result_;
00279   }
00280 
00281 }}} // of namespace classifier, yat, and theplu
00282 
00283 #endif

Generated on Thu Dec 20 2012 03:12:57 for yat by  doxygen 1.8.0-20120409