yat
0.8.3pre
|
00001 #ifndef _theplu_yat_classifier_ensemblebuilder_ 00002 #define _theplu_yat_classifier_ensemblebuilder_ 00003 00004 // $Id: EnsembleBuilder.h 2226 2010-03-24 14:40:02Z peter $ 00005 00006 /* 00007 Copyright (C) 2005 Markus Ringnér 00008 Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér 00009 Copyright (C) 2007 Jari Häkkinen, Peter Johansson 00010 Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér 00011 Copyright (C) 2009 Jari Häkkinen, Peter Johansson 00012 Copyright (C) 2010 Peter Johansson 00013 00014 This file is part of the yat library, http://dev.thep.lu.se/yat 00015 00016 The yat library is free software; you can redistribute it and/or 00017 modify it under the terms of the GNU General Public License as 00018 published by the Free Software Foundation; either version 3 of the 00019 License, or (at your option) any later version. 00020 00021 The yat library is distributed in the hope that it will be useful, 00022 but WITHOUT ANY WARRANTY; without even the implied warranty of 00023 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00024 General Public License for more details. 00025 00026 You should have received a copy of the GNU General Public License 00027 along with yat. If not, see <http://www.gnu.org/licenses/>. 
00028 */ 00029 00030 #include "FeatureSelector.h" 00031 #include "Sampler.h" 00032 #include "SubsetGenerator.h" 00033 #include "yat/statistics/Averager.h" 00034 #include "yat/utility/Matrix.h" 00035 #include "yat/utility/yat_assert.h" 00036 00037 #include <vector> 00038 00039 namespace theplu { 00040 namespace yat { 00041 namespace classifier { 00042 00046 template <class Classifier, class Data> 00047 class EnsembleBuilder 00048 { 00049 public: 00053 typedef Classifier classifier_type; 00054 00059 typedef Data data_type; 00060 00064 EnsembleBuilder(const Classifier&, const Data&, const Sampler&); 00065 00069 EnsembleBuilder(const Classifier&, const Data&, const Sampler&, 00070 FeatureSelector&); 00071 00075 virtual ~EnsembleBuilder(void); 00076 00082 void build(void); 00083 00087 const Classifier& classifier(size_t i) const; 00088 00093 unsigned long size(void) const; 00094 00100 const std::vector<std::vector<statistics::Averager> >& validate(void); 00101 00111 void predict(const Data& data, 00112 std::vector<std::vector<statistics::Averager> > &); 00113 00114 private: 00115 // no copying 00116 EnsembleBuilder(const EnsembleBuilder&); 00117 const EnsembleBuilder& operator=(const EnsembleBuilder&); 00118 00119 00120 const Classifier& mother_; 00121 SubsetGenerator<Data>* subset_; 00122 std::vector<Classifier*> classifier_; 00123 KernelLookup test_data(const KernelLookup&, size_t k); 00124 MatrixLookup test_data(const MatrixLookup&, size_t k); 00125 MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k); 00126 std::vector<std::vector<statistics::Averager> > validation_result_; 00127 00128 }; 00129 00130 00131 // implementation 00132 00133 template <class Classifier, class Data> 00134 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc, 00135 const Data& data, 00136 const Sampler& sampler) 00137 : mother_(sc),subset_(new SubsetGenerator<Data>(sampler,data)) 00138 { 00139 } 00140 00141 00142 template <class Classifier, class Data> 
00143 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc, 00144 const Data& data, 00145 const Sampler& sampler, 00146 FeatureSelector& fs) 00147 : mother_(sc), 00148 subset_(new SubsetGenerator<Data>(sampler,data,fs)) 00149 { 00150 } 00151 00152 00153 template <class Classifier, class Data> 00154 EnsembleBuilder<Classifier, Data>::~EnsembleBuilder(void) 00155 { 00156 for(size_t i=0; i<classifier_.size(); i++) 00157 delete classifier_[i]; 00158 delete subset_; 00159 } 00160 00161 00162 template <class Classifier, class Data> 00163 void EnsembleBuilder<Classifier, Data>::build(void) 00164 { 00165 if (classifier_.empty()){ 00166 for(unsigned long i=0; i<subset_->size();++i) { 00167 Classifier* classifier = mother_.make_classifier(); 00168 classifier->train(subset_->training_data(i), 00169 subset_->training_target(i)); 00170 classifier_.push_back(classifier); 00171 } 00172 } 00173 } 00174 00175 00176 template <class Classifier, class Data> 00177 const Classifier& EnsembleBuilder<Classifier, Data>::classifier(size_t i) const 00178 { 00179 return *(classifier_[i]); 00180 } 00181 00182 00183 template <class Classifier, class Data> 00184 void EnsembleBuilder<Classifier, Data>::predict 00185 (const Data& data, std::vector<std::vector<statistics::Averager> >& result) 00186 { 00187 result = std::vector<std::vector<statistics::Averager> > 00188 (subset_->target().nof_classes(), 00189 std::vector<statistics::Averager>(data.columns())); 00190 00191 utility::Matrix prediction; 00192 00193 for(unsigned long k=0;k<size();++k) { 00194 Data sub_data = test_data(data, k); 00195 classifier(k).predict(sub_data,prediction); 00196 for(size_t i=0; i<prediction.rows();i++) 00197 for(size_t j=0; j<prediction.columns();j++) 00198 result[i][j].add(prediction(i,j)); 00199 } 00200 } 00201 00202 00203 template <class Classifier, class Data> 00204 unsigned long EnsembleBuilder<Classifier, Data>::size(void) const 00205 { 00206 return classifier_.size(); 00207 } 00208 00209 
  // The test_data() overloads map an independent test set onto the
  // feature subset used when training sub-classifier k.

  template <class Classifier, class Data>
  MatrixLookup
  EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookup& data,
                                               size_t k)
  {
    // restrict rows to the features selected for sub-classifier k;
    // the trailing 'true' presumably flags that the index refers to
    // rows — TODO confirm against the MatrixLookup constructor
    return MatrixLookup(data, subset_->training_features(k), true);
  }


  template <class Classifier, class Data>
  MatrixLookupWeighted
  EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookupWeighted& data,
                                               size_t k)
  {
    // selected features for subset k; all columns (samples) are kept
    return MatrixLookupWeighted(data, subset_->training_features(k),
                                utility::Index(data.columns()));
  }


  template <class Classifier, class Data>
  KernelLookup
  EnsembleBuilder<Classifier, Data>::test_data(const KernelLookup& kernel,
                                               size_t k)
  {
    // If the number of features equals the full feature count, no
    // feature selection took place and the existing kernel can simply
    // be sliced against the training samples of subset k. Otherwise a
    // new kernel between the feature-restricted test data and the
    // training data is computed via test_kernel().

    // weighted case
    if (kernel.weighted()){
      // no feature selection
      if (kernel.data_weighted().rows()==subset_->training_features(k).size())
        return KernelLookup(kernel, subset_->training_index(k), true);
      // feature selection: rebuild the kernel on the selected features
      MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
      return subset_->training_data(k).test_kernel(mlw);

    }
    // unweighted case

    // no feature selection
    if (kernel.data().rows()==subset_->training_features(k).size())
      return KernelLookup(kernel, subset_->training_index(k), true);

    // feature selection
    MatrixLookup ml = test_data(kernel.data(),k);
    return subset_->training_data(k).test_kernel(ml);
  }


  // Ensemble predictions on the validation partitions: for each
  // sub-classifier k, predict its validation set and accumulate the
  // results per (class, sample-in-full-data-set). The result is
  // computed once and cached in validation_result_.
  template <class Classifier, class Data>
  const std::vector<std::vector<statistics::Averager> >&
  EnsembleBuilder<Classifier, Data>::validate(void)
  {
    // Don't recalculate validation_result_
    if (!validation_result_.empty())
      return validation_result_;

    // one Averager per (class, sample in the full data set)
    validation_result_ = std::vector<std::vector<statistics::Averager> >
      (subset_->target().nof_classes(),
       std::vector<statistics::Averager>(subset_->target().size()));

    utility::Matrix prediction;

    for(unsigned long k=0;k<size();k++) {
      classifier(k).predict(subset_->validation_data(k),prediction);

      // map results to indices of samples in training + validation data set
      // (column j of the prediction is sample validation_index(k)[j])
      for(size_t i=0; i<prediction.rows();i++)
        for(size_t j=0; j<prediction.columns();j++) {
          validation_result_[i][subset_->validation_index(k)[j]].
            add(prediction(i,j));
        }
    }
    return validation_result_;
  }

}}} // of namespace classifier, yat, and theplu

#endif