00001 #ifndef _theplu_yat_classifier_ensemblebuilder_
00002 #define _theplu_yat_classifier_ensemblebuilder_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "FeatureSelector.h"
00031 #include "Sampler.h"
00032 #include "SubsetGenerator.h"
00033 #include "yat/statistics/Averager.h"
00034 #include "yat/utility/Matrix.h"
00035 #include "yat/utility/yat_assert.h"
00036
00037 #include <vector>
00038
00039 namespace theplu {
00040 namespace yat {
00041 namespace classifier {
00042
00046 template <class Classifier, class Data>
00047 class EnsembleBuilder
00048 {
00049 public:
00053 typedef Classifier classifier_type;
00054
00059 typedef Data data_type;
00060
00064 EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
00065
00069 EnsembleBuilder(const Classifier&, const Data&, const Sampler&,
00070 FeatureSelector&);
00071
00075 virtual ~EnsembleBuilder(void);
00076
00082 void build(void);
00083
00087 const Classifier& classifier(size_t i) const;
00088
00093 unsigned long size(void) const;
00094
00100 const std::vector<std::vector<statistics::Averager> >& validate(void);
00101
00111 void predict(const Data& data,
00112 std::vector<std::vector<statistics::Averager> > &);
00113
00114 private:
00115
00116 EnsembleBuilder(const EnsembleBuilder&);
00117 const EnsembleBuilder& operator=(const EnsembleBuilder&);
00118
00119
00120 const Classifier& mother_;
00121 SubsetGenerator<Data>* subset_;
00122 std::vector<Classifier*> classifier_;
00123 KernelLookup test_data(const KernelLookup&, size_t k);
00124 MatrixLookup test_data(const MatrixLookup&, size_t k);
00125 MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
00126 std::vector<std::vector<statistics::Averager> > validation_result_;
00127
00128 };
00129
00130
00131
00132
00133 template <class Classifier, class Data>
00134 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00135 const Data& data,
00136 const Sampler& sampler)
00137 : mother_(sc),subset_(new SubsetGenerator<Data>(sampler,data))
00138 {
00139 }
00140
00141
00142 template <class Classifier, class Data>
00143 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00144 const Data& data,
00145 const Sampler& sampler,
00146 FeatureSelector& fs)
00147 : mother_(sc),
00148 subset_(new SubsetGenerator<Data>(sampler,data,fs))
00149 {
00150 }
00151
00152
00153 template <class Classifier, class Data>
00154 EnsembleBuilder<Classifier, Data>::~EnsembleBuilder(void)
00155 {
00156 for(size_t i=0; i<classifier_.size(); i++)
00157 delete classifier_[i];
00158 delete subset_;
00159 }
00160
00161
00162 template <class Classifier, class Data>
00163 void EnsembleBuilder<Classifier, Data>::build(void)
00164 {
00165 if (classifier_.empty()){
00166 for(unsigned long i=0; i<subset_->size();++i) {
00167 Classifier* classifier = mother_.make_classifier();
00168 classifier->train(subset_->training_data(i),
00169 subset_->training_target(i));
00170 classifier_.push_back(classifier);
00171 }
00172 }
00173 }
00174
00175
00176 template <class Classifier, class Data>
00177 const Classifier& EnsembleBuilder<Classifier, Data>::classifier(size_t i) const
00178 {
00179 return *(classifier_[i]);
00180 }
00181
00182
00183 template <class Classifier, class Data>
00184 void EnsembleBuilder<Classifier, Data>::predict
00185 (const Data& data, std::vector<std::vector<statistics::Averager> >& result)
00186 {
00187 result = std::vector<std::vector<statistics::Averager> >
00188 (subset_->target().nof_classes(),
00189 std::vector<statistics::Averager>(data.columns()));
00190
00191 utility::Matrix prediction;
00192
00193 for(unsigned long k=0;k<size();++k) {
00194 Data sub_data = test_data(data, k);
00195 classifier(k).predict(sub_data,prediction);
00196 for(size_t i=0; i<prediction.rows();i++)
00197 for(size_t j=0; j<prediction.columns();j++)
00198 result[i][j].add(prediction(i,j));
00199 }
00200 }
00201
00202
00203 template <class Classifier, class Data>
00204 unsigned long EnsembleBuilder<Classifier, Data>::size(void) const
00205 {
00206 return classifier_.size();
00207 }
00208
00209
00210 template <class Classifier, class Data>
00211 MatrixLookup EnsembleBuilder<Classifier,
00212 Data>::test_data(const MatrixLookup& data,
00213 size_t k)
00214 {
00215 return MatrixLookup(data, subset_->training_features(k), true);
00216 }
00217
00218
00219 template <class Classifier, class Data>
00220 MatrixLookupWeighted
00221 EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookupWeighted& data,
00222 size_t k)
00223 {
00224 return MatrixLookupWeighted(data, subset_->training_features(k),
00225 utility::Index(data.columns()));
00226 }
00227
00228
00229 template <class Classifier, class Data>
00230 KernelLookup
00231 EnsembleBuilder<Classifier, Data>::test_data(const KernelLookup& kernel,
00232 size_t k)
00233 {
00234
00235 if (kernel.weighted()){
00236
00237 if (kernel.data_weighted().rows()==subset_->training_features(k).size())
00238 return KernelLookup(kernel, subset_->training_index(k), true);
00239 MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
00240 return subset_->training_data(k).test_kernel(mlw);
00241
00242 }
00243
00244
00245
00246 if (kernel.data().rows()==subset_->training_features(k).size())
00247 return KernelLookup(kernel, subset_->training_index(k), true);
00248
00249
00250 MatrixLookup ml = test_data(kernel.data(),k);
00251 return subset_->training_data(k).test_kernel(ml);
00252 }
00253
00254
00255 template <class Classifier, class Data>
00256 const std::vector<std::vector<statistics::Averager> >&
00257 EnsembleBuilder<Classifier, Data>::validate(void)
00258 {
00259
00260 if (!validation_result_.empty())
00261 return validation_result_;
00262
00263 validation_result_ = std::vector<std::vector<statistics::Averager> >
00264 (subset_->target().nof_classes(),
00265 std::vector<statistics::Averager>(subset_->target().size()));
00266
00267 utility::Matrix prediction;
00268 for(unsigned long k=0;k<size();k++) {
00269 classifier(k).predict(subset_->validation_data(k),prediction);
00270
00271
00272 for(size_t i=0; i<prediction.rows();i++)
00273 for(size_t j=0; j<prediction.columns();j++) {
00274 validation_result_[i][subset_->validation_index(k)[j]].
00275 add(prediction(i,j));
00276 }
00277 }
00278 return validation_result_;
00279 }
00280
00281 }}}
00282
00283 #endif