00001 #ifndef _theplu_yat_classifier_ensemblebuilder_
00002 #define _theplu_yat_classifier_ensemblebuilder_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "FeatureSelector.h"
00030 #include "Sampler.h"
00031 #include "SubsetGenerator.h"
00032 #include "yat/statistics/Averager.h"
00033 #include "yat/utility/Matrix.h"
00034 #include "yat/utility/yat_assert.h"
00035
00036 #include <vector>
00037
00038 namespace theplu {
00039 namespace yat {
00040 namespace classifier {
00041
00045 template <class Classifier, class Data>
00046 class EnsembleBuilder
00047 {
00048 public:
00052 typedef Classifier classifier_type;
00053
00058 typedef Data data_type;
00059
00063 EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
00064
00068 EnsembleBuilder(const Classifier&, const Data&, const Sampler&,
00069 FeatureSelector&);
00070
00074 virtual ~EnsembleBuilder(void);
00075
00081 void build(void);
00082
00086 const Classifier& classifier(size_t i) const;
00087
00092 unsigned long size(void) const;
00093
00099 const std::vector<std::vector<statistics::Averager> >& validate(void);
00100
00110 void predict(const Data& data,
00111 std::vector<std::vector<statistics::Averager> > &);
00112
00113 private:
00114
00115 EnsembleBuilder(const EnsembleBuilder&);
00116 const EnsembleBuilder& operator=(const EnsembleBuilder&);
00117
00118
00119 const Classifier& mother_;
00120 SubsetGenerator<Data>* subset_;
00121 std::vector<Classifier*> classifier_;
00122 KernelLookup test_data(const KernelLookup&, size_t k);
00123 MatrixLookup test_data(const MatrixLookup&, size_t k);
00124 MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
00125 std::vector<std::vector<statistics::Averager> > validation_result_;
00126
00127 };
00128
00129
00130
00131
00132 template <class Classifier, class Data>
00133 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00134 const Data& data,
00135 const Sampler& sampler)
00136 : mother_(sc),subset_(new SubsetGenerator<Data>(sampler,data))
00137 {
00138 }
00139
00140
00141 template <class Classifier, class Data>
00142 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00143 const Data& data,
00144 const Sampler& sampler,
00145 FeatureSelector& fs)
00146 : mother_(sc),
00147 subset_(new SubsetGenerator<Data>(sampler,data,fs))
00148 {
00149 }
00150
00151
00152 template <class Classifier, class Data>
00153 EnsembleBuilder<Classifier, Data>::~EnsembleBuilder(void)
00154 {
00155 for(size_t i=0; i<classifier_.size(); i++)
00156 delete classifier_[i];
00157 delete subset_;
00158 }
00159
00160
00161 template <class Classifier, class Data>
00162 void EnsembleBuilder<Classifier, Data>::build(void)
00163 {
00164 if (classifier_.empty()){
00165 for(unsigned long i=0; i<subset_->size();++i) {
00166 Classifier* classifier = mother_.make_classifier();
00167 classifier->train(subset_->training_data(i),
00168 subset_->training_target(i));
00169 classifier_.push_back(classifier);
00170 }
00171 }
00172 }
00173
00174
00175 template <class Classifier, class Data>
00176 const Classifier& EnsembleBuilder<Classifier, Data>::classifier(size_t i) const
00177 {
00178 return *(classifier_[i]);
00179 }
00180
00181
00182 template <class Classifier, class Data>
00183 void EnsembleBuilder<Classifier, Data>::predict
00184 (const Data& data, std::vector<std::vector<statistics::Averager> >& result)
00185 {
00186 result = std::vector<std::vector<statistics::Averager> >
00187 (subset_->target().nof_classes(),
00188 std::vector<statistics::Averager>(data.columns()));
00189
00190 utility::Matrix prediction;
00191
00192 for(unsigned long k=0;k<size();++k) {
00193 Data sub_data = test_data(data, k);
00194 classifier(k).predict(sub_data,prediction);
00195 for(size_t i=0; i<prediction.rows();i++)
00196 for(size_t j=0; j<prediction.columns();j++)
00197 result[i][j].add(prediction(i,j));
00198 }
00199 }
00200
00201
00202 template <class Classifier, class Data>
00203 unsigned long EnsembleBuilder<Classifier, Data>::size(void) const
00204 {
00205 return classifier_.size();
00206 }
00207
00208
00209 template <class Classifier, class Data>
00210 MatrixLookup EnsembleBuilder<Classifier,
00211 Data>::test_data(const MatrixLookup& data,
00212 size_t k)
00213 {
00214 return MatrixLookup(data, subset_->training_features(k), true);
00215 }
00216
00217
00218 template <class Classifier, class Data>
00219 MatrixLookupWeighted
00220 EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookupWeighted& data,
00221 size_t k)
00222 {
00223 return MatrixLookupWeighted(data, subset_->training_features(k), true);
00224 }
00225
00226
00227 template <class Classifier, class Data>
00228 KernelLookup
00229 EnsembleBuilder<Classifier, Data>::test_data(const KernelLookup& kernel,
00230 size_t k)
00231 {
00232
00233 if (kernel.weighted()){
00234
00235 if (kernel.data_weighted().rows()==subset_->training_features(k).size())
00236 return KernelLookup(kernel, subset_->training_index(k), true);
00237 MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
00238 return subset_->training_data(k).test_kernel(mlw);
00239
00240 }
00241
00242
00243
00244 if (kernel.data().rows()==subset_->training_features(k).size())
00245 return KernelLookup(kernel, subset_->training_index(k), true);
00246
00247
00248 MatrixLookup ml = test_data(kernel.data(),k);
00249 return subset_->training_data(k).test_kernel(ml);
00250 }
00251
00252
00253 template <class Classifier, class Data>
00254 const std::vector<std::vector<statistics::Averager> >&
00255 EnsembleBuilder<Classifier, Data>::validate(void)
00256 {
00257
00258 if (!validation_result_.empty())
00259 return validation_result_;
00260
00261 validation_result_ = std::vector<std::vector<statistics::Averager> >
00262 (subset_->target().nof_classes(),
00263 std::vector<statistics::Averager>(subset_->target().size()));
00264
00265 utility::Matrix prediction;
00266 for(unsigned long k=0;k<size();k++) {
00267 classifier(k).predict(subset_->validation_data(k),prediction);
00268
00269
00270 for(size_t i=0; i<prediction.rows();i++)
00271 for(size_t j=0; j<prediction.columns();j++) {
00272 validation_result_[i][subset_->validation_index(k)[j]].
00273 add(prediction(i,j));
00274 }
00275 }
00276 return validation_result_;
00277 }
00278
00279 }}}
00280
00281 #endif