00001 #ifndef _theplu_yat_classifier_ensemblebuilder_
00002 #define _theplu_yat_classifier_ensemblebuilder_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "FeatureSelector.h"
00030 #include "Sampler.h"
00031 #include "SubsetGenerator.h"
00032 #include "yat/statistics/Averager.h"
00033 #include "yat/utility/Matrix.h"
00034
00035 #include <vector>
00036
00037 namespace theplu {
00038 namespace yat {
00039 namespace classifier {
00040
00044 template <class Classifier, class Data>
00045 class EnsembleBuilder
00046 {
00047 public:
00051 typedef Classifier classifier_type;
00052
00057 typedef Data data_type;
00058
00062 EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
00063
00067 EnsembleBuilder(const Classifier&, const Data&, const Sampler&,
00068 FeatureSelector&);
00069
00073 virtual ~EnsembleBuilder(void);
00074
00080 void build(void);
00081
00085 const Classifier& classifier(size_t i) const;
00086
00091 unsigned long size(void) const;
00092
00098 const std::vector<std::vector<statistics::Averager> >& validate(void);
00099
00109 void predict(const Data& data,
00110 std::vector<std::vector<statistics::Averager> > &);
00111
00112 private:
00113
00114 EnsembleBuilder(const EnsembleBuilder&);
00115 const EnsembleBuilder& operator=(const EnsembleBuilder&);
00116
00117
00118 const Classifier& mother_;
00119 SubsetGenerator<Data>* subset_;
00120 std::vector<Classifier*> classifier_;
00121 KernelLookup test_data(const KernelLookup&, size_t k);
00122 MatrixLookup test_data(const MatrixLookup&, size_t k);
00123 MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
00124 std::vector<std::vector<statistics::Averager> > validation_result_;
00125
00126 };
00127
00128
00129
00130
00131 template <class Classifier, class Data>
00132 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00133 const Data& data,
00134 const Sampler& sampler)
00135 : mother_(sc),subset_(new SubsetGenerator<Data>(sampler,data))
00136 {
00137 }
00138
00139
00140 template <class Classifier, class Data>
00141 EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
00142 const Data& data,
00143 const Sampler& sampler,
00144 FeatureSelector& fs)
00145 : mother_(sc),
00146 subset_(new SubsetGenerator<Data>(sampler,data,fs))
00147 {
00148 }
00149
00150
00151 template <class Classifier, class Data>
00152 EnsembleBuilder<Classifier, Data>::~EnsembleBuilder(void)
00153 {
00154 for(size_t i=0; i<classifier_.size(); i++)
00155 delete classifier_[i];
00156 delete subset_;
00157 }
00158
00159
00160 template <class Classifier, class Data>
00161 void EnsembleBuilder<Classifier, Data>::build(void)
00162 {
00163 if (classifier_.empty()){
00164 for(unsigned long i=0; i<subset_->size();++i) {
00165 Classifier* classifier = mother_.make_classifier();
00166 classifier->train(subset_->training_data(i),
00167 subset_->training_target(i));
00168 classifier_.push_back(classifier);
00169 }
00170 }
00171 }
00172
00173
00174 template <class Classifier, class Data>
00175 const Classifier& EnsembleBuilder<Classifier, Data>::classifier(size_t i) const
00176 {
00177 return *(classifier_[i]);
00178 }
00179
00180
00181 template <class Classifier, class Data>
00182 void EnsembleBuilder<Classifier, Data>::predict
00183 (const Data& data, std::vector<std::vector<statistics::Averager> >& result)
00184 {
00185 result = std::vector<std::vector<statistics::Averager> >
00186 (subset_->target().nof_classes(),
00187 std::vector<statistics::Averager>(data.columns()));
00188
00189 utility::Matrix prediction;
00190
00191 for(unsigned long k=0;k<size();++k) {
00192 Data sub_data = test_data(data, k);
00193 classifier(k).predict(sub_data,prediction);
00194 for(size_t i=0; i<prediction.rows();i++)
00195 for(size_t j=0; j<prediction.columns();j++)
00196 result[i][j].add(prediction(i,j));
00197 }
00198 }
00199
00200
00201 template <class Classifier, class Data>
00202 unsigned long EnsembleBuilder<Classifier, Data>::size(void) const
00203 {
00204 return classifier_.size();
00205 }
00206
00207
00208 template <class Classifier, class Data>
00209 MatrixLookup EnsembleBuilder<Classifier,
00210 Data>::test_data(const MatrixLookup& data,
00211 size_t k)
00212 {
00213 return MatrixLookup(data, subset_->training_features(k), true);
00214 }
00215
00216
00217 template <class Classifier, class Data>
00218 MatrixLookupWeighted
00219 EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookupWeighted& data,
00220 size_t k)
00221 {
00222 return MatrixLookupWeighted(data, subset_->training_features(k), true);
00223 }
00224
00225
00226 template <class Classifier, class Data>
00227 KernelLookup
00228 EnsembleBuilder<Classifier, Data>::test_data(const KernelLookup& kernel,
00229 size_t k)
00230 {
00231
00232 if (kernel.weighted()){
00233
00234 if (kernel.data_weighted().rows()==subset_->training_features(k).size())
00235 return KernelLookup(kernel, subset_->training_index(k), true);
00236 MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
00237 return subset_->training_data(k).test_kernel(mlw);
00238
00239 }
00240
00241
00242
00243 if (kernel.data().rows()==subset_->training_features(k).size())
00244 return KernelLookup(kernel, subset_->training_index(k), true);
00245
00246
00247 MatrixLookup ml = test_data(kernel.data(),k);
00248 return subset_->training_data(k).test_kernel(ml);
00249 }
00250
00251
00252 template <class Classifier, class Data>
00253 const std::vector<std::vector<statistics::Averager> >&
00254 EnsembleBuilder<Classifier, Data>::validate(void)
00255 {
00256
00257 if (!validation_result_.empty())
00258 return validation_result_;
00259
00260 validation_result_ = std::vector<std::vector<statistics::Averager> >
00261 (subset_->target().nof_classes(),
00262 std::vector<statistics::Averager>(subset_->target().size()));
00263
00264 utility::Matrix prediction;
00265 for(unsigned long k=0;k<size();k++) {
00266 classifier(k).predict(subset_->validation_data(k),prediction);
00267
00268
00269 for(size_t i=0; i<prediction.rows();i++)
00270 for(size_t j=0; j<prediction.columns();j++) {
00271 validation_result_[i][subset_->validation_index(k)[j]].
00272 add(prediction(i,j));
00273 }
00274 }
00275 return validation_result_;
00276 }
00277
00278 }}}
00279
00280 #endif