00001 #ifndef _theplu_yat_classifier_subset_generator_
00002 #define _theplu_yat_classifier_subset_generator_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "FeatureSelector.h"
00028 #include "KernelLookup.h"
00029 #include "MatrixLookup.h"
00030 #include "MatrixLookupWeighted.h"
00031 #include "Target.h"
00032 #include "Sampler.h"
00033 #include "yat/utility/Index.h"
00034 #include "yat/utility/yat_assert.h"
00035
00036 #include <algorithm>
00037 #include <utility>
00038 #include <typeinfo>
00039 #include <vector>
00040
00041 namespace theplu {
00042 namespace yat {
00043 namespace classifier {
00056 template <typename Data>
00057 class SubsetGenerator
00058 {
00059 public:
00063 typedef Data value_type;
00064
00091 SubsetGenerator(const Sampler& sampler, const Data& data);
00092
00131 SubsetGenerator(const Sampler& sampler, const Data& data,
00132 FeatureSelector& fs);
00133
00137 ~SubsetGenerator();
00138
00142 size_t size(void) const;
00143
00147 const Target& target(void) const;
00148
00155 const Data& training_data(size_t i) const;
00156
00163 const utility::Index& training_features(size_t i) const;
00164
00168 const utility::Index& training_index(size_t i) const;
00169
00173 const Target& training_target(size_t i) const;
00174
00181 const Data& validation_data(size_t i) const;
00182
00186 const utility::Index& validation_index(size_t i) const;
00187
00191 const Target& validation_target(size_t i) const;
00192
00193 private:
00194 void build(const MatrixLookup&);
00195 void build(const MatrixLookupWeighted&);
00196 void build(const KernelLookup&);
00197
00198 SubsetGenerator(const SubsetGenerator&);
00199 const SubsetGenerator& operator=(const SubsetGenerator&) const;
00200
00201 FeatureSelector* f_selector_;
00202 std::vector<utility::Index > features_;
00203 const Sampler& sampler_;
00204 std::vector<const Data*> training_data_;
00205 std::vector<Target> training_target_;
00206 std::vector<const Data*> validation_data_;
00207 std::vector<Target> validation_target_;
00208
00209 };
00210
00211
00212
00213
00214 template<typename Data>
00215 SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler,
00216 const Data& data)
00217 : f_selector_(NULL), sampler_(sampler)
00218 {
00219 YAT_ASSERT(target().size()==data.columns());
00220
00221 training_data_.reserve(sampler_.size());
00222 validation_data_.reserve(sampler_.size());
00223 build(data);
00224 YAT_ASSERT(training_data_.size()==size());
00225 YAT_ASSERT(training_target_.size()==size());
00226 YAT_ASSERT(validation_data_.size()==size());
00227 YAT_ASSERT(validation_target_.size()==size());
00228 }
00229
00230
00231 template<typename Data>
00232 SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler,
00233 const Data& data,
00234 FeatureSelector& fs)
00235 : f_selector_(&fs), sampler_(sampler)
00236 {
00237 YAT_ASSERT(target().size()==data.columns());
00238 features_.reserve(size());
00239 training_data_.reserve(size());
00240 validation_data_.reserve(size());
00241 build(data);
00242 YAT_ASSERT(training_data_.size()==size());
00243 YAT_ASSERT(training_target_.size()==size());
00244 YAT_ASSERT(validation_data_.size()==size());
00245 YAT_ASSERT(validation_target_.size()==size());
00246 }
00247
00248
00249 template<typename Data>
00250 SubsetGenerator<Data>::~SubsetGenerator()
00251 {
00252 YAT_ASSERT(training_data_.size()==validation_data_.size());
00253 for (size_t i=0; i<training_data_.size(); i++)
00254 delete training_data_[i];
00255 for (size_t i=0; i<validation_data_.size(); i++)
00256 delete validation_data_[i];
00257 }
00258
00259
00260 template<typename Data>
00261 void SubsetGenerator<Data>::build(const MatrixLookup& ml)
00262 {
00263 if (!f_selector_)
00264 features_.push_back(utility::Index(ml.rows()));
00265
00266 for (size_t k=0; k<size(); k++){
00267 training_target_.push_back(Target(target(),training_index(k)));
00268 validation_target_.push_back(Target(target(),validation_index(k)));
00269 if (f_selector_){
00270
00271 const MatrixLookup* train_data_all_feat =
00272 new MatrixLookup(ml, training_index(k), false);
00273
00274 YAT_ASSERT(train_data_all_feat);
00275 f_selector_->update(*train_data_all_feat, training_target(k));
00276
00277 features_.push_back(f_selector_->features());
00278 YAT_ASSERT(train_data_all_feat);
00279 delete train_data_all_feat;
00280 }
00281
00282
00283 training_data_.push_back(new MatrixLookup(ml,features_.back(),
00284 training_index(k)));
00285 validation_data_.push_back(new MatrixLookup(ml,features_.back(),
00286 validation_index(k)));
00287 }
00288
00289 }
00290
00291
00292 template<typename Data>
00293 void SubsetGenerator<Data>::build(const MatrixLookupWeighted& ml)
00294 {
00295 if (!f_selector_)
00296 features_.push_back(utility::Index(ml.rows()));
00297
00298 for (unsigned long k=0; k<size(); k++){
00299 training_target_.push_back(Target(target(),training_index(k)));
00300 validation_target_.push_back(Target(target(),validation_index(k)));
00301 if (f_selector_){
00302
00303 const MatrixLookupWeighted* train_data_all_feat =
00304 new MatrixLookupWeighted(ml, training_index(k), false);
00305
00306 f_selector_->update(*train_data_all_feat, training_target(k));
00307
00308 features_.push_back(f_selector_->features());
00309 delete train_data_all_feat;
00310 }
00311
00312
00313
00314 training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(),
00315 training_index(k)));
00316 validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(),
00317 validation_index(k)));
00318 }
00319 }
00320
00321 template<typename Data>
00322 void SubsetGenerator<Data>::build(const KernelLookup& kernel)
00323 {
00324 for (unsigned long k=0; k<size(); k++){
00325 training_target_.push_back(Target(target(),training_index(k)));
00326 validation_target_.push_back(Target(target(),validation_index(k)));
00327
00328 if (f_selector_){
00329 if (kernel.weighted()){
00330 MatrixLookupWeighted ml = kernel.data_weighted();
00331 f_selector_->update(MatrixLookupWeighted(ml,training_index(k),false),
00332 training_target(k));
00333 }
00334 else {
00335 MatrixLookup ml=kernel.data();
00336 f_selector_->update(MatrixLookup(ml,training_index(k), false),
00337 training_target(k));
00338 }
00339 features_.push_back(f_selector_->features());
00340 KernelLookup kl = kernel.selected(features_.back());
00341
00342 training_data_.push_back(new KernelLookup(kl,training_index(k),
00343 training_index(k)));
00344 validation_data_.push_back(new KernelLookup(kl, training_index(k),
00345 validation_index(k)));
00346 }
00347 else {
00348 training_data_.push_back(new KernelLookup(kernel, training_index(k),
00349 training_index(k)));
00350 validation_data_.push_back(new KernelLookup(kernel,
00351 training_index(k),
00352 validation_index(k)));
00353 }
00354
00355 }
00356 if (!f_selector_){
00357 if (kernel.weighted())
00358 features_.push_back(utility::Index(kernel.data_weighted().rows()));
00359 else
00360 features_.push_back(utility::Index(kernel.data().rows()));
00361 }
00362 }
00363
00364
00365 template<typename Data>
00366 size_t SubsetGenerator<Data>::size(void) const
00367 {
00368 return sampler_.size();
00369 }
00370
00371
00372 template<typename Data>
00373 const Target& SubsetGenerator<Data>::target(void) const
00374 {
00375 return sampler_.target();
00376 }
00377
00378
00379 template<typename Data>
00380 const Data&
00381 SubsetGenerator<Data>::training_data(size_t i) const
00382 {
00383 return *(training_data_[i]);
00384 }
00385
00386
00387 template<typename Data>
00388 const utility::Index&
00389 SubsetGenerator<Data>::training_features(size_t i) const
00390 {
00391 utility::yat_assert<std::runtime_error>(features_.size(),
00392 "SubsetGenerator::training_features");
00393 return f_selector_ ? features_[i] : features_[0];
00394 }
00395
00396
00397 template<typename Data>
00398 const utility::Index&
00399 SubsetGenerator<Data>::training_index(size_t i) const
00400 {
00401 return sampler_.training_index(i);
00402 }
00403
00404
00405 template<typename Data>
00406 const Target&
00407 SubsetGenerator<Data>::training_target(size_t i) const
00408 {
00409 return training_target_[i];
00410 }
00411
00412
00413 template<typename Data>
00414 const Data&
00415 SubsetGenerator<Data>::validation_data(size_t i) const
00416 {
00417 return *(validation_data_[i]);
00418 }
00419
00420
00421 template<typename Data>
00422 const utility::Index&
00423 SubsetGenerator<Data>::validation_index(size_t i) const
00424 {
00425 return sampler_.validation_index(i);
00426 }
00427
00428
00429 template<typename Data>
00430 const Target&
00431 SubsetGenerator<Data>::validation_target(size_t i) const
00432 {
00433 return validation_target_[i];
00434 }
00435
00436 }}}
00437
00438 #endif
00439