00001 #ifndef _theplu_yat_classifier_subset_generator_
00002 #define _theplu_yat_classifier_subset_generator_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "FeatureSelector.h"
00028 #include "KernelLookup.h"
00029 #include "MatrixLookup.h"
00030 #include "MatrixLookupWeighted.h"
00031 #include "Target.h"
00032 #include "Sampler.h"
00033 #include "yat/utility/Index.h"
00034 #include "yat/utility/yat_assert.h"
00035
00036 #include <algorithm>
00037 #include <utility>
00038 #include <typeinfo>
00039 #include <vector>
00040
00041 namespace theplu {
00042 namespace yat {
00043 namespace classifier {
00056 template <typename Data>
00057 class SubsetGenerator
00058 {
00059 public:
00063 typedef Data value_type;
00064
00091 SubsetGenerator(const Sampler& sampler, const Data& data);
00092
00131 SubsetGenerator(const Sampler& sampler, const Data& data,
00132 FeatureSelector& fs);
00133
00137 ~SubsetGenerator();
00138
00142 size_t size(void) const;
00143
00147 const Target& target(void) const;
00148
00155 const Data& training_data(size_t i) const;
00156
00163 const utility::Index& training_features(size_t i) const;
00164
00168 const utility::Index& training_index(size_t i) const;
00169
00173 const Target& training_target(size_t i) const;
00174
00181 const Data& validation_data(size_t i) const;
00182
00186 const utility::Index& validation_index(size_t i) const;
00187
00191 const Target& validation_target(size_t i) const;
00192
00193 private:
00194 void build(const MatrixLookup&);
00195 void build(const MatrixLookupWeighted&);
00196 void build(const KernelLookup&);
00197
00198 SubsetGenerator(const SubsetGenerator&);
00199 const SubsetGenerator& operator=(const SubsetGenerator&) const;
00200
00201 FeatureSelector* f_selector_;
00202 std::vector<utility::Index > features_;
00203 const Sampler& sampler_;
00204 std::vector<const Data*> training_data_;
00205 std::vector<Target> training_target_;
00206 std::vector<const Data*> validation_data_;
00207 std::vector<Target> validation_target_;
00208
00209 };
00210
00211
00212
00213
00214 template<typename Data>
00215 SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler,
00216 const Data& data)
00217 : f_selector_(NULL), sampler_(sampler)
00218 {
00219 YAT_ASSERT(target().size()==data.columns());
00220
00221 training_data_.reserve(sampler_.size());
00222 validation_data_.reserve(sampler_.size());
00223 build(data);
00224 YAT_ASSERT(training_data_.size()==size());
00225 YAT_ASSERT(training_target_.size()==size());
00226 YAT_ASSERT(validation_data_.size()==size());
00227 YAT_ASSERT(validation_target_.size()==size());
00228 }
00229
00230
00231 template<typename Data>
00232 SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler,
00233 const Data& data,
00234 FeatureSelector& fs)
00235 : f_selector_(&fs), sampler_(sampler)
00236 {
00237 YAT_ASSERT(target().size()==data.columns());
00238 features_.reserve(size());
00239 training_data_.reserve(size());
00240 validation_data_.reserve(size());
00241 build(data);
00242 YAT_ASSERT(training_data_.size()==size());
00243 YAT_ASSERT(training_target_.size()==size());
00244 YAT_ASSERT(validation_data_.size()==size());
00245 YAT_ASSERT(validation_target_.size()==size());
00246 }
00247
00248
00249 template<typename Data>
00250 SubsetGenerator<Data>::~SubsetGenerator()
00251 {
00252 YAT_ASSERT(training_data_.size()==validation_data_.size());
00253 for (size_t i=0; i<training_data_.size(); i++)
00254 delete training_data_[i];
00255 for (size_t i=0; i<validation_data_.size(); i++)
00256 delete validation_data_[i];
00257 }
00258
00259
00260 template<typename Data>
00261 void SubsetGenerator<Data>::build(const MatrixLookup& ml)
00262 {
00263 if (!f_selector_)
00264 features_.push_back(utility::Index(ml.rows()));
00265
00266 for (size_t k=0; k<size(); k++){
00267 training_target_.push_back(Target(target(),training_index(k)));
00268 validation_target_.push_back(Target(target(),validation_index(k)));
00269 if (f_selector_){
00270
00271 const MatrixLookup* train_data_all_feat =
00272 new MatrixLookup(ml, training_index(k), false);
00273
00274 YAT_ASSERT(train_data_all_feat);
00275 f_selector_->update(*train_data_all_feat, training_target(k));
00276
00277 features_.push_back(f_selector_->features());
00278 YAT_ASSERT(train_data_all_feat);
00279 delete train_data_all_feat;
00280 }
00281
00282
00283 training_data_.push_back(new MatrixLookup(ml,features_.back(),
00284 training_index(k)));
00285 validation_data_.push_back(new MatrixLookup(ml,features_.back(),
00286 validation_index(k)));
00287 }
00288
00289 }
00290
00291
00292 template<typename Data>
00293 void SubsetGenerator<Data>::build(const MatrixLookupWeighted& ml)
00294 {
00295 if (!f_selector_)
00296 features_.push_back(utility::Index(ml.rows()));
00297
00298 for (unsigned long k=0; k<size(); k++){
00299 training_target_.push_back(Target(target(),training_index(k)));
00300 validation_target_.push_back(Target(target(),validation_index(k)));
00301 if (f_selector_){
00302
00303 const MatrixLookupWeighted* train_data_all_feat =
00304 new MatrixLookupWeighted(ml, utility::Index(ml.rows()),
00305 training_index(k));
00306
00307 f_selector_->update(*train_data_all_feat, training_target(k));
00308
00309 features_.push_back(f_selector_->features());
00310 delete train_data_all_feat;
00311 }
00312
00313
00314
00315 training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(),
00316 training_index(k)));
00317 validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(),
00318 validation_index(k)));
00319 }
00320 }
00321
00322 template<typename Data>
00323 void SubsetGenerator<Data>::build(const KernelLookup& kernel)
00324 {
00325 for (unsigned long k=0; k<size(); k++){
00326 training_target_.push_back(Target(target(),training_index(k)));
00327 validation_target_.push_back(Target(target(),validation_index(k)));
00328
00329 if (f_selector_){
00330 if (kernel.weighted()){
00331 MatrixLookupWeighted ml = kernel.data_weighted();
00332 f_selector_->update(MatrixLookupWeighted(ml,
00333 utility::Index(ml.rows()),
00334 training_index(k)),
00335 training_target(k));
00336 }
00337 else {
00338 MatrixLookup ml=kernel.data();
00339 f_selector_->update(MatrixLookup(ml,training_index(k), false),
00340 training_target(k));
00341 }
00342 features_.push_back(f_selector_->features());
00343 KernelLookup kl = kernel.selected(features_.back());
00344
00345 training_data_.push_back(new KernelLookup(kl,training_index(k),
00346 training_index(k)));
00347 validation_data_.push_back(new KernelLookup(kl, training_index(k),
00348 validation_index(k)));
00349 }
00350 else {
00351 training_data_.push_back(new KernelLookup(kernel, training_index(k),
00352 training_index(k)));
00353 validation_data_.push_back(new KernelLookup(kernel,
00354 training_index(k),
00355 validation_index(k)));
00356 }
00357
00358 }
00359 if (!f_selector_){
00360 if (kernel.weighted())
00361 features_.push_back(utility::Index(kernel.data_weighted().rows()));
00362 else
00363 features_.push_back(utility::Index(kernel.data().rows()));
00364 }
00365 }
00366
00367
00368 template<typename Data>
00369 size_t SubsetGenerator<Data>::size(void) const
00370 {
00371 return sampler_.size();
00372 }
00373
00374
00375 template<typename Data>
00376 const Target& SubsetGenerator<Data>::target(void) const
00377 {
00378 return sampler_.target();
00379 }
00380
00381
00382 template<typename Data>
00383 const Data&
00384 SubsetGenerator<Data>::training_data(size_t i) const
00385 {
00386 return *(training_data_[i]);
00387 }
00388
00389
00390 template<typename Data>
00391 const utility::Index&
00392 SubsetGenerator<Data>::training_features(size_t i) const
00393 {
00394 YAT_ASSERT(features_.size());
00395 return f_selector_ ? features_[i] : features_[0];
00396 }
00397
00398
00399 template<typename Data>
00400 const utility::Index&
00401 SubsetGenerator<Data>::training_index(size_t i) const
00402 {
00403 return sampler_.training_index(i);
00404 }
00405
00406
00407 template<typename Data>
00408 const Target&
00409 SubsetGenerator<Data>::training_target(size_t i) const
00410 {
00411 return training_target_[i];
00412 }
00413
00414
00415 template<typename Data>
00416 const Data&
00417 SubsetGenerator<Data>::validation_data(size_t i) const
00418 {
00419 return *(validation_data_[i]);
00420 }
00421
00422
00423 template<typename Data>
00424 const utility::Index&
00425 SubsetGenerator<Data>::validation_index(size_t i) const
00426 {
00427 return sampler_.validation_index(i);
00428 }
00429
00430
00431 template<typename Data>
00432 const Target&
00433 SubsetGenerator<Data>::validation_target(size_t i) const
00434 {
00435 return validation_target_[i];
00436 }
00437
00438 }}}
00439
00440 #endif