yat  0.8.3pre
SubsetGenerator.h
00001 #ifndef _theplu_yat_classifier_subset_generator_ 
00002 #define _theplu_yat_classifier_subset_generator_ 
00003 
00004 // $Id: SubsetGenerator.h 2226 2010-03-24 14:40:02Z peter $
00005 
00006 /*
00007   Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
00008   Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
00009   Copyright (C) 2009, 2010 Peter Johansson
00010 
00011   This file is part of the yat library, http://dev.thep.lu.se/yat
00012 
00013   The yat library is free software; you can redistribute it and/or
00014   modify it under the terms of the GNU General Public License as
00015   published by the Free Software Foundation; either version 3 of the
00016   License, or (at your option) any later version.
00017 
00018   The yat library is distributed in the hope that it will be useful,
00019   but WITHOUT ANY WARRANTY; without even the implied warranty of
00020   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00021   General Public License for more details.
00022 
00023   You should have received a copy of the GNU General Public License
00024   along with yat. If not, see <http://www.gnu.org/licenses/>.
00025 */
00026 
00027 #include "FeatureSelector.h"
00028 #include "KernelLookup.h"
00029 #include "MatrixLookup.h"
00030 #include "MatrixLookupWeighted.h"
00031 #include "Target.h"
00032 #include "Sampler.h"
00033 #include "yat/utility/Index.h"
00034 #include "yat/utility/yat_assert.h"
00035 
00036 #include <algorithm>
00037 #include <utility>
00038 #include <typeinfo>
00039 #include <vector>
00040 
00041 namespace theplu {
00042 namespace yat {
00043 namespace classifier {  
00056   template <typename Data> 
00057   class SubsetGenerator
00058   {
00059   public:
00063     typedef Data value_type;
00064 
00091     SubsetGenerator(const Sampler& sampler, const Data& data);
00092 
00131     SubsetGenerator(const Sampler& sampler, const Data& data, 
00132                     FeatureSelector& fs);
00133 
00137     ~SubsetGenerator();
00138   
00142     size_t size(void) const;
00143 
00147     const Target& target(void) const;
00148 
00155     const Data& training_data(size_t i) const;
00156 
00163     const utility::Index& training_features(size_t i) const;
00164 
00168     const utility::Index& training_index(size_t i) const;
00169 
00173     const Target& training_target(size_t i) const;
00174 
00181     const Data& validation_data(size_t i) const;
00182 
00186     const utility::Index& validation_index(size_t i) const;
00187 
00191     const Target& validation_target(size_t i) const;
00192 
00193   private:
00194     void build(const MatrixLookup&);
00195     void build(const MatrixLookupWeighted&);
00196     void build(const KernelLookup&);
00197 
00198     SubsetGenerator(const SubsetGenerator&);
00199     const SubsetGenerator& operator=(const SubsetGenerator&) const;
00200 
00201     FeatureSelector* f_selector_;
00202     std::vector<utility::Index > features_;
00203     const Sampler& sampler_;
00204     std::vector<const Data*> training_data_;
00205     std::vector<Target> training_target_;
00206     std::vector<const Data*> validation_data_;
00207     std::vector<Target> validation_target_;
00208 
00209   };
00210 
00211 
00212   // templates
00213 
00214   template<typename Data>
00215   SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler, 
00216                                          const Data& data)
00217     : f_selector_(NULL), sampler_(sampler)
00218   { 
00219     YAT_ASSERT(target().size()==data.columns());
00220 
00221     training_data_.reserve(sampler_.size());
00222     validation_data_.reserve(sampler_.size());
00223     build(data);
00224     YAT_ASSERT(training_data_.size()==size());
00225     YAT_ASSERT(training_target_.size()==size());
00226     YAT_ASSERT(validation_data_.size()==size());
00227     YAT_ASSERT(validation_target_.size()==size());
00228   }
00229 
00230 
00231   template<typename Data>
00232   SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler, 
00233                                       const Data& data, 
00234                                       FeatureSelector& fs)
00235     : f_selector_(&fs), sampler_(sampler)
00236   { 
00237     YAT_ASSERT(target().size()==data.columns());
00238     features_.reserve(size());
00239     training_data_.reserve(size());
00240     validation_data_.reserve(size());
00241     build(data);
00242     YAT_ASSERT(training_data_.size()==size());
00243     YAT_ASSERT(training_target_.size()==size());
00244     YAT_ASSERT(validation_data_.size()==size());
00245     YAT_ASSERT(validation_target_.size()==size());
00246   }
00247 
00248 
00249   template<typename Data>
00250   SubsetGenerator<Data>::~SubsetGenerator()
00251   {
00252     YAT_ASSERT(training_data_.size()==validation_data_.size());
00253     for (size_t i=0; i<training_data_.size(); i++) 
00254       delete training_data_[i];
00255     for (size_t i=0; i<validation_data_.size(); i++) 
00256       delete validation_data_[i];
00257   }
00258 
00259 
00260   template<typename Data>
00261   void SubsetGenerator<Data>::build(const MatrixLookup& ml)
00262   {
00263     if (!f_selector_)// no feature selection
00264       features_.push_back(utility::Index(ml.rows()));
00265 
00266     for (size_t k=0; k<size(); k++){
00267       training_target_.push_back(Target(target(),training_index(k)));
00268       validation_target_.push_back(Target(target(),validation_index(k)));
00269       if (f_selector_){
00270         // training data with no feature selection
00271         const MatrixLookup* train_data_all_feat = 
00272           new MatrixLookup(ml, training_index(k), false);
00273         // use these data to create feature selection
00274         YAT_ASSERT(train_data_all_feat);
00275         f_selector_->update(*train_data_all_feat, training_target(k));
00276         // get features
00277         features_.push_back(f_selector_->features());
00278         YAT_ASSERT(train_data_all_feat);
00279         delete train_data_all_feat;
00280       }
00281       
00282       // Dynamically allocated. Must be deleted in destructor.
00283       training_data_.push_back(new MatrixLookup(ml,features_.back(), 
00284                                                 training_index(k)));
00285       validation_data_.push_back(new MatrixLookup(ml,features_.back(), 
00286                                                   validation_index(k)));      
00287     }
00288 
00289   }
00290 
00291 
00292   template<typename Data>
00293   void SubsetGenerator<Data>::build(const MatrixLookupWeighted& ml)
00294   {
00295     if (!f_selector_)// no feature selection
00296       features_.push_back(utility::Index(ml.rows()));
00297 
00298     for (unsigned long k=0; k<size(); k++){
00299       training_target_.push_back(Target(target(),training_index(k)));
00300       validation_target_.push_back(Target(target(),validation_index(k)));
00301       if (f_selector_){
00302         // training data with no feature selection
00303         const MatrixLookupWeighted* train_data_all_feat = 
00304           new MatrixLookupWeighted(ml, utility::Index(ml.rows()), 
00305                                    training_index(k));
00306         // use these data to create feature selection
00307         f_selector_->update(*train_data_all_feat, training_target(k));
00308         // get features
00309         features_.push_back(f_selector_->features());
00310         delete train_data_all_feat;
00311       }
00312 
00313 
00314       // Dynamically allocated. Must be deleted in destructor.
00315       training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
00316                                                         training_index(k)));
00317       validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
00318                                                           validation_index(k)));
00319     }
00320   }
00321 
00322   template<typename Data>
00323   void SubsetGenerator<Data>::build(const KernelLookup& kernel)
00324   {
00325     for (unsigned long k=0; k<size(); k++){
00326       training_target_.push_back(Target(target(),training_index(k)));
00327       validation_target_.push_back(Target(target(),validation_index(k)));
00328 
00329       if (f_selector_){
00330         if (kernel.weighted()){
00331           MatrixLookupWeighted ml = kernel.data_weighted();
00332           f_selector_->update(MatrixLookupWeighted(ml, 
00333                                                    utility::Index(ml.rows()),
00334                                                    training_index(k)), 
00335                               training_target(k));
00336         }
00337         else {
00338           MatrixLookup ml=kernel.data();
00339           f_selector_->update(MatrixLookup(ml,training_index(k), false), 
00340                               training_target(k));
00341         } 
00342         features_.push_back(f_selector_->features());
00343         KernelLookup kl = kernel.selected(features_.back());
00344         // Dynamically allocated. Must be deleted in destructor.
00345         training_data_.push_back(new KernelLookup(kl,training_index(k),
00346                                                   training_index(k)));
00347         validation_data_.push_back(new KernelLookup(kl, training_index(k), 
00348                                                     validation_index(k)));
00349       }
00350       else {// no feature selection
00351         training_data_.push_back(new KernelLookup(kernel, training_index(k),
00352                                                   training_index(k)));
00353         validation_data_.push_back(new KernelLookup(kernel, 
00354                                                     training_index(k), 
00355                                                     validation_index(k)));
00356       }
00357       
00358     }
00359     if (!f_selector_){
00360       if (kernel.weighted())
00361         features_.push_back(utility::Index(kernel.data_weighted().rows()));
00362       else
00363         features_.push_back(utility::Index(kernel.data().rows()));
00364     }
00365   }
00366 
00367 
00368   template<typename Data>
00369   size_t SubsetGenerator<Data>::size(void) const
00370   {
00371     return sampler_.size();
00372   }
00373 
00374 
00375   template<typename Data>
00376   const Target& SubsetGenerator<Data>::target(void) const
00377   {
00378     return sampler_.target();
00379   }
00380 
00381 
00382   template<typename Data>
00383   const Data&
00384   SubsetGenerator<Data>::training_data(size_t i) const 
00385   {
00386     return *(training_data_[i]);
00387   }
00388 
00389 
00390   template<typename Data>
00391   const utility::Index&
00392   SubsetGenerator<Data>::training_features(size_t i) const
00393   {
00394     YAT_ASSERT(features_.size());
00395     return f_selector_ ? features_[i] : features_[0];
00396   }
00397 
00398 
00399   template<typename Data>
00400   const utility::Index&
00401   SubsetGenerator<Data>::training_index(size_t i) const
00402   {
00403     return sampler_.training_index(i);
00404   }
00405 
00406 
00407   template<typename Data>
00408   const Target&
00409   SubsetGenerator<Data>::training_target(size_t i) const
00410   {
00411     return training_target_[i];
00412   }
00413 
00414 
00415   template<typename Data>
00416   const Data&
00417   SubsetGenerator<Data>::validation_data(size_t i) const
00418   {
00419     return *(validation_data_[i]);
00420   }
00421 
00422 
00423   template<typename Data>
00424   const utility::Index&
00425   SubsetGenerator<Data>::validation_index(size_t i) const
00426   {
00427     return sampler_.validation_index(i);
00428   }
00429 
00430 
00431   template<typename Data>
00432   const Target&
00433   SubsetGenerator<Data>::validation_target(size_t i) const
00434   {
00435     return validation_target_[i];
00436   }
00437 
00438 }}} // of namespace classifier, yat, and theplu
00439 
00440 #endif

Generated on Thu Dec 20 2012 03:12:57 for yat by  doxygen 1.8.0-20120409