yat
0.8.3pre
|
00001 #ifndef _theplu_yat_classifier_subset_generator_ 00002 #define _theplu_yat_classifier_subset_generator_ 00003 00004 // $Id: SubsetGenerator.h 2226 2010-03-24 14:40:02Z peter $ 00005 00006 /* 00007 Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér 00008 Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson 00009 Copyright (C) 2009, 2010 Peter Johansson 00010 00011 This file is part of the yat library, http://dev.thep.lu.se/yat 00012 00013 The yat library is free software; you can redistribute it and/or 00014 modify it under the terms of the GNU General Public License as 00015 published by the Free Software Foundation; either version 3 of the 00016 License, or (at your option) any later version. 00017 00018 The yat library is distributed in the hope that it will be useful, 00019 but WITHOUT ANY WARRANTY; without even the implied warranty of 00020 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00021 General Public License for more details. 00022 00023 You should have received a copy of the GNU General Public License 00024 along with yat. If not, see <http://www.gnu.org/licenses/>. 00025 */ 00026 00027 #include "FeatureSelector.h" 00028 #include "KernelLookup.h" 00029 #include "MatrixLookup.h" 00030 #include "MatrixLookupWeighted.h" 00031 #include "Target.h" 00032 #include "Sampler.h" 00033 #include "yat/utility/Index.h" 00034 #include "yat/utility/yat_assert.h" 00035 00036 #include <algorithm> 00037 #include <utility> 00038 #include <typeinfo> 00039 #include <vector> 00040 00041 namespace theplu { 00042 namespace yat { 00043 namespace classifier { 00056 template <typename Data> 00057 class SubsetGenerator 00058 { 00059 public: 00063 typedef Data value_type; 00064 00091 SubsetGenerator(const Sampler& sampler, const Data& data); 00092 00131 SubsetGenerator(const Sampler& sampler, const Data& data, 00132 FeatureSelector& fs); 00133 00137 ~SubsetGenerator(); 00138 00142 size_t size(void) const; 00143 00147 const Target& target(void) const; 00148 00155 const Data& training_data(size_t i) const; 00156 00163 const utility::Index& training_features(size_t i) const; 00164 00168 const utility::Index& training_index(size_t i) const; 00169 00173 const Target& training_target(size_t i) const; 00174 00181 const Data& validation_data(size_t i) const; 00182 00186 const utility::Index& validation_index(size_t i) const; 00187 00191 const Target& validation_target(size_t i) const; 00192 00193 private: 00194 void build(const MatrixLookup&); 00195 void build(const MatrixLookupWeighted&); 00196 void build(const KernelLookup&); 00197 00198 SubsetGenerator(const SubsetGenerator&); 00199 const SubsetGenerator& operator=(const SubsetGenerator&) const; 00200 00201 FeatureSelector* f_selector_; 00202 std::vector<utility::Index > features_; 00203 const Sampler& sampler_; 00204 std::vector<const Data*> training_data_; 00205 std::vector<Target> training_target_; 00206 std::vector<const Data*> validation_data_; 00207 std::vector<Target> validation_target_; 00208 00209 }; 00210 00211 00212 // templates 00213 00214 template<typename Data> 00215 SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler, 00216 const Data& data) 00217 : f_selector_(NULL), sampler_(sampler) 00218 { 00219 YAT_ASSERT(target().size()==data.columns()); 00220 00221 training_data_.reserve(sampler_.size()); 00222 validation_data_.reserve(sampler_.size()); 00223 build(data); 00224 YAT_ASSERT(training_data_.size()==size()); 00225 YAT_ASSERT(training_target_.size()==size()); 00226 YAT_ASSERT(validation_data_.size()==size()); 00227 YAT_ASSERT(validation_target_.size()==size()); 00228 } 00229 00230 00231 template<typename Data> 00232 SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler, 00233 const Data& data, 00234 FeatureSelector& fs) 00235 : f_selector_(&fs), sampler_(sampler) 00236 { 00237 YAT_ASSERT(target().size()==data.columns()); 00238 features_.reserve(size()); 00239 training_data_.reserve(size()); 00240 validation_data_.reserve(size()); 00241 build(data); 00242 YAT_ASSERT(training_data_.size()==size()); 00243 YAT_ASSERT(training_target_.size()==size()); 00244 YAT_ASSERT(validation_data_.size()==size()); 00245 YAT_ASSERT(validation_target_.size()==size()); 00246 } 00247 00248 00249 template<typename Data> 00250 SubsetGenerator<Data>::~SubsetGenerator() 00251 { 00252 YAT_ASSERT(training_data_.size()==validation_data_.size()); 00253 for (size_t i=0; i<training_data_.size(); i++) 00254 delete training_data_[i]; 00255 for (size_t i=0; i<validation_data_.size(); i++) 00256 delete validation_data_[i]; 00257 } 00258 00259 00260 template<typename Data> 00261 void SubsetGenerator<Data>::build(const MatrixLookup& ml) 00262 { 00263 if (!f_selector_)// no feature selection 00264 features_.push_back(utility::Index(ml.rows())); 00265 00266 for (size_t k=0; k<size(); k++){ 00267 training_target_.push_back(Target(target(),training_index(k))); 00268 validation_target_.push_back(Target(target(),validation_index(k))); 00269 if (f_selector_){ 00270 // training data with no feature selection 00271 const MatrixLookup* train_data_all_feat = 00272 new MatrixLookup(ml, training_index(k), false); 00273 // use these data to create feature selection 00274 YAT_ASSERT(train_data_all_feat); 00275 f_selector_->update(*train_data_all_feat, training_target(k)); 00276 // get features 00277 features_.push_back(f_selector_->features()); 00278 YAT_ASSERT(train_data_all_feat); 00279 delete train_data_all_feat; 00280 } 00281 00282 // Dynamically allocated. Must be deleted in destructor. 00283 training_data_.push_back(new MatrixLookup(ml,features_.back(), 00284 training_index(k))); 00285 validation_data_.push_back(new MatrixLookup(ml,features_.back(), 00286 validation_index(k))); 00287 } 00288 00289 } 00290 00291 00292 template<typename Data> 00293 void SubsetGenerator<Data>::build(const MatrixLookupWeighted& ml) 00294 { 00295 if (!f_selector_)// no feature selection 00296 features_.push_back(utility::Index(ml.rows())); 00297 00298 for (unsigned long k=0; k<size(); k++){ 00299 training_target_.push_back(Target(target(),training_index(k))); 00300 validation_target_.push_back(Target(target(),validation_index(k))); 00301 if (f_selector_){ 00302 // training data with no feature selection 00303 const MatrixLookupWeighted* train_data_all_feat = 00304 new MatrixLookupWeighted(ml, utility::Index(ml.rows()), 00305 training_index(k)); 00306 // use these data to create feature selection 00307 f_selector_->update(*train_data_all_feat, training_target(k)); 00308 // get features 00309 features_.push_back(f_selector_->features()); 00310 delete train_data_all_feat; 00311 } 00312 00313 00314 // Dynamically allocated. Must be deleted in destructor. 00315 training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 00316 training_index(k))); 00317 validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 00318 validation_index(k))); 00319 } 00320 } 00321 00322 template<typename Data> 00323 void SubsetGenerator<Data>::build(const KernelLookup& kernel) 00324 { 00325 for (unsigned long k=0; k<size(); k++){ 00326 training_target_.push_back(Target(target(),training_index(k))); 00327 validation_target_.push_back(Target(target(),validation_index(k))); 00328 00329 if (f_selector_){ 00330 if (kernel.weighted()){ 00331 MatrixLookupWeighted ml = kernel.data_weighted(); 00332 f_selector_->update(MatrixLookupWeighted(ml, 00333 utility::Index(ml.rows()), 00334 training_index(k)), 00335 training_target(k)); 00336 } 00337 else { 00338 MatrixLookup ml=kernel.data(); 00339 f_selector_->update(MatrixLookup(ml,training_index(k), false), 00340 training_target(k)); 00341 } 00342 features_.push_back(f_selector_->features()); 00343 KernelLookup kl = kernel.selected(features_.back()); 00344 // Dynamically allocated. Must be deleted in destructor. 00345 training_data_.push_back(new KernelLookup(kl,training_index(k), 00346 training_index(k))); 00347 validation_data_.push_back(new KernelLookup(kl, training_index(k), 00348 validation_index(k))); 00349 } 00350 else {// no feature selection 00351 training_data_.push_back(new KernelLookup(kernel, training_index(k), 00352 training_index(k))); 00353 validation_data_.push_back(new KernelLookup(kernel, 00354 training_index(k), 00355 validation_index(k))); 00356 } 00357 00358 } 00359 if (!f_selector_){ 00360 if (kernel.weighted()) 00361 features_.push_back(utility::Index(kernel.data_weighted().rows())); 00362 else 00363 features_.push_back(utility::Index(kernel.data().rows())); 00364 } 00365 } 00366 00367 00368 template<typename Data> 00369 size_t SubsetGenerator<Data>::size(void) const 00370 { 00371 return sampler_.size(); 00372 } 00373 00374 00375 template<typename Data> 00376 const Target& SubsetGenerator<Data>::target(void) const 00377 { 00378 return sampler_.target(); 00379 } 00380 00381 00382 template<typename Data> 00383 const Data& 00384 SubsetGenerator<Data>::training_data(size_t i) const 00385 { 00386 return *(training_data_[i]); 00387 } 00388 00389 00390 template<typename Data> 00391 const utility::Index& 00392 SubsetGenerator<Data>::training_features(size_t i) const 00393 { 00394 YAT_ASSERT(features_.size()); 00395 return f_selector_ ? features_[i] : features_[0]; 00396 } 00397 00398 00399 template<typename Data> 00400 const utility::Index& 00401 SubsetGenerator<Data>::training_index(size_t i) const 00402 { 00403 return sampler_.training_index(i); 00404 } 00405 00406 00407 template<typename Data> 00408 const Target& 00409 SubsetGenerator<Data>::training_target(size_t i) const 00410 { 00411 return training_target_[i]; 00412 } 00413 00414 00415 template<typename Data> 00416 const Data& 00417 SubsetGenerator<Data>::validation_data(size_t i) const 00418 { 00419 return *(validation_data_[i]); 00420 } 00421 00422 00423 template<typename Data> 00424 const utility::Index& 00425 SubsetGenerator<Data>::validation_index(size_t i) const 00426 { 00427 return sampler_.validation_index(i); 00428 } 00429 00430 00431 template<typename Data> 00432 const Target& 00433 SubsetGenerator<Data>::validation_target(size_t i) const 00434 { 00435 return validation_target_[i]; 00436 } 00437 00438 }}} // of namespace classifier, yat, and theplu 00439 00440 #endif