yat  0.21pre
SubsetGenerator.h
1 #ifndef _theplu_yat_classifier_subset_generator_
2 #define _theplu_yat_classifier_subset_generator_
3 
4 // $Id: SubsetGenerator.h 4207 2022-08-26 04:36:28Z peter $
5 
6 /*
7  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
8  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2009, 2010, 2022 Peter Johansson
10 
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12 
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17 
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 #include "FeatureSelector.h"
28 #include "KernelLookup.h"
29 #include "MatrixLookup.h"
30 #include "MatrixLookupWeighted.h"
31 #include "Target.h"
32 #include "Sampler.h"
33 #include "yat/utility/Index.h"
34 #include "yat/utility/yat_assert.h"
35 
36 #include <algorithm>
37 #include <utility>
38 #include <typeinfo>
39 #include <vector>
40 
41 namespace theplu {
42 namespace yat {
43 namespace classifier {
56  template <typename Data>
58  {
59  public:
63  typedef Data value_type;
64 
91  SubsetGenerator(const Sampler& sampler, const Data& data);
92 
131  SubsetGenerator(const Sampler& sampler, const Data& data,
132  FeatureSelector& fs);
133 
138 
142  size_t size(void) const;
143 
147  const Target& target(void) const;
148 
155  const Data& training_data(size_t i) const;
156 
163  const utility::Index& training_features(size_t i) const;
164 
168  const utility::Index& training_index(size_t i) const;
169 
173  const Target& training_target(size_t i) const;
174 
181  const Data& validation_data(size_t i) const;
182 
186  const utility::Index& validation_index(size_t i) const;
187 
191  const Target& validation_target(size_t i) const;
192 
193  private:
194  void build(const MatrixLookup&);
195  void build(const MatrixLookupWeighted&);
196  void build(const KernelLookup&);
197 
199  const SubsetGenerator& operator=(const SubsetGenerator&) const;
200 
201  FeatureSelector* f_selector_;
202  std::vector<utility::Index > features_;
203  const Sampler& sampler_;
204  std::vector<const Data*> training_data_;
205  std::vector<Target> training_target_;
206  std::vector<const Data*> validation_data_;
207  std::vector<Target> validation_target_;
208 
209  };
210 
211 
212  // templates
213 
214  template<typename Data>
216  const Data& data)
217  : f_selector_(NULL), sampler_(sampler)
218  {
219  YAT_ASSERT(target().size()==data.columns());
220 
221  training_data_.reserve(sampler_.size());
222  validation_data_.reserve(sampler_.size());
223  build(data);
224  YAT_ASSERT(training_data_.size()==size());
225  YAT_ASSERT(training_target_.size()==size());
226  YAT_ASSERT(validation_data_.size()==size());
227  YAT_ASSERT(validation_target_.size()==size());
228  }
229 
230 
231  template<typename Data>
233  const Data& data,
234  FeatureSelector& fs)
235  : f_selector_(&fs), sampler_(sampler)
236  {
237  YAT_ASSERT(target().size()==data.columns());
238  features_.reserve(size());
239  training_data_.reserve(size());
240  validation_data_.reserve(size());
241  build(data);
242  YAT_ASSERT(training_data_.size()==size());
243  YAT_ASSERT(training_target_.size()==size());
244  YAT_ASSERT(validation_data_.size()==size());
245  YAT_ASSERT(validation_target_.size()==size());
246  }
247 
248 
249  template<typename Data>
251  {
252  YAT_ASSERT(training_data_.size()==validation_data_.size());
253  for (size_t i=0; i<training_data_.size(); i++)
254  delete training_data_[i];
255  for (size_t i=0; i<validation_data_.size(); i++)
256  delete validation_data_[i];
257  }
258 
259 
260  template<typename Data>
262  {
263  if (!f_selector_)// no feature selection
264  features_.push_back(utility::Index(ml.rows()));
265 
266  for (size_t k=0; k<size(); k++){
267  training_target_.push_back(Target(target(),training_index(k)));
268  validation_target_.push_back(Target(target(),validation_index(k)));
269  if (f_selector_){
270  // training data with no feature selection
271  const MatrixLookup* train_data_all_feat =
272  new MatrixLookup(ml, training_index(k), false);
273  // use these data to create feature selection
274  YAT_ASSERT(train_data_all_feat);
275  f_selector_->update(*train_data_all_feat, training_target(k));
276  // get features
277  features_.push_back(f_selector_->features());
278  YAT_ASSERT(train_data_all_feat);
279  delete train_data_all_feat;
280  }
281 
282  // Dynamically allocated. Must be deleted in destructor.
283  training_data_.push_back(new MatrixLookup(ml,features_.back(),
284  training_index(k)));
285  validation_data_.push_back(new MatrixLookup(ml,features_.back(),
286  validation_index(k)));
287  }
288 
289  }
290 
291 
292  template<typename Data>
293  void SubsetGenerator<Data>::build(const MatrixLookupWeighted& ml)
294  {
295  if (!f_selector_)// no feature selection
296  features_.push_back(utility::Index(ml.rows()));
297 
298  for (unsigned long k=0; k<size(); k++){
299  training_target_.push_back(Target(target(),training_index(k)));
300  validation_target_.push_back(Target(target(),validation_index(k)));
301  if (f_selector_){
302  // training data with no feature selection
303  const MatrixLookupWeighted* train_data_all_feat =
304  new MatrixLookupWeighted(ml, utility::Index(ml.rows()),
305  training_index(k));
306  // use these data to create feature selection
307  f_selector_->update(*train_data_all_feat, training_target(k));
308  // get features
309  features_.push_back(f_selector_->features());
310  delete train_data_all_feat;
311  }
312 
313 
314  // Dynamically allocated. Must be deleted in destructor.
315  training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(),
316  training_index(k)));
317  validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(),
318  validation_index(k)));
319  }
320  }
321 
322  template<typename Data>
323  void SubsetGenerator<Data>::build(const KernelLookup& kernel)
324  {
325  for (unsigned long k=0; k<size(); k++){
326  training_target_.push_back(Target(target(),training_index(k)));
327  validation_target_.push_back(Target(target(),validation_index(k)));
328 
329  if (f_selector_){
330  if (kernel.weighted()){
331  MatrixLookupWeighted ml = kernel.data_weighted();
332  f_selector_->update(MatrixLookupWeighted(ml,
333  utility::Index(ml.rows()),
334  training_index(k)),
335  training_target(k));
336  }
337  else {
338  MatrixLookup ml=kernel.data();
339  f_selector_->update(MatrixLookup(ml,training_index(k), false),
340  training_target(k));
341  }
342  features_.push_back(f_selector_->features());
343  KernelLookup kl = kernel.selected(features_.back());
344  // Dynamically allocated. Must be deleted in destructor.
345  training_data_.push_back(new KernelLookup(kl,training_index(k),
346  training_index(k)));
347  validation_data_.push_back(new KernelLookup(kl, training_index(k),
348  validation_index(k)));
349  }
350  else {// no feature selection
351  training_data_.push_back(new KernelLookup(kernel, training_index(k),
352  training_index(k)));
353  validation_data_.push_back(new KernelLookup(kernel,
354  training_index(k),
355  validation_index(k)));
356  }
357 
358  }
359  if (!f_selector_){
360  if (kernel.weighted())
361  features_.push_back(utility::Index(kernel.data_weighted().rows()));
362  else
363  features_.push_back(utility::Index(kernel.data().rows()));
364  }
365  }
366 
367 
368  template<typename Data>
369  size_t SubsetGenerator<Data>::size(void) const
370  {
371  return sampler_.size();
372  }
373 
374 
375  template<typename Data>
377  {
378  return sampler_.target();
379  }
380 
381 
382  template<typename Data>
383  const Data&
385  {
386  return *(training_data_[i]);
387  }
388 
389 
390  template<typename Data>
391  const utility::Index&
393  {
394  YAT_ASSERT(features_.size());
395  return f_selector_ ? features_[i] : features_[0];
396  }
397 
398 
399  template<typename Data>
400  const utility::Index&
402  {
403  return sampler_.training_index(i);
404  }
405 
406 
407  template<typename Data>
408  const Target&
410  {
411  return training_target_[i];
412  }
413 
414 
415  template<typename Data>
416  const Data&
418  {
419  return *(validation_data_[i]);
420  }
421 
422 
423  template<typename Data>
424  const utility::Index&
426  {
427  return sampler_.validation_index(i);
428  }
429 
430 
431  template<typename Data>
432  const Target&
434  {
435  return validation_target_[i];
436  }
437 
438 }}} // of namespace classifier, yat, and theplu
439 
440 #endif
General view into utility::Matrix.
Definition: MatrixLookup.h:70
const utility::Index & training_features(size_t i) const
Definition: SubsetGenerator.h:392
Class for containing sample labels.
Definition: Target.h:47
Data value_type
Definition: SubsetGenerator.h:63
The Department of Theoretical Physics namespace as we define it.
const Target & validation_target(size_t i) const
Definition: SubsetGenerator.h:433
const utility::Index & validation_index(size_t i) const
Definition: SubsetGenerator.h:425
const utility::Index & training_index(size_t i) const
Definition: SubsetGenerator.h:401
Interface class for FeatureSelection.
Definition: FeatureSelector.h:40
Class splitting Data into training and validation set.
Definition: SubsetGenerator.h:57
~SubsetGenerator()
Definition: SubsetGenerator.h:250
Class for storing indices of, e.g., a MatrixLookup.
Definition: Index.h:41
General view into utility::MatrixWeighted.
Definition: MatrixLookupWeighted.h:63
const Data & training_data(size_t i) const
Definition: SubsetGenerator.h:384
Lookup into Kernel.
Definition: KernelLookup.h:67
const Target & target(void) const
Definition: SubsetGenerator.h:376
const Data & validation_data(size_t i) const
Definition: SubsetGenerator.h:417
SubsetGenerator(const Sampler &sampler, const Data &data)
Create SubDataSets.
Definition: SubsetGenerator.h:215
Interface class for dividing samples into training and validation.
Definition: Sampler.h:40
size_t size(void) const
Definition: SubsetGenerator.h:369
const Target & training_target(size_t i) const
Definition: SubsetGenerator.h:409

Generated on Wed Jan 25 2023 03:34:29 for yat by  doxygen 1.8.14