yat  0.13.2pre
EnsembleBuilder.h
1 #ifndef _theplu_yat_classifier_ensemblebuilder_
2 #define _theplu_yat_classifier_ensemblebuilder_
3 
4 // $Id: EnsembleBuilder.h 2226 2010-03-24 14:40:02Z peter $
5 
6 /*
7  Copyright (C) 2005 Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
11  Copyright (C) 2009 Jari Häkkinen, Peter Johansson
12  Copyright (C) 2010 Peter Johansson
13 
14  This file is part of the yat library, http://dev.thep.lu.se/yat
15 
16  The yat library is free software; you can redistribute it and/or
17  modify it under the terms of the GNU General Public License as
18  published by the Free Software Foundation; either version 3 of the
19  License, or (at your option) any later version.
20 
21  The yat library is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24  General Public License for more details.
25 
26  You should have received a copy of the GNU General Public License
27  along with yat. If not, see <http://www.gnu.org/licenses/>.
28 */
29 
30 #include "FeatureSelector.h"
31 #include "Sampler.h"
32 #include "SubsetGenerator.h"
33 #include "yat/statistics/Averager.h"
34 #include "yat/utility/Matrix.h"
35 #include "yat/utility/yat_assert.h"
36 
37 #include <vector>
38 
39 namespace theplu {
40 namespace yat {
41 namespace classifier {
42 
46  template <class Classifier, class Data>
48  {
49  public:
53  typedef Classifier classifier_type;
54 
59  typedef Data data_type;
60 
64  EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
65 
69  EnsembleBuilder(const Classifier&, const Data&, const Sampler&,
71 
75  virtual ~EnsembleBuilder(void);
76 
82  void build(void);
83 
87  const Classifier& classifier(size_t i) const;
88 
93  unsigned long size(void) const;
94 
100  const std::vector<std::vector<statistics::Averager> >& validate(void);
101 
111  void predict(const Data& data,
112  std::vector<std::vector<statistics::Averager> > &);
113 
114  private:
115  // no copying
117  const EnsembleBuilder& operator=(const EnsembleBuilder&);
118 
119 
120  const Classifier& mother_;
121  SubsetGenerator<Data>* subset_;
122  std::vector<Classifier*> classifier_;
123  KernelLookup test_data(const KernelLookup&, size_t k);
124  MatrixLookup test_data(const MatrixLookup&, size_t k);
125  MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
126  std::vector<std::vector<statistics::Averager> > validation_result_;
127 
128  };
129 
130 
131  // implementation
132 
133  template <class Classifier, class Data>
135  const Data& data,
136  const Sampler& sampler)
137  : mother_(sc),subset_(new SubsetGenerator<Data>(sampler,data))
138  {
139  }
140 
141 
142  template <class Classifier, class Data>
144  const Data& data,
145  const Sampler& sampler,
146  FeatureSelector& fs)
147  : mother_(sc),
148  subset_(new SubsetGenerator<Data>(sampler,data,fs))
149  {
150  }
151 
152 
153  template <class Classifier, class Data>
155  {
156  for(size_t i=0; i<classifier_.size(); i++)
157  delete classifier_[i];
158  delete subset_;
159  }
160 
161 
162  template <class Classifier, class Data>
164  {
165  if (classifier_.empty()){
166  for(unsigned long i=0; i<subset_->size();++i) {
167  Classifier* classifier = mother_.make_classifier();
168  classifier->train(subset_->training_data(i),
169  subset_->training_target(i));
170  classifier_.push_back(classifier);
171  }
172  }
173  }
174 
175 
176  template <class Classifier, class Data>
177  const Classifier& EnsembleBuilder<Classifier, Data>::classifier(size_t i) const
178  {
179  return *(classifier_[i]);
180  }
181 
182 
183  template <class Classifier, class Data>
185  (const Data& data, std::vector<std::vector<statistics::Averager> >& result)
186  {
187  result = std::vector<std::vector<statistics::Averager> >
188  (subset_->target().nof_classes(),
189  std::vector<statistics::Averager>(data.columns()));
190 
191  utility::Matrix prediction;
192 
193  for(unsigned long k=0;k<size();++k) {
194  Data sub_data = test_data(data, k);
195  classifier(k).predict(sub_data,prediction);
196  for(size_t i=0; i<prediction.rows();i++)
197  for(size_t j=0; j<prediction.columns();j++)
198  result[i][j].add(prediction(i,j));
199  }
200  }
201 
202 
203  template <class Classifier, class Data>
204  unsigned long EnsembleBuilder<Classifier, Data>::size(void) const
205  {
206  return classifier_.size();
207  }
208 
209 
210  template <class Classifier, class Data>
211  MatrixLookup EnsembleBuilder<Classifier,
212  Data>::test_data(const MatrixLookup& data,
213  size_t k)
214  {
215  return MatrixLookup(data, subset_->training_features(k), true);
216  }
217 
218 
219  template <class Classifier, class Data>
220  MatrixLookupWeighted
221  EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookupWeighted& data,
222  size_t k)
223  {
224  return MatrixLookupWeighted(data, subset_->training_features(k),
225  utility::Index(data.columns()));
226  }
227 
228 
229  template <class Classifier, class Data>
230  KernelLookup
231  EnsembleBuilder<Classifier, Data>::test_data(const KernelLookup& kernel,
232  size_t k)
233  {
234  // weighted case
235  if (kernel.weighted()){
236  // no feature selection
237  if (kernel.data_weighted().rows()==subset_->training_features(k).size())
238  return KernelLookup(kernel, subset_->training_index(k), true);
239  MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
240  return subset_->training_data(k).test_kernel(mlw);
241 
242  }
243  // unweighted case
244 
245  // no feature selection
246  if (kernel.data().rows()==subset_->training_features(k).size())
247  return KernelLookup(kernel, subset_->training_index(k), true);
248 
249  // feature selection
250  MatrixLookup ml = test_data(kernel.data(),k);
251  return subset_->training_data(k).test_kernel(ml);
252  }
253 
254 
255  template <class Classifier, class Data>
256  const std::vector<std::vector<statistics::Averager> >&
258  {
259  // Don't recalculate validation_result_
260  if (!validation_result_.empty())
261  return validation_result_;
262 
263  validation_result_ = std::vector<std::vector<statistics::Averager> >
264  (subset_->target().nof_classes(),
265  std::vector<statistics::Averager>(subset_->target().size()));
266 
267  utility::Matrix prediction;
268  for(unsigned long k=0;k<size();k++) {
269  classifier(k).predict(subset_->validation_data(k),prediction);
270 
271  // map results to indices of samples in training + validation data set
272  for(size_t i=0; i<prediction.rows();i++)
273  for(size_t j=0; j<prediction.columns();j++) {
274  validation_result_[i][subset_->validation_index(k)[j]].
275  add(prediction(i,j));
276  }
277  }
278  return validation_result_;
279  }
280 
281 }}} // of namespace classifier, yat, and theplu
282 
283 #endif
General view into utility::Matrix.
Definition: MatrixLookup.h:70
Classifier classifier_type
Type of classifier that ensemble is built on.
Definition: EnsembleBuilder.h:53
unsigned long size(void) const
Definition: EnsembleBuilder.h:204
size_t rows(void) const
Interface class for FeatureSelection.
Definition: FeatureSelector.h:39
void build(void)
Generate ensemble.
Definition: EnsembleBuilder.h:163
Class for ensembles of supervised classifiers.
Definition: EnsembleBuilder.h:47
Class splitting Data into training and validation set.
Definition: SubsetGenerator.h:57
Class for storing indices of, e.g., a MatrixLookup.
Definition: Index.h:41
General view into utility::MatrixWeighted.
Definition: MatrixLookupWeighted.h:63
const Classifier & classifier(size_t i) const
Definition: EnsembleBuilder.h:177
Lookup into Kernel.
Definition: KernelLookup.h:67
EnsembleBuilder(const Classifier &, const Data &, const Sampler &)
Definition: EnsembleBuilder.h:134
void predict(const Data &data, std::vector< std::vector< statistics::Averager > > &)
Definition: EnsembleBuilder.h:185
Interface class for dividing samples into training and validation.
Definition: Sampler.h:39
Interface to GSL matrix.
Definition: Matrix.h:63
const std::vector< std::vector< statistics::Averager > > & validate(void)
Generate validation data for ensemble.
Definition: EnsembleBuilder.h:257
Data data_type
Definition: EnsembleBuilder.h:59
virtual ~EnsembleBuilder(void)
Definition: EnsembleBuilder.h:154
size_t columns(void) const

Generated on Wed Jan 4 2017 02:23:06 for yat by  doxygen 1.8.5