yat  0.12.3pre
NCC.h
1 #ifndef _theplu_yat_classifier_ncc_
2 #define _theplu_yat_classifier_ncc_
3 
4 // $Id: NCC.h 2384 2010-12-22 14:03:36Z peter $
5 
6 /*
7  Copyright (C) 2005 Peter Johansson, Markus Ringnér
8  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2009, 2010 Peter Johansson
10 
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12 
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17 
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 #include "MatrixLookup.h"
28 #include "MatrixLookupWeighted.h"
29 #include "SupervisedClassifier.h"
30 #include "Target.h"
31 
32 #include "yat/statistics/Averager.h"
33 #include "yat/statistics/AveragerWeighted.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/Exception.h"
36 #include "yat/utility/Matrix.h"
37 #include "yat/utility/MatrixWeighted.h"
38 #include "yat/utility/Vector.h"
40 #include "yat/utility/yat_assert.h"
41 
42 #include <boost/concept_check.hpp>
43 
44 #include <iterator>
45 #include <map>
46 #include <cmath>
47 
48 namespace theplu {
49 namespace yat {
50 namespace classifier {
51 
52 
64  template <typename Distance>
65  class NCC : public SupervisedClassifier
66  {
67 
68  public:
74  NCC(void);
75 
83  NCC(const Distance&);
84 
85 
89  virtual ~NCC(void);
90 
96  const utility::Matrix& centroids(void) const;
97 
98  NCC<Distance>* make_classifier(void) const;
99 
117  void predict(const MatrixLookup& data, utility::Matrix& results) const;
118 
140  void predict(const MatrixLookupWeighted& data, utility::Matrix& results) const;
141 
150  void train(const MatrixLookup& data, const Target& targets);
151 
152 
165  void train(const MatrixLookupWeighted& data, const Target& targets);
166 
167 
168  private:
169 
170  void predict_unweighted(const MatrixLookup&, utility::Matrix&) const;
171  void predict_weighted(const MatrixLookupWeighted&, utility::Matrix&) const;
172 
173  utility::Matrix centroids_;
174  bool centroids_nan_;
175  Distance distance_;
176  };
177 
178  // templates
179 
180  template <typename Distance>
182  : SupervisedClassifier(), centroids_nan_(false)
183  {
184  BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
185  }
186 
187  template <typename Distance>
188  NCC<Distance>::NCC(const Distance& dist)
189  : SupervisedClassifier(), centroids_nan_(false), distance_(dist)
190  {
191  BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
192  }
193 
194 
195  template <typename Distance>
197  {
198  }
199 
200 
201  template <typename Distance>
203  {
204  return centroids_;
205  }
206 
207 
208  template <typename Distance>
209  NCC<Distance>*
211  {
212  // All private members should be copied here to generate an
213  // identical but untrained classifier
214  return new NCC<Distance>(distance_);
215  }
216 
217  template <typename Distance>
218  void NCC<Distance>::train(const MatrixLookup& data, const Target& target)
219  {
220  centroids_.resize(data.rows(), target.nof_classes());
221  for(size_t i=0; i<data.rows(); i++) {
222  std::vector<statistics::Averager> class_averager;
223  class_averager.resize(target.nof_classes());
224  for(size_t j=0; j<data.columns(); j++) {
225  class_averager[target(j)].add(data(i,j));
226  }
227  for(size_t c=0;c<target.nof_classes();c++) {
228  centroids_(i,c) = class_averager[c].mean();
229  }
230  }
231  }
232 
233 
234  template <typename Distance>
235  void NCC<Distance>::train(const MatrixLookupWeighted& data, const Target& target)
236  {
237  centroids_.resize(data.rows(), target.nof_classes());
238  for(size_t i=0; i<data.rows(); i++) {
239  std::vector<statistics::AveragerWeighted> class_averager;
240  class_averager.resize(target.nof_classes());
241  for(size_t j=0; j<data.columns(); j++)
242  class_averager[target(j)].add(data.data(i,j),data.weight(i,j));
243  for(size_t c=0;c<target.nof_classes();c++) {
244  if(class_averager[c].sum_w()==0) {
245  centroids_nan_=true;
246  }
247  centroids_(i,c) = class_averager[c].mean();
248  }
249  }
250  }
251 
252 
253  template <typename Distance>
255  utility::Matrix& prediction) const
256  {
257  utility::yat_assert<utility::runtime_error>
258  (centroids_.rows()==test.rows(),
259  "NCC::predict test data with incorrect number of rows");
260 
261  prediction.resize(centroids_.columns(), test.columns());
262 
263  // If weighted training data has resulted in NaN in centroids: weighted calculations
264  if(centroids_nan_) {
265  predict_weighted(MatrixLookupWeighted(test),prediction);
266  }
267  // If unweighted training data: unweighted calculations
268  else {
269  predict_unweighted(test,prediction);
270  }
271  }
272 
273  template <typename Distance>
275  utility::Matrix& prediction) const
276  {
277  utility::yat_assert<utility::runtime_error>
278  (centroids_.rows()==test.rows(),
279  "NCC::predict test data with incorrect number of rows");
280 
281  prediction.resize(centroids_.columns(), test.columns());
282  predict_weighted(test,prediction);
283  }
284 
285 
286  template <typename Distance>
288  utility::Matrix& prediction) const
289  {
290  for(size_t j=0; j<test.columns();j++)
291  for(size_t k=0; k<centroids_.columns();k++)
292  prediction(k,j) = distance_(test.begin_column(j), test.end_column(j),
293  centroids_.begin_column(k));
294  }
295 
296  template <typename Distance>
297  void NCC<Distance>::predict_weighted(const MatrixLookupWeighted& test,
298  utility::Matrix& prediction) const
299  {
300  utility::MatrixWeighted weighted_centroids(centroids_);
301  for(size_t j=0; j<test.columns();j++)
302  for(size_t k=0; k<centroids_.columns();k++)
303  prediction(k,j) = distance_(test.begin_column(j), test.end_column(j),
304  weighted_centroids.begin_column(k));
305  }
306 
307 
308 }}} // of namespace classifier, yat, and theplu
309 
310 #endif
General view into utility::Matrix.
Definition: MatrixLookup.h:70
const utility::Matrix & centroids(void) const
Get the centroids for all classes.
Definition: NCC.h:202
Class for containing sample labels.
Definition: Target.h:47
virtual ~NCC(void)
Definition: NCC.h:196
void resize(size_t r, size_t c, double init_value=0)
Resize Matrix.
void predict(const MatrixLookup &data, utility::Matrix &results) const
Make predictions for unweighted test data.
Definition: NCC.h:254
NCC< Distance > * make_classifier(void) const
Create an untrained copy of the classifier.
Definition: NCC.h:210
Nearest Centroid Classifier.
Definition: NCC.h:65
Interface class for supervised classifiers that use data in a matrix format.
Definition: SupervisedClassifier.h:56
double weight(size_t row, size_t column) const
General view into utility::MatrixWeighted.
Definition: MatrixLookupWeighted.h:63
void train(const MatrixLookup &data, const Target &targets)
Train the NCC using unweighted training data with known targets.
Definition: NCC.h:218
const_column_iterator end_column(size_t) const
Concept check for a Distance.
Definition: concept_check.h:273
Interface to GSL matrix.
Definition: Matrix.h:63
Weighted Matrix.
Definition: MatrixWeighted.h:44
const_column_iterator begin_column(size_t) const
NCC(void)
Constructor.
Definition: NCC.h:181
size_t nof_classes(void) const
double data(size_t row, size_t column) const

Generated on Mon Jun 1 2015 12:29:51 for yat by  doxygen 1.8.5