yat  0.14.5pre
NCC.h
1 #ifndef _theplu_yat_classifier_ncc_
2 #define _theplu_yat_classifier_ncc_
3 
4 // $Id: NCC.h 3562 2017-01-04 01:16:07Z peter $
5 
6 /*
7  Copyright (C) 2005 Peter Johansson, Markus Ringnér
8  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2009, 2010, 2017 Peter Johansson
10 
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12 
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17 
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 #include "MatrixLookup.h"
28 #include "MatrixLookupWeighted.h"
29 #include "SupervisedClassifier.h"
30 #include "Target.h"
31 
32 #include "yat/statistics/Averager.h"
33 #include "yat/statistics/AveragerWeighted.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/Exception.h"
36 #include "yat/utility/Matrix.h"
37 #include "yat/utility/MatrixWeighted.h"
38 #include "yat/utility/Vector.h"
40 #include "yat/utility/yat_assert.h"
41 
42 #include <boost/concept_check.hpp>
43 
44 #include <iterator>
45 #include <map>
46 #include <cmath>
47 
48 namespace theplu {
49 namespace yat {
50 namespace classifier {
51 
52 
64  template <typename Distance>
65  class NCC : public SupervisedClassifier
66  {
67 
68  public:
74  NCC(void);
75 
83  NCC(const Distance&);
84 
85 
89  virtual ~NCC(void);
90 
96  const utility::Matrix& centroids(void) const;
97 
98  NCC<Distance>* make_classifier(void) const;
99 
117  void predict(const MatrixLookup& data, utility::Matrix& results) const;
118 
140  void predict(const MatrixLookupWeighted& data, utility::Matrix& results) const;
141 
150  void train(const MatrixLookup& data, const Target& targets);
151 
152 
165  void train(const MatrixLookupWeighted& data, const Target& targets);
166 
167 
168  private:
169 
170  void predict_unweighted(const MatrixLookup&, utility::Matrix&) const;
171  void predict_weighted(const MatrixLookupWeighted&, utility::Matrix&) const;
172 
173  utility::Matrix centroids_;
174  bool centroids_nan_;
175  Distance distance_;
176  };
177 
178  // templates
179 
180  template <typename Distance>
182  : SupervisedClassifier(), centroids_nan_(false)
183  {
184  BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
185  }
186 
187  template <typename Distance>
188  NCC<Distance>::NCC(const Distance& dist)
189  : SupervisedClassifier(), centroids_nan_(false), distance_(dist)
190  {
191  BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
192  }
193 
194 
195  template <typename Distance>
197  {
198  }
199 
200 
201  template <typename Distance>
203  {
204  return centroids_;
205  }
206 
207 
208  template <typename Distance>
211  {
212  // All private members should be copied here to generate an
213  // identical but untrained classifier
214  return new NCC<Distance>(distance_);
215  }
216 
217  template <typename Distance>
218  void NCC<Distance>::train(const MatrixLookup& data, const Target& target)
219  {
220  centroids_.resize(data.rows(), target.nof_classes());
221  for(size_t i=0; i<data.rows(); i++) {
222  std::vector<statistics::Averager> class_averager;
223  class_averager.resize(target.nof_classes());
224  for(size_t j=0; j<data.columns(); j++) {
225  class_averager[target(j)].add(data(i,j));
226  }
227  for(size_t c=0;c<target.nof_classes();c++) {
228  centroids_(i,c) = class_averager[c].mean();
229  }
230  }
231  }
232 
233 
234  template <typename Distance>
235  void NCC<Distance>::train(const MatrixLookupWeighted& data, const Target& target)
236  {
237  centroids_.resize(data.rows(), target.nof_classes());
238  for(size_t i=0; i<data.rows(); i++) {
239  std::vector<statistics::AveragerWeighted> class_averager;
240  class_averager.resize(target.nof_classes());
241  for(size_t j=0; j<data.columns(); j++)
242  class_averager[target(j)].add(data.data(i,j),data.weight(i,j));
243  for(size_t c=0;c<target.nof_classes();c++) {
244  if(class_averager[c].sum_w()==0) {
245  centroids_nan_=true;
246  }
247  centroids_(i,c) = class_averager[c].mean();
248  }
249  }
250  }
251 
252 
253  template <typename Distance>
255  utility::Matrix& prediction) const
256  {
257  utility::yat_assert<utility::runtime_error>
258  (centroids_.rows()==test.rows(),
259  "NCC::predict test data with incorrect number of rows");
260 
261  prediction.resize(centroids_.columns(), test.columns());
262 
263  // If weighted training data has resulted in NaN in centroids:
264  // weighted calculations
265  if(centroids_nan_) {
266  predict_weighted(MatrixLookupWeighted(test),prediction);
267  }
268  // If unweighted training data: unweighted calculations
269  else {
270  predict_unweighted(test,prediction);
271  }
272  }
273 
274  template <typename Distance>
276  utility::Matrix& prediction) const
277  {
278  utility::yat_assert<utility::runtime_error>
279  (centroids_.rows()==test.rows(),
280  "NCC::predict test data with incorrect number of rows");
281 
282  prediction.resize(centroids_.columns(), test.columns());
283  predict_weighted(test,prediction);
284  }
285 
286 
287  template <typename Distance>
289  utility::Matrix& prediction) const
290  {
291  for(size_t j=0; j<test.columns();j++)
292  for(size_t k=0; k<centroids_.columns();k++)
293  prediction(k,j) = distance_(test.begin_column(j), test.end_column(j),
294  centroids_.begin_column(k));
295  }
296 
297  template <typename Distance>
298  void NCC<Distance>::predict_weighted(const MatrixLookupWeighted& test,
299  utility::Matrix& prediction) const
300  {
301  utility::MatrixWeighted weighted_centroids(centroids_);
302  for(size_t j=0; j<test.columns();j++)
303  for(size_t k=0; k<centroids_.columns();k++)
304  prediction(k,j) = distance_(test.begin_column(j), test.end_column(j),
305  weighted_centroids.begin_column(k));
306  }
307 
308 
309 }}} // of namespace classifier, yat, and theplu
310 
311 #endif
General view into utility::Matrix.
Definition: MatrixLookup.h:70
const utility::Matrix & centroids(void) const
Get the centroids for all classes.
Definition: NCC.h:202
Class for containing sample labels.
Definition: Target.h:47
virtual ~NCC(void)
Definition: NCC.h:196
void resize(size_t r, size_t c, double init_value=0)
Resize Matrix.
void predict(const MatrixLookup &data, utility::Matrix &results) const
Make predictions for unweighted test data.
Definition: NCC.h:254
NCC< Distance > * make_classifier(void) const
Create an untrained copy of the classifier.
Definition: NCC.h:210
Nearest Centroid Classifier.
Definition: NCC.h:65
Interface class for supervised classifiers that use data in a matrix format.
Definition: SupervisedClassifier.h:56
double weight(size_t row, size_t column) const
General view into utility::MatrixWeighted.
Definition: MatrixLookupWeighted.h:63
void train(const MatrixLookup &data, const Target &targets)
Train the NCC using unweighted training data with known targets.
Definition: NCC.h:218
const_column_iterator end_column(size_t) const
Concept check for a Distance.
Definition: concept_check.h:278
Interface to GSL matrix.
Definition: Matrix.h:63
Weighted Matrix.
Definition: MatrixWeighted.h:44
const_column_iterator begin_column(size_t) const
NCC(void)
Constructor.
Definition: NCC.h:181
size_t nof_classes(void) const
double data(size_t row, size_t column) const

Generated on Tue Sep 26 2017 02:33:29 for yat by  doxygen 1.8.5