yat  0.21pre
NCC.h
1 #ifndef _theplu_yat_classifier_ncc_
2 #define _theplu_yat_classifier_ncc_
3 
4 // $Id: NCC.h 3701 2017-10-04 00:19:39Z peter $
5 
6 /*
7  Copyright (C) 2005 Peter Johansson, Markus Ringnér
8  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2009, 2010, 2017 Peter Johansson
10 
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12 
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17 
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 #include "MatrixLookup.h"
28 #include "MatrixLookupWeighted.h"
29 #include "SupervisedClassifier.h"
30 #include "Target.h"
31 
32 #include "yat/statistics/Averager.h"
33 #include "yat/statistics/AveragerWeighted.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/Exception.h"
36 #include "yat/utility/Matrix.h"
37 #include "yat/utility/MatrixWeighted.h"
38 #include "yat/utility/Vector.h"
39 #include "yat/utility/yat_assert.h"
40 
41 #include <boost/concept_check.hpp>
42 
43 #include <iterator>
44 #include <map>
45 #include <cmath>
46 
47 namespace theplu {
48 namespace yat {
49 namespace classifier {
50 
51 
63  template <typename Distance>
64  class NCC : public SupervisedClassifier
65  {
66 
67  public:
73  NCC(void);
74 
82  NCC(const Distance&);
83 
84 
88  virtual ~NCC(void);
89 
95  const utility::Matrix& centroids(void) const;
96 
97  NCC<Distance>* make_classifier(void) const;
98 
116  void predict(const MatrixLookup& data, utility::Matrix& results) const;
117 
139  void predict(const MatrixLookupWeighted& data, utility::Matrix& results) const;
140 
149  void train(const MatrixLookup& data, const Target& targets);
150 
151 
164  void train(const MatrixLookupWeighted& data, const Target& targets);
165 
166 
167  private:
168 
169  void predict_unweighted(const MatrixLookup&, utility::Matrix&) const;
170  void predict_weighted(const MatrixLookupWeighted&, utility::Matrix&) const;
171 
172  utility::Matrix centroids_;
173  bool centroids_nan_;
174  Distance distance_;
175  };
176 
177  // templates
178 
179  template <typename Distance>
181  : SupervisedClassifier(), centroids_nan_(false)
182  {
183  BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
184  }
185 
186  template <typename Distance>
187  NCC<Distance>::NCC(const Distance& dist)
188  : SupervisedClassifier(), centroids_nan_(false), distance_(dist)
189  {
190  BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
191  }
192 
193 
194  template <typename Distance>
196  {
197  }
198 
199 
200  template <typename Distance>
202  {
203  return centroids_;
204  }
205 
206 
207  template <typename Distance>
210  {
211  // All private members should be copied here to generate an
212  // identical but untrained classifier
213  return new NCC<Distance>(distance_);
214  }
215 
216  template <typename Distance>
217  void NCC<Distance>::train(const MatrixLookup& data, const Target& target)
218  {
219  centroids_.resize(data.rows(), target.nof_classes());
220  for(size_t i=0; i<data.rows(); i++) {
221  std::vector<statistics::Averager> class_averager;
222  class_averager.resize(target.nof_classes());
223  for(size_t j=0; j<data.columns(); j++) {
224  class_averager[target(j)].add(data(i,j));
225  }
226  for(size_t c=0;c<target.nof_classes();c++) {
227  centroids_(i,c) = class_averager[c].mean();
228  }
229  }
230  }
231 
232 
233  template <typename Distance>
234  void NCC<Distance>::train(const MatrixLookupWeighted& data, const Target& target)
235  {
236  centroids_.resize(data.rows(), target.nof_classes());
237  for(size_t i=0; i<data.rows(); i++) {
238  std::vector<statistics::AveragerWeighted> class_averager;
239  class_averager.resize(target.nof_classes());
240  for(size_t j=0; j<data.columns(); j++)
241  class_averager[target(j)].add(data.data(i,j),data.weight(i,j));
242  for(size_t c=0;c<target.nof_classes();c++) {
243  if(class_averager[c].sum_w()==0) {
244  centroids_nan_=true;
245  }
246  centroids_(i,c) = class_averager[c].mean();
247  }
248  }
249  }
250 
251 
252  template <typename Distance>
254  utility::Matrix& prediction) const
255  {
256  utility::yat_assert<utility::runtime_error>
257  (centroids_.rows()==test.rows(),
258  "NCC::predict test data with incorrect number of rows");
259 
260  prediction.resize(centroids_.columns(), test.columns());
261 
262  // If weighted training data has resulted in NaN in centroids:
263  // weighted calculations
264  if(centroids_nan_) {
265  predict_weighted(MatrixLookupWeighted(test),prediction);
266  }
267  // If unweighted training data: unweighted calculations
268  else {
269  predict_unweighted(test,prediction);
270  }
271  }
272 
273  template <typename Distance>
275  utility::Matrix& prediction) const
276  {
277  utility::yat_assert<utility::runtime_error>
278  (centroids_.rows()==test.rows(),
279  "NCC::predict test data with incorrect number of rows");
280 
281  prediction.resize(centroids_.columns(), test.columns());
282  predict_weighted(test,prediction);
283  }
284 
285 
286  template <typename Distance>
288  utility::Matrix& prediction) const
289  {
290  for(size_t j=0; j<test.columns();j++)
291  for(size_t k=0; k<centroids_.columns();k++)
292  prediction(k,j) = distance_(test.begin_column(j), test.end_column(j),
293  centroids_.begin_column(k));
294  }
295 
296  template <typename Distance>
297  void NCC<Distance>::predict_weighted(const MatrixLookupWeighted& test,
298  utility::Matrix& prediction) const
299  {
300  utility::MatrixWeighted weighted_centroids(centroids_);
301  for(size_t j=0; j<test.columns();j++)
302  for(size_t k=0; k<centroids_.columns();k++)
303  prediction(k,j) = distance_(test.begin_column(j), test.end_column(j),
304  weighted_centroids.begin_column(k));
305  }
306 
307 
308 }}} // of namespace classifier, yat, and theplu
309 
310 #endif
General view into utility::Matrix.
Definition: MatrixLookup.h:70
void predict(const MatrixLookup &data, utility::Matrix &results) const
Make predictions for unweighted test data.
Definition: NCC.h:253
Class for containing sample labels.
Definition: Target.h:47
The Department of Theoretical Physics namespace as we define it.
virtual ~NCC(void)
Definition: NCC.h:195
void resize(size_t r, size_t c, double init_value=0)
Resize Matrix.
size_t nof_classes(void) const
NCC< Distance > * make_classifier(void) const
Create an untrained copy of the classifier.
Definition: NCC.h:209
Nearest Centroid Classifier.
Definition: NCC.h:64
double weight(size_t row, size_t column) const
Interface class for supervised classifiers that use data in a matrix format.
Definition: SupervisedClassifier.h:57
const_column_iterator begin_column(size_t) const
General view into utility::MatrixWeighted.
Definition: MatrixLookupWeighted.h:63
const_column_iterator end_column(size_t) const
const utility::Matrix & centroids(void) const
Get the centroids for all classes.
Definition: NCC.h:201
void train(const MatrixLookup &data, const Target &targets)
Train the NCC using unweighted training data with known targets.
Definition: NCC.h:217
Concept check for a Distance.
Definition: concept_check.h:290
Interface to GSL matrix.
Definition: Matrix.h:104
Weighted Matrix.
Definition: MatrixWeighted.h:47
NCC(void)
Constructor.
Definition: NCC.h:180
double data(size_t row, size_t column) const

Generated on Wed Jan 25 2023 03:34:29 for yat by  doxygen 1.8.14