3316 |
19 Sep 14 |
peter |
1 |
#ifndef _theplu_yat_classifier_igp_ |
3316 |
19 Sep 14 |
peter |
2 |
#define _theplu_yat_classifier_igp_ |
581 |
04 May 06 |
markus |
3 |
|
616 |
31 Aug 06 |
jari |
// $Id$ |
581 |
04 May 06 |
markus |
5 |
|
675 |
10 Oct 06 |
jari |
6 |
/* |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2006 Jari Häkkinen, Markus Ringnér |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson, Markus Ringnér |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér |
3562 |
04 Jan 17 |
peter |
Copyright (C) 2009, 2010, 2014, 2017 Peter Johansson |
616 |
31 Aug 06 |
jari |
11 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
675 |
10 Oct 06 |
jari |
13 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
675 |
10 Oct 06 |
jari |
18 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
675 |
10 Oct 06 |
jari |
General Public License for more details. |
675 |
10 Oct 06 |
jari |
23 |
|
675 |
10 Oct 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
675 |
10 Oct 06 |
jari |
26 |
*/ |
675 |
10 Oct 06 |
jari |
27 |
|
925 |
02 Oct 07 |
markus |
28 |
#include "MatrixLookup.h" |
925 |
02 Oct 07 |
markus |
29 |
#include "Target.h" |
2334 |
15 Oct 10 |
peter |
30 |
#include "yat/utility/concept_check.h" |
3320 |
19 Sep 14 |
peter |
31 |
#include "yat/utility/Matrix.h" |
1120 |
21 Feb 08 |
peter |
32 |
#include "yat/utility/Vector.h" |
925 |
02 Oct 07 |
markus |
33 |
#include "yat/utility/yat_assert.h" |
675 |
10 Oct 06 |
jari |
34 |
|
2334 |
15 Oct 10 |
peter |
35 |
#include <boost/concept_check.hpp> |
2334 |
15 Oct 10 |
peter |
36 |
|
925 |
02 Oct 07 |
markus |
37 |
#include <cmath> |
925 |
02 Oct 07 |
markus |
38 |
#include <limits> |
936 |
05 Oct 07 |
peter |
39 |
#include <stdexcept> |
925 |
02 Oct 07 |
markus |
40 |
|
581 |
04 May 06 |
markus |
41 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
42 |
namespace yat { |
3316 |
19 Sep 14 |
peter |
43 |
namespace classifier { |
581 |
04 May 06 |
markus |
44 |
|
581 |
04 May 06 |
markus |
45 |
class Target; |
581 |
04 May 06 |
markus |
46 |
class MatrixLookup; |
581 |
04 May 06 |
markus |
47 |
|
3318 |
19 Sep 14 |
peter |
48 |
/** |
3318 |
19 Sep 14 |
peter |
\brief Class for In Group Proportions (IGP) |
3318 |
19 Sep 14 |
peter |
50 |
|
3318 |
19 Sep 14 |
peter |
IGP is defined to be the proportion of samples in a group whose |
3318 |
19 Sep 14 |
peter |
nearest neighbours are also in the same group. |
3318 |
19 Sep 14 |
peter |
53 |
|
3318 |
19 Sep 14 |
peter |
\see <a HREF=" |
3318 |
19 Sep 14 |
peter |
http://biostatistics.oxfordjournals.org/cgi/content/abstract/kxj029v1"> |
3318 |
19 Sep 14 |
peter |
Kapp and Tibshirani, Biostatistics (2006)</a>. |
3320 |
19 Sep 14 |
peter |
57 |
|
3551 |
03 Jan 17 |
peter |
Distance should model concept \ref concept_distance (but support |
3551 |
03 Jan 17 |
peter |
for weighted iterators is not required). |
3551 |
03 Jan 17 |
peter |
60 |
|
3320 |
19 Sep 14 |
peter |
\note Distance must be symmetric, i.e., Distance(x,y) == Distance(y,x). |
3318 |
19 Sep 14 |
peter |
62 |
*/ |
925 |
02 Oct 07 |
markus |
63 |
template <typename Distance> |
581 |
04 May 06 |
markus |
64 |
class IGP |
581 |
04 May 06 |
markus |
65 |
{ |
3316 |
19 Sep 14 |
peter |
66 |
|
581 |
04 May 06 |
markus |
67 |
public: |
1158 |
26 Feb 08 |
markus |
68 |
/// |
1158 |
26 Feb 08 |
markus |
/// Constructor taking the training data and the target vector and |
3316 |
19 Sep 14 |
peter |
/// as input. |
581 |
04 May 06 |
markus |
71 |
/// |
1158 |
26 Feb 08 |
markus |
72 |
IGP(const MatrixLookup&, const Target&); |
1158 |
26 Feb 08 |
markus |
73 |
|
1158 |
26 Feb 08 |
markus |
74 |
|
1158 |
26 Feb 08 |
markus |
75 |
/// |
581 |
04 May 06 |
markus |
/// Constructor taking the training data, the target vector and |
3316 |
19 Sep 14 |
peter |
/// the distance measure as input. |
581 |
04 May 06 |
markus |
78 |
/// |
1158 |
26 Feb 08 |
markus |
79 |
IGP(const MatrixLookup&, const Target&, const Distance&); |
581 |
04 May 06 |
markus |
80 |
|
581 |
04 May 06 |
markus |
81 |
/// |
581 |
04 May 06 |
markus |
/// Destrucutor |
581 |
04 May 06 |
markus |
83 |
/// |
581 |
04 May 06 |
markus |
84 |
virtual ~IGP(); |
581 |
04 May 06 |
markus |
85 |
|
581 |
04 May 06 |
markus |
86 |
/// |
581 |
04 May 06 |
markus |
/// @return the IGP score for each class as elements in a vector. |
581 |
04 May 06 |
markus |
88 |
/// |
1120 |
21 Feb 08 |
peter |
89 |
const utility::Vector& score(void) const; |
581 |
04 May 06 |
markus |
90 |
|
581 |
04 May 06 |
markus |
91 |
|
581 |
04 May 06 |
markus |
92 |
private: |
1158 |
26 Feb 08 |
markus |
93 |
void calculate(); |
1158 |
26 Feb 08 |
markus |
94 |
|
1120 |
21 Feb 08 |
peter |
95 |
utility::Vector igp_; |
1050 |
07 Feb 08 |
peter |
96 |
Distance distance_; |
581 |
04 May 06 |
markus |
97 |
|
581 |
04 May 06 |
markus |
98 |
const MatrixLookup& matrix_; |
581 |
04 May 06 |
markus |
99 |
const Target& target_; |
3316 |
19 Sep 14 |
peter |
100 |
}; |
925 |
02 Oct 07 |
markus |
101 |
|
3316 |
19 Sep 14 |
peter |
102 |
|
925 |
02 Oct 07 |
markus |
// templates |
925 |
02 Oct 07 |
markus |
104 |
|
925 |
02 Oct 07 |
markus |
105 |
template <typename Distance> |
3316 |
19 Sep 14 |
peter |
106 |
IGP<Distance>::IGP(const MatrixLookup& data, const Target& target) |
925 |
02 Oct 07 |
markus |
107 |
: matrix_(data), target_(target) |
3316 |
19 Sep 14 |
peter |
108 |
{ |
2334 |
15 Oct 10 |
peter |
109 |
BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>)); |
1158 |
26 Feb 08 |
markus |
110 |
calculate(); |
1158 |
26 Feb 08 |
markus |
111 |
} |
1158 |
26 Feb 08 |
markus |
112 |
|
1158 |
26 Feb 08 |
markus |
113 |
template <typename Distance> |
3319 |
19 Sep 14 |
peter |
114 |
IGP<Distance>::IGP(const MatrixLookup& data, const Target& target, |
3319 |
19 Sep 14 |
peter |
115 |
const Distance& dist) |
1158 |
26 Feb 08 |
markus |
116 |
: matrix_(data), target_(target), distance_(dist) |
3316 |
19 Sep 14 |
peter |
117 |
{ |
2334 |
15 Oct 10 |
peter |
118 |
BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>)); |
1158 |
26 Feb 08 |
markus |
119 |
calculate(); |
1158 |
26 Feb 08 |
markus |
120 |
} |
1158 |
26 Feb 08 |
markus |
121 |
|
3316 |
19 Sep 14 |
peter |
122 |
|
1158 |
26 Feb 08 |
markus |
123 |
template <typename Distance> |
3316 |
19 Sep 14 |
peter |
124 |
IGP<Distance>::~IGP() |
1158 |
26 Feb 08 |
markus |
125 |
{ |
1158 |
26 Feb 08 |
markus |
126 |
} |
1158 |
26 Feb 08 |
markus |
127 |
|
3319 |
19 Sep 14 |
peter |
128 |
|
1158 |
26 Feb 08 |
markus |
129 |
template <typename Distance> |
3316 |
19 Sep 14 |
peter |
130 |
void IGP<Distance>::calculate() |
1158 |
26 Feb 08 |
markus |
131 |
{ |
1876 |
20 Mar 09 |
peter |
132 |
YAT_ASSERT(target_.size()==matrix_.columns()); |
3316 |
19 Sep 14 |
peter |
133 |
|
925 |
02 Oct 07 |
markus |
// Calculate IGP for each class |
3320 |
19 Sep 14 |
peter |
135 |
igp_.resize(target_.nof_classes(), 0.0); |
3316 |
19 Sep 14 |
peter |
136 |
|
3320 |
19 Sep 14 |
peter |
// calculate distances |
3320 |
19 Sep 14 |
peter |
138 |
utility::Matrix dist(matrix_.columns(), matrix_.columns()); |
3320 |
19 Sep 14 |
peter |
139 |
for (size_t i=0; i<dist.rows(); ++i) |
3320 |
19 Sep 14 |
peter |
140 |
for (size_t j=i+1; j<dist.rows(); ++j) { |
3320 |
19 Sep 14 |
peter |
141 |
dist(i,j) = dist(j,i) = distance_(matrix_.begin_column(i), |
3320 |
19 Sep 14 |
peter |
142 |
matrix_.end_column(i), |
3320 |
19 Sep 14 |
peter |
143 |
matrix_.begin_column(j)); |
3320 |
19 Sep 14 |
peter |
144 |
} |
3320 |
19 Sep 14 |
peter |
145 |
|
3320 |
19 Sep 14 |
peter |
// find nearest neigbour for each sample |
1271 |
09 Apr 08 |
peter |
147 |
for(size_t i=0; i<target_.size(); i++) { |
1271 |
09 Apr 08 |
peter |
148 |
size_t neighbor=i; |
925 |
02 Oct 07 |
markus |
149 |
double mindist=std::numeric_limits<double>::max(); |
3316 |
19 Sep 14 |
peter |
150 |
for(size_t j=0; j<target_.size(); j++) { |
1874 |
17 Mar 09 |
peter |
151 |
if (i==j) // avoid self-self comparison |
1874 |
17 Mar 09 |
peter |
152 |
continue; |
3320 |
19 Sep 14 |
peter |
153 |
if(dist(i,j)<mindist) { |
3320 |
19 Sep 14 |
peter |
154 |
mindist=dist(i,j); |
925 |
02 Oct 07 |
markus |
155 |
neighbor=j; |
925 |
02 Oct 07 |
markus |
156 |
} |
925 |
02 Oct 07 |
markus |
157 |
} |
925 |
02 Oct 07 |
markus |
158 |
if(target_(i)==target_(neighbor)) |
925 |
02 Oct 07 |
markus |
159 |
igp_(target_(i))++; |
3316 |
19 Sep 14 |
peter |
160 |
|
925 |
02 Oct 07 |
markus |
161 |
} |
1271 |
09 Apr 08 |
peter |
162 |
for(size_t i=0; i<target_.nof_classes(); i++) { |
925 |
02 Oct 07 |
markus |
163 |
igp_(i)/=static_cast<double>(target_.size(i)); |
925 |
02 Oct 07 |
markus |
164 |
} |
925 |
02 Oct 07 |
markus |
165 |
} |
925 |
02 Oct 07 |
markus |
166 |
|
3316 |
19 Sep 14 |
peter |
167 |
|
925 |
02 Oct 07 |
markus |
168 |
template <typename Distance> |
3316 |
19 Sep 14 |
peter |
169 |
const utility::Vector& IGP<Distance>::score(void) const |
925 |
02 Oct 07 |
markus |
170 |
{ |
925 |
02 Oct 07 |
markus |
171 |
return igp_; |
925 |
02 Oct 07 |
markus |
172 |
} |
3316 |
19 Sep 14 |
peter |
173 |
|
680 |
11 Oct 06 |
jari |
174 |
}}} // of namespace classifier, yat, and theplu |
581 |
04 May 06 |
markus |
175 |
|
581 |
04 May 06 |
markus |
176 |
#endif |