#ifndef _theplu_yat_classifier_svm_
#define _theplu_yat_classifier_svm_

// $Id$

/*
  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
  Copyright (C) 2007 Peter Johansson
  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
  Copyright (C) 2009, 2010 Peter Johansson

  This file is part of the yat library, http://dev.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 3 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with yat. If not, see <http://www.gnu.org/licenses/>.
*/

#include "SVindex.h"
#include "Target.h"
#include "yat/utility/Vector.h"

#include <utility>
#include <vector>

namespace theplu {
namespace yat {
namespace utility {
  class Matrix;
}

namespace classifier {

  class DataLookup1D;
  class DataLookupWeighted1D;
  class KernelLookup;

  /**
     \brief Support Vector Machine
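
     Example usage, as a sketch only: construction of the KernelLookup
     and the Target (named kernel and target here for illustration) is
     assumed to happen elsewhere.

     \code
     SVM svm;
     svm.set_C(100);             // finite C gives a soft margin
     svm.train(kernel, target);  // may throw utility::runtime_error
     if (svm.trained()) {
       const utility::Vector& o = svm.output();  // one output per training sample
     }
     \endcode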
  */
  class SVM
  {

  public:
    ///
    /// \brief Constructor
    ///
    SVM(void);

    /**
       \brief Copy constructor.
    */
    SVM(const SVM&);

    ///
    /// \brief Destructor
    ///
    virtual ~SVM();

    /**
       \brief Create an untrained copy of SVM.

       \returns A dynamically allocated SVM, which has to be deleted
       by the caller to avoid memory leaks.
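
       For example (a sketch; trained_svm is a hypothetical trained
       instance):

       \code
       SVM* copy = trained_svm.make_classifier();  // untrained copy
       // ... train and use copy ...
       delete copy;  // the caller owns the returned pointer
       \endcode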
    */
    SVM* make_classifier(void) const;

    ///
    /// @return alpha parameters
    ///
    const utility::Vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will be focused on getting samples
    /// correctly classified, with a risk of overfitting and poor
    /// generalisation. A too small C will result in a training in
    /// which misclassifications are not penalized. C is weighted with
    /// respect to group size such that \f$ n_+C_+ = n_-C_- \f$, meaning
    /// a misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring in
    /// %regression with regularisation, or in ANN training with a
    /// weight-decay term. Default is C set to infinity.
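    ///
    /// As a worked example of this weighting (numbers chosen purely
    /// for illustration): with \f$ n_+=10 \f$ and \f$ n_-=90 \f$, the
    /// constraint \f$ n_+C_+ = n_-C_- \f$ gives \f$ C_+ = 9C_- \f$, so
    /// misclassifying a sample from the smaller group costs nine times
    /// as much.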
    ///
    /// @returns mean of vector \f$ C_i \f$
    ///
    double C(void) const;

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return maximal number of epochs
    ///
    unsigned long int max_epochs(void) const;

    /**
       \brief set maximal number of epochs in training
    */
    void max_epochs(unsigned long int);

    /**
       The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
       + bias \f$, where \f$ t \f$ is the target.

       @return output of training samples
    */
    const theplu::yat::utility::Vector& output(void) const;

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., the
       geometric distance from the decision hyperplane: \f$ \frac{ \sum
       \alpha_j t_j K_{ij} + bias}{|w|} \f$. The output has two rows.
       The first row is for binary target true, and the second is for
       binary target false. The second row is superfluous as it is the
       first row negated. It exists just to be aligned with multi-class
       SupervisedClassifiers. Each column in @a input and @a output
       corresponds to a sample to predict. Each row in @a input
       corresponds to a training sample; more exactly, row i in @a
       input should correspond to row i in the KernelLookup that was
       used for training.
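
       A sketch of typical use (assuming, as for other yat lookups,
       that KernelLookup has a columns() member; test_kernel and svm
       are hypothetical names):

       \code
       utility::Matrix result(2, test_kernel.columns());
       svm.predict(test_kernel, result);
       // result(0, j): prediction for sample j (binary target true)
       // result(1, j): the same value negated
       \endcode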
    */
    void predict(const KernelLookup& input, utility::Matrix& predict) const;

    /*
    ///
    /// @return output times margin (i.e. geometric distance from
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;
    */

    /**
       \brief make SVM untrained

       Sets the variable trained to false; other variables are undefined.

       \since New in yat 0.6
    */
    void reset(void);

    ///
    /// @brief sets the C-parameter
    ///
    void set_C(const double);

    /**
       Train the SVM following Platt's SMO, with Keerthi's
       modification, minimizing \f$ \frac{1}{2}\sum
       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
       \alpha_i \f$, which corresponds to minimizing \f$ \sum
       w_i^2+\sum C_i\xi_i^2 \f$.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minimum is located at infinity, and
       thus will not be reached within the maximal number of
       epochs. More exactly, when the problem is not linearly
       separable, there exists an eigenvector of \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction, the quadratic
       term does not contribute to the objective; the objective
       consists only of the linear term and hence has no minimum. This
       problem only occurs when \f$ C \f$ is set to infinity, because
       for a finite \f$ C \f$ all eigenvalues are finite. However, for
       a large \f$ C \f$ (and a training problem that is not linearly
       separable) there exists an eigenvector corresponding to a small
       eigenvalue, which means the minimum has moved from infinity to
       "very far away". In practice this will also result in the
       minimum not being reached within the maximal number of epochs,
       and \f$ C \f$ should then be decreased.

       The implementation uses Keerthi's second modification of
       Platt's Sequential Minimal Optimization. The SVM uses all data
       given for training.

       \throw utility::runtime_error if the maximal number of epochs
       is reached.
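
       A usage sketch of the error handling (kernel, target, and svm
       assumed to be constructed as elsewhere in this class's
       documentation):

       \code
       try {
         svm.train(kernel, target);
       }
       catch (const utility::runtime_error&) {
         // not converged within max_epochs(); decrease C and/or
         // increase max_epochs() before retraining
       }
       \endcode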
    */
    void train(const KernelLookup& kernel, const Target& target);

    /**
       \return true if SVM is trained

       \since New in yat 0.6
    */
    bool trained(void) const;

  private:
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    void calculate_bias(void);

    ///
    /// Calculate margin, which is the inverse of w
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. It first checks for the biggest violation (output -
    /// target = 0) among support vectors (alpha != 0). If no
    /// violation is found, it checks sequentially among the other
    /// samples. If there is no violation there either, training is
    /// completed.
    ///
    /// @return true if a pair of samples that violate the conditions
    /// can be found
    ///
    bool choose(const theplu::yat::utility::Vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin)
    ///
    double kernel_mod(const size_t i, const size_t j) const;

    ///
    /// @return 1 if i belongs to binary target true, else -1
    ///
    int target(size_t i) const;

    utility::Vector alpha_;
    double bias_;
    double C_inverse_;
    // not owned by SVM
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    utility::Vector output_;
    SVindex sample_;
    Target target_;
    double tolerance_;
    bool trained_;

  };

}}} // of namespace classifier, yat, and theplu

#endif