svndigest - svndigest

yat/classifier/SVM.h

: Code
: Comments
: Other

Rev	Date	Author	Line
4200	19 Aug 22	peter	1	#ifndef _theplu_yat_classifier_svm_
4200	19 Aug 22	peter	2	#define _theplu_yat_classifier_svm_
30	16 Jan 04	peter	3
616	31 Aug 06	jari	4	// $Id$
616	31 Aug 06	jari	5
675	10 Oct 06	jari	6	/*
2119	12 Dec 09	peter	7	Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
2119	12 Dec 09	peter	8	Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
4359	23 Aug 23	peter	9	Copyright (C) 2007 Peter Johansson
4359	23 Aug 23	peter	10	Copyright (C) 2008 Jari Häkkinen, Peter Johansson
4359	23 Aug 23	peter	11	Copyright (C) 2009, 2010 Peter Johansson
30	16 Jan 04	peter	12
1437	25 Aug 08	peter	13	This file is part of the yat library, http://dev.thep.lu.se/yat
675	10 Oct 06	jari	14
675	10 Oct 06	jari	15	The yat library is free software; you can redistribute it and/or
675	10 Oct 06	jari	16	modify it under the terms of the GNU General Public License as
1486	09 Sep 08	jari	17	published by the Free Software Foundation; either version 3 of the
675	10 Oct 06	jari	18	License, or (at your option) any later version.
675	10 Oct 06	jari	19
675	10 Oct 06	jari	20	The yat library is distributed in the hope that it will be useful,
675	10 Oct 06	jari	21	but WITHOUT ANY WARRANTY; without even the implied warranty of
675	10 Oct 06	jari	22	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
675	10 Oct 06	jari	23	General Public License for more details.
675	10 Oct 06	jari	24
675	10 Oct 06	jari	25	You should have received a copy of the GNU General Public License
1487	10 Sep 08	jari	26	along with yat. If not, see <http://www.gnu.org/licenses/>.
675	10 Oct 06	jari	27	*/
675	10 Oct 06	jari	28
680	11 Oct 06	jari	29	#include "SVindex.h"
1087	14 Feb 08	peter	30	#include "Target.h"
1120	21 Feb 08	peter	31	#include "yat/utility/Vector.h"
675	10 Oct 06	jari	32
45	27 Feb 04	peter	33	#include <utility>
61	17 Apr 04	peter	34	#include <vector>
30	16 Jan 04	peter	35
42	26 Feb 04	jari	36	namespace theplu {
680	11 Oct 06	jari	37	namespace yat {
1121	22 Feb 08	peter	38	namespace utility{
1121	22 Feb 08	peter	39	class Matrix;
1121	22 Feb 08	peter	40	}
1121	22 Feb 08	peter	41
4200	19 Aug 22	peter	42	namespace classifier {
323	26 May 05	peter	43
1102	18 Feb 08	peter	44	class DataLookup1D;
1102	18 Feb 08	peter	45	class DataLookupWeighted1D;
1102	18 Feb 08	peter	46	class KernelLookup;
747	11 Feb 07	peter	47
1200	05 Mar 08	peter	48	/**
4200	19 Aug 22	peter	49	\brief Support Vector Machine
1200	05 Mar 08	peter	50	*/
1087	14 Feb 08	peter	51	class SVM
30	16 Jan 04	peter	52	{
4200	19 Aug 22	peter	53
30	16 Jan 04	peter	54	public:
445	15 Dec 05	peter	55	///
4200	19 Aug 22	peter	56	/// \brief Constructor
692	17 Oct 06	peter	57	///
1100	18 Feb 08	peter	58	SVM(void);
323	26 May 05	peter	59
1108	19 Feb 08	peter	60	/**
1200	05 Mar 08	peter	61	\brief Copy constructor.
4200	19 Aug 22	peter	62	*/
1108	19 Feb 08	peter	63	SVM(const SVM&);
4200	19 Aug 22	peter	64
493	09 Jan 06	peter	65	///
1200	05 Mar 08	peter	66	/// \brief Destructor
547	06 Mar 06	peter	67	///
547	06 Mar 06	peter	68	virtual ~SVM();
547	06 Mar 06	peter	69
1200	05 Mar 08	peter	70	/**
1200	05 Mar 08	peter	71	\brief Create an untrained copy of SVM.
1200	05 Mar 08	peter	72
1200	05 Mar 08	peter	73	\returns A dynamically allocated SVM, which has to be deleted
1200	05 Mar 08	peter	74	by the caller to avoid memory leaks.
1200	05 Mar 08	peter	75	*/
1100	18 Feb 08	peter	76	SVM* make_classifier(void) const;
493	09 Jan 06	peter	77
692	17 Oct 06	peter	78	///
1200	05 Mar 08	peter	79	/// @return alpha parameters
692	17 Oct 06	peter	80	///
1120	21 Feb 08	peter	81	const utility::Vector& alpha(void) const;
520	22 Feb 06	peter	82
692	17 Oct 06	peter	83	///
323	26 May 05	peter	84	/// The C-parameter is the balance term (see train()). A very
323	26 May 05	peter	85	/// large C means the training will be focused on getting samples
323	26 May 05	peter	86	/// correctly classified, with risk for overfitting and poor
1200	05 Mar 08	peter	87	/// generalisation. A too small C will result in a training, in
1200	05 Mar 08	peter	88	/// which misclassifications are not penalized. C is weighted with
1200	05 Mar 08	peter	89	/// respect to the size such that \f$ n_+C_+ = n_-C_- \f$, meaning
1200	05 Mar 08	peter	90	/// a misclassificaion of the smaller group is penalized
323	26 May 05	peter	91	/// harder. This balance is equivalent to the one occuring for
1200	05 Mar 08	peter	92	/// %regression with regularisation, or ANN-training with a
323	26 May 05	peter	93	/// weight-decay term. Default is C set to infinity.
323	26 May 05	peter	94	///
323	26 May 05	peter	95	/// @returns mean of vector \f$ C_i \f$
692	17 Oct 06	peter	96	///
720	26 Dec 06	jari	97	double C(void) const;
61	17 Apr 04	peter	98
692	17 Oct 06	peter	99	///
963	10 Oct 07	peter	100	/// Default is max_epochs set to 100,000.
323	26 May 05	peter	101	///
162	21 Sep 04	peter	102	/// @return number of maximal epochs
162	21 Sep 04	peter	103	///
1863	13 Mar 09	peter	104	unsigned long int max_epochs(void) const;
4200	19 Aug 22	peter	105
966	11 Oct 07	peter	106	/**
964	10 Oct 07	peter	107	\brief set maximal number of epochs in training
964	10 Oct 07	peter	108	*/
1863	13 Mar 09	peter	109	void max_epochs(unsigned long int);
4200	19 Aug 22	peter	110
4200	19 Aug 22	peter	111	/**
692	17 Oct 06	peter	112	The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
692	17 Oct 06	peter	113	+ bias \f$, where \f$ t \f$ is the target.
4200	19 Aug 22	peter	114
1200	05 Mar 08	peter	115	@return output of training samples
4200	19 Aug 22	peter	116	*/
1120	21 Feb 08	peter	117	const theplu::yat::utility::Vector& output(void) const;
167	23 Sep 04	peter	118
692	17 Oct 06	peter	119	/**
692	17 Oct 06	peter	120	Generate prediction @a predict from @a input. The prediction
692	17 Oct 06	peter	121	is calculated as the output times the margin, i.e., geometric
692	17 Oct 06	peter	122	distance from decision hyperplane: \f$ \frac{ \sum \alpha_j
1200	05 Mar 08	peter	123	t_j K_{ij} + bias}{\|w\|} \f$ The output has 2 rows. The first row
692	17 Oct 06	peter	124	is for binary target true, and the second is for binary target
692	17 Oct 06	peter	125	false. The second row is superfluous as it is the first row
692	17 Oct 06	peter	126	negated. It exist just to be aligned with multi-class
692	17 Oct 06	peter	127	SupervisedClassifiers. Each column in @a input and @a output
692	17 Oct 06	peter	128	corresponds to a sample to predict. Each row in @a input
692	17 Oct 06	peter	129	corresponds to a training sample, and more exactly row i in @a
692	17 Oct 06	peter	130	input should correspond to row i in KernelLookup that was used
692	17 Oct 06	peter	131	for training.
692	17 Oct 06	peter	132	*/
1121	22 Feb 08	peter	133	void predict(const KernelLookup& input, utility::Matrix& predict) const;
505	02 Feb 06	markus	134
1200	05 Mar 08	peter	135	/*
523	23 Feb 06	peter	136	///
569	23 Mar 06	peter	137	/// @return output times margin (i.e. geometric distance from
569	23 Mar 06	peter	138	/// decision hyperplane) from data @a input
523	23 Feb 06	peter	139	///
539	05 Mar 06	peter	140	double predict(const DataLookup1D& input) const;
505	02 Feb 06	markus	141
505	02 Feb 06	markus	142	///
569	23 Mar 06	peter	143	/// @return output times margin from data @a input with
569	23 Mar 06	peter	144	/// corresponding @a weight
542	05 Mar 06	peter	145	///
628	05 Sep 06	peter	146	double predict(const DataLookupWeighted1D& input) const;
1200	05 Mar 08	peter	147	*/
542	05 Mar 06	peter	148
1859	08 Mar 09	peter	149	/**
1859	08 Mar 09	peter	150	\brief make SVM untrained
1859	08 Mar 09	peter	151
1859	08 Mar 09	peter	152	Setting variable trained to false; other variables are undefined.
1859	08 Mar 09	peter	153
1859	08 Mar 09	peter	154	\since New in yat 0.6
1859	08 Mar 09	peter	155	*/
1859	08 Mar 09	peter	156	void reset(void);
1859	08 Mar 09	peter	157
542	05 Mar 06	peter	158	///
571	06 Apr 06	peter	159	/// @brief sets the C-Parameter
571	06 Apr 06	peter	160	///
571	06 Apr 06	peter	161	void set_C(const double);
571	06 Apr 06	peter	162
648	14 Sep 06	peter	163	/**
648	14 Sep 06	peter	164	Training the SVM following Platt's SMO, with Keerti's
648	14 Sep 06	peter	165	modifacation. Minimizing \f$ \frac{1}{2}\sum
692	17 Oct 06	peter	166	y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
1205	05 Mar 08	peter	167	\alpha_i\f$, which corresponds to minimizing \f$ \sum
692	17 Oct 06	peter	168	w_i^2+\sum C_i\xi_i^2 \f$.
692	17 Oct 06	peter	169
692	17 Oct 06	peter	170	@note If the training problem is not linearly separable and C
692	17 Oct 06	peter	171	is set to infinity, the minima will be located in the infinity,
963	10 Oct 07	peter	172	and thus the minimum will not be reached within the maximal
692	17 Oct 06	peter	173	number of epochs. More exactly, when the problem is not
692	17 Oct 06	peter	174	linearly separable, there exists an eigenvector to \f$
692	17 Oct 06	peter	175	H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
692	17 Oct 06	peter	176	conditions: \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
692	17 Oct 06	peter	177	\f$. As the eigenvalue is zero in this direction the quadratic
692	17 Oct 06	peter	178	term does not contribute to the objective, but the objective
692	17 Oct 06	peter	179	only consists of the linear term and hence there is no
692	17 Oct 06	peter	180	minumum. This problem only occurs when \f$ C \f$ is set to
692	17 Oct 06	peter	181	infinity because for a finite \f$ C \f$ all eigenvalues are
692	17 Oct 06	peter	182	finite. However, for a large \f$ C \f$ (and training problem is
692	17 Oct 06	peter	183	non-linearly separable) there exists an eigenvector
692	17 Oct 06	peter	184	corresponding to a small eigenvalue, which means the minima has
692	17 Oct 06	peter	185	moved from infinity to "very far away". In practice this will
692	17 Oct 06	peter	186	also result in that the minima is not reached withing the
692	17 Oct 06	peter	187	maximal number of epochs and the of \f$ C \f$ should be
692	17 Oct 06	peter	188	decreased.
963	10 Oct 07	peter	189
1200	05 Mar 08	peter	190	Class for SVM using Keerthi's second modification of Platt's
1200	05 Mar 08	peter	191	Sequential Minimal Optimization. The SVM uses all data given for
4200	19 Aug 22	peter	192	training.
4200	19 Aug 22	peter	193
2210	05 Mar 10	peter	194	\throw utility::runtime_error if maximal number of epoch is reach.
648	14 Sep 06	peter	195	*/
1100	18 Feb 08	peter	196	void train(const KernelLookup& kernel, const Target& target);
61	17 Apr 04	peter	197
1859	08 Mar 09	peter	198	/**
1859	08 Mar 09	peter	199	\return true if SVM is trained
1859	08 Mar 09	peter	200
1859	08 Mar 09	peter	201	\since New in yat 0.6
1859	08 Mar 09	peter	202	*/
1859	08 Mar 09	peter	203	bool trained(void) const;
4200	19 Aug 22	peter	204
30	16 Jan 04	peter	205	private:
692	17 Oct 06	peter	206	///
323	26 May 05	peter	207	/// Calculates bounds for alpha2
323	26 May 05	peter	208	///
323	26 May 05	peter	209	void bounds(double&, double&) const;
323	26 May 05	peter	210
323	26 May 05	peter	211	///
514	20 Feb 06	peter	212	/// @brief calculates the bias term
323	26 May 05	peter	213	///
323	26 May 05	peter	214	/// @return true if successful
323	26 May 05	peter	215	///
1049	07 Feb 08	peter	216	void calculate_bias(void);
323	26 May 05	peter	217
323	26 May 05	peter	218	///
569	23 Mar 06	peter	219	/// Calculate margin that is inverse of w
569	23 Mar 06	peter	220	///
569	23 Mar 06	peter	221	void calculate_margin(void);
569	23 Mar 06	peter	222
569	23 Mar 06	peter	223	///
692	17 Oct 06	peter	224	/// Private function choosing which two elements that should be
692	17 Oct 06	peter	225	/// updated. First checking for the biggest violation (output - target =
692	17 Oct 06	peter	226	/// 0) among support vectors (alpha!=0). If no violation was found check
692	17 Oct 06	peter	227	/// sequentially among the other samples. If no violation there as
692	17 Oct 06	peter	228	/// well training is completed
323	26 May 05	peter	229	///
323	26 May 05	peter	230	/// @return true if a pair of samples that violate the conditions
323	26 May 05	peter	231	/// can be found
692	17 Oct 06	peter	232	///
1120	21 Feb 08	peter	233	bool choose(const theplu::yat::utility::Vector&);
114	15 Jul 04	peter	234
323	26 May 05	peter	235	///
330	01 Jun 05	peter	236	/// @return kernel modified with diagonal term (soft margin)
323	26 May 05	peter	237	///
720	26 Dec 06	jari	238	double kernel_mod(const size_t i, const size_t j) const;
720	26 Dec 06	jari	239
720	26 Dec 06	jari	240	///
523	23 Feb 06	peter	241	/// @return 1 if i belong to binary target true else -1
720	26 Dec 06	jari	242	///
720	26 Dec 06	jari	243	int target(size_t i) const;
509	18 Feb 06	peter	244
1120	21 Feb 08	peter	245	utility::Vector alpha_;
445	15 Dec 05	peter	246	double bias_;
445	15 Dec 05	peter	247	double C_inverse_;
1175	27 Feb 08	peter	248	// not owned by SVM
4200	19 Aug 22	peter	249	const KernelLookup* kernel_;
569	23 Mar 06	peter	250	double margin_;
445	15 Dec 05	peter	251	unsigned long int max_epochs_;
1120	21 Feb 08	peter	252	utility::Vector output_;
660	27 Sep 06	peter	253	SVindex sample_;
1087	14 Feb 08	peter	254	Target target_;
445	15 Dec 05	peter	255	double tolerance_;
1100	18 Feb 08	peter	256	bool trained_;
445	15 Dec 05	peter	257
36	12 Feb 04	peter	258	};
37	13 Feb 04	peter	259
680	11 Oct 06	jari	260	}}} // of namespace classifier, yat, and theplu
42	26 Feb 04	jari	261
30	16 Jan 04	peter	262	#endif