svndigest - svndigest

yat/classifier/NBC.h

: Code
: Comments
: Other

Rev	Date	Author	Line
4200	19 Aug 22	peter	1	#ifndef _theplu_yat_classifier_nbc_
4200	19 Aug 22	peter	2	#define _theplu_yat_classifier_nbc_
662	27 Sep 06	peter	3
662	27 Sep 06	peter	4	// $Id$
662	27 Sep 06	peter	5
675	10 Oct 06	jari	6	/*
2119	12 Dec 09	peter	7	Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
4359	23 Aug 23	peter	8	Copyright (C) 2007 Peter Johansson
2119	12 Dec 09	peter	9	Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
662	27 Sep 06	peter	10
1437	25 Aug 08	peter	11	This file is part of the yat library, http://dev.thep.lu.se/yat
675	10 Oct 06	jari	12
675	10 Oct 06	jari	13	The yat library is free software; you can redistribute it and/or
675	10 Oct 06	jari	14	modify it under the terms of the GNU General Public License as
1486	09 Sep 08	jari	15	published by the Free Software Foundation; either version 3 of the
675	10 Oct 06	jari	16	License, or (at your option) any later version.
675	10 Oct 06	jari	17
675	10 Oct 06	jari	18	The yat library is distributed in the hope that it will be useful,
675	10 Oct 06	jari	19	but WITHOUT ANY WARRANTY; without even the implied warranty of
675	10 Oct 06	jari	20	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
675	10 Oct 06	jari	21	General Public License for more details.
675	10 Oct 06	jari	22
675	10 Oct 06	jari	23	You should have received a copy of the GNU General Public License
1487	10 Sep 08	jari	24	along with yat. If not, see <http://www.gnu.org/licenses/>.
675	10 Oct 06	jari	25	*/
675	10 Oct 06	jari	26
680	11 Oct 06	jari	27	#include "SupervisedClassifier.h"
1121	22 Feb 08	peter	28	#include "yat/utility/Matrix.h"
675	10 Oct 06	jari	29
662	27 Sep 06	peter	30	namespace theplu {
680	11 Oct 06	jari	31	namespace yat {
4200	19 Aug 22	peter	32	namespace classifier {
662	27 Sep 06	peter	33
662	27 Sep 06	peter	34	class MatrixLookup;
662	27 Sep 06	peter	35	class MatrixLookupWeighted;
662	27 Sep 06	peter	36	class Target;
662	27 Sep 06	peter	37
767	22 Feb 07	peter	38	/**
1152	25 Feb 08	peter	39	@brief Naive Bayesian Classifier.
4200	19 Aug 22	peter	40
767	22 Feb 07	peter	41	Each class is modelled as a multinormal distribution with
1184	28 Feb 08	peter	42	features being independent: \f$ P(x\|c) \propto \prod_i
767	22 Feb 07	peter	43	\frac{1}{\sqrt{2\pi\sigma_i^2}} \exp \left(
1184	28 Feb 08	peter	44	-\frac{(x_i-\mu_i)^2}{2\sigma_i^2} \right)\f$
767	22 Feb 07	peter	45	*/
662	27 Sep 06	peter	46	class NBC : public SupervisedClassifier
662	27 Sep 06	peter	47	{
4200	19 Aug 22	peter	48
662	27 Sep 06	peter	49	public:
662	27 Sep 06	peter	50	///
4200	19 Aug 22	peter	51	/// @brief Constructor
662	27 Sep 06	peter	52	///
1157	26 Feb 08	markus	53	NBC(void);
1157	26 Feb 08	markus	54
4200	19 Aug 22	peter	55
662	27 Sep 06	peter	56	///
1157	26 Feb 08	markus	57	/// @brief Destructor
1157	26 Feb 08	markus	58	///
662	27 Sep 06	peter	59	virtual ~NBC();
662	27 Sep 06	peter	60
722	27 Dec 06	markus	61
1157	26 Feb 08	markus	62	NBC* make_classifier(void) const;
4200	19 Aug 22	peter	63
662	27 Sep 06	peter	64	///
1200	05 Mar 08	peter	65	/// \brief Train the NBC using training data and targets.
662	27 Sep 06	peter	66	///
767	22 Feb 07	peter	67	/// For each class, the mean and variance are estimated for each
1184	28 Feb 08	peter	68	/// feature (see statistics::Averager for details).
767	22 Feb 07	peter	69	///
1184	28 Feb 08	peter	70	/// If a class contains only one sample (or none), parameters
1184	28 Feb 08	peter	71	/// cannot be estimated. In that case, parameters are set to NaN
1184	28 Feb 08	peter	72	/// for that particular class.
960	10 Oct 07	peter	73	///
1157	26 Feb 08	markus	74	void train(const MatrixLookup&, const Target&);
662	27 Sep 06	peter	75
1157	26 Feb 08	markus	76	///
1200	05 Mar 08	peter	77	/// \brief Train the NBC using weighted training data and
1184	28 Feb 08	peter	78	/// targets.
1157	26 Feb 08	markus	79	///
1184	28 Feb 08	peter	80	/// For each class, the mean and variance are estimated for each
1184	28 Feb 08	peter	81	/// feature (see statistics::AveragerWeighted for details).
1184	28 Feb 08	peter	82	///
1184	28 Feb 08	peter	83	/// To estimate the parameters of a class, each feature of the
1184	28 Feb 08	peter	84	/// class must have at least two non-zero data points. Otherwise
1184	28 Feb 08	peter	85	/// the parameters are set to NaN and any prediction will result
1184	28 Feb 08	peter	86	/// in NaN for that particular class.
1184	28 Feb 08	peter	87	///
1157	26 Feb 08	markus	88	void train(const MatrixLookupWeighted&, const Target&);
4200	19 Aug 22	peter	89
808	15 Mar 07	peter	90	/**
1184	28 Feb 08	peter	91	\brief Predict samples using unweighted data
1184	28 Feb 08	peter	92
813	16 Mar 07	peter	93	Each sample (column) in \a data is predicted and predictions
1184	28 Feb 08	peter	94	are returned in the corresponding column in passed \a
1184	28 Feb 08	peter	95	result. Each row in \a result corresponds to a class. The
1184	28 Feb 08	peter	96	prediction is the estimated probability that the sample belongs to
1184	28 Feb 08	peter	97	class \f$ j \f$:
813	16 Mar 07	peter	98
1184	28 Feb 08	peter	99	\f$ P_j = \frac{1}{Z}\prod_i\frac{1}{\sqrt{2\pi\sigma_i^2}}
1184	28 Feb 08	peter	100	\exp\left(-\frac{(x_i-\mu_i)^2}{2\sigma_i^2}\right)\f$, where \f$ \mu_i
813	16 Mar 07	peter	101	\f$ and \f$ \sigma_i^2 \f$ are the estimated mean and variance,
1184	28 Feb 08	peter	102	respectively. Z is chosen such that total probability equals unity, \f$
1184	28 Feb 08	peter	103	\sum P_j = 1 \f$.
1184	28 Feb 08	peter	104
1184	28 Feb 08	peter	105	\note If parameters could not be estimated during training, due
1184	28 Feb 08	peter	106	to an insufficient number of data points, the output for
1184	28 Feb 08	peter	107	that class is NaN and is not included in the calculation of the
1184	28 Feb 08	peter	108	normalization factor \f$ Z \f$.
808	15 Mar 07	peter	109	*/
1184	28 Feb 08	peter	110	void predict(const MatrixLookup& data, utility::Matrix& result) const;
662	27 Sep 06	peter	111
1160	26 Feb 08	markus	112	/**
1184	28 Feb 08	peter	113	\brief Predict samples using weighted data
1184	28 Feb 08	peter	114
1169	26 Feb 08	peter	115	Each sample (column) in \a data is predicted and predictions
1184	28 Feb 08	peter	116	are returned in the corresponding column in passed \a
1184	28 Feb 08	peter	117	result. Each row in \a result corresponds to a class. The
1184	28 Feb 08	peter	118	prediction is the estimated probability that the sample belongs to
1184	28 Feb 08	peter	119	class \f$ j \f$:
1182	28 Feb 08	peter	120
1184	28 Feb 08	peter	121	\f$ P_j = \frac{1}{Z} \exp\left(-N\frac{\sum
1200	05 Mar 08	peter	122	{w_i(x_i-\mu_i)^2}/(2\sigma_i^2)}{\sum w_i}\right)
1200	05 Mar 08	peter	123	\prod_i\frac{1}{\sqrt{2\pi\sigma_i^2}}\f$, where \f$ \mu_i \f$
1200	05 Mar 08	peter	124	and \f$ \sigma_i^2 \f$ are the estimated mean and variance,
1200	05 Mar 08	peter	125	respectively. Z is chosen such that total probability equals
1200	05 Mar 08	peter	126	unity, \f$ \sum P_j = 1 \f$.
1184	28 Feb 08	peter	127
1184	28 Feb 08	peter	128	\note If parameters could not be estimated during training, due
1184	28 Feb 08	peter	129	to an insufficient number of data points, the output for
1184	28 Feb 08	peter	130	that class is NaN and is not included in the calculation of the
1184	28 Feb 08	peter	131	normalization factor \f$ Z \f$.
1160	26 Feb 08	markus	132	*/
1184	28 Feb 08	peter	133	void predict(const MatrixLookupWeighted& data,utility::Matrix& result) const;
662	27 Sep 06	peter	134
1160	26 Feb 08	markus	135
662	27 Sep 06	peter	136	private:
1160	26 Feb 08	markus	137	void standardize_lnP(utility::Matrix& prediction) const;
1160	26 Feb 08	markus	138
1121	22 Feb 08	peter	139	utility::Matrix centroids_;
1121	22 Feb 08	peter	140	utility::Matrix sigma2_;
662	27 Sep 06	peter	141
959	10 Oct 07	peter	142	double sum_logsigma(size_t i) const;
959	10 Oct 07	peter	143
959	10 Oct 07	peter	144
662	27 Sep 06	peter	145	};
4200	19 Aug 22	peter	146
680	11 Oct 06	jari	147	}}} // of namespace classifier, yat, and theplu
662	27 Sep 06	peter	148
662	27 Sep 06	peter	149	#endif