680 |
11 Oct 06 |
jari |
1 |
#ifndef _theplu_yat_statistics_tscore_ |
4200 |
19 Aug 22 |
peter |
2 |
#define _theplu_yat_statistics_tscore_ |
616 |
31 Aug 06 |
jari |
3 |
|
98 |
10 Jun 04 |
peter |
// $Id$ |
98 |
10 Jun 04 |
peter |
5 |
|
675 |
10 Oct 06 |
jari |
6 |
/* |
831 |
27 Mar 07 |
peter |
Copyright (C) 2004, 2005 Peter Johansson |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
295 |
29 Apr 05 |
peter |
11 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
675 |
10 Oct 06 |
jari |
13 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
675 |
10 Oct 06 |
jari |
18 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
675 |
10 Oct 06 |
jari |
General Public License for more details. |
675 |
10 Oct 06 |
jari |
23 |
|
675 |
10 Oct 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
675 |
10 Oct 06 |
jari |
26 |
*/ |
675 |
10 Oct 06 |
jari |
27 |
|
680 |
11 Oct 06 |
jari |
28 |
#include "Score.h" |
675 |
10 Oct 06 |
jari |
29 |
|
779 |
05 Mar 07 |
peter |
30 |
#include <cmath> |
98 |
10 Jun 04 |
peter |
31 |
#include <gsl/gsl_cdf.h> |
98 |
10 Jun 04 |
peter |
32 |
|
98 |
10 Jun 04 |
peter |
33 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
34 |
namespace yat { |
1023 |
01 Feb 08 |
peter |
35 |
namespace utility { |
1023 |
01 Feb 08 |
peter |
36 |
class VectorBase; |
1023 |
01 Feb 08 |
peter |
37 |
} |
4200 |
19 Aug 22 |
peter |
38 |
namespace statistics { |
414 |
01 Dec 05 |
peter |
39 |
|
98 |
10 Jun 04 |
peter |
40 |
/// |
767 |
22 Feb 07 |
peter |
/// @brief Class for Student's t-test. |
4200 |
19 Aug 22 |
peter |
42 |
/// |
589 |
24 Aug 06 |
peter |
/// See <a href="http://en.wikipedia.org/wiki/Student's_t-test"> |
589 |
24 Aug 06 |
peter |
/// http://en.wikipedia.org/wiki/Student's_t-test</a> for more |
589 |
24 Aug 06 |
peter |
/// details on the t-test. |
4200 |
19 Aug 22 |
peter |
46 |
/// |
98 |
10 Jun 04 |
peter |
47 |
class tScore : public Score |
98 |
10 Jun 04 |
peter |
48 |
{ |
4200 |
19 Aug 22 |
peter |
49 |
|
98 |
10 Jun 04 |
peter |
50 |
public: |
98 |
10 Jun 04 |
peter |
51 |
/// |
779 |
05 Mar 07 |
peter |
/// @brief Default Constructor. |
102 |
15 Jun 04 |
peter |
53 |
/// |
181 |
04 Oct 04 |
peter |
54 |
tScore(bool absolute=true); |
102 |
15 Jun 04 |
peter |
55 |
|
4200 |
19 Aug 22 |
peter |
56 |
|
669 |
07 Oct 06 |
peter |
57 |
/** |
669 |
07 Oct 06 |
peter |
Calculates the value of t-score, i.e. the ratio between |
669 |
07 Oct 06 |
peter |
difference in mean and standard deviation of this |
669 |
07 Oct 06 |
peter |
difference. \f$ t = \frac{ m_x - m_y } |
669 |
07 Oct 06 |
peter |
{s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the |
669 |
07 Oct 06 |
peter |
mean, \f$ n \f$ is the number of data points and \f$ s^2 = |
669 |
07 Oct 06 |
peter |
\frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - |
669 |
07 Oct 06 |
peter |
2 } \f$ |
779 |
05 Mar 07 |
peter |
65 |
|
779 |
05 Mar 07 |
peter |
@return t-score. If absolute=true absolute value of t-score |
779 |
05 Mar 07 |
peter |
is returned |
779 |
05 Mar 07 |
peter |
68 |
*/ |
4200 |
19 Aug 22 |
peter |
69 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
70 |
const utility::VectorBase& value) const; |
779 |
05 Mar 07 |
peter |
71 |
|
779 |
05 Mar 07 |
peter |
72 |
/** |
779 |
05 Mar 07 |
peter |
Calculates the value of t-score, i.e. the ratio between |
779 |
05 Mar 07 |
peter |
difference in mean and standard deviation of this |
779 |
05 Mar 07 |
peter |
difference. \f$ t = \frac{ m_x - m_y } |
779 |
05 Mar 07 |
peter |
{s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the |
779 |
05 Mar 07 |
peter |
mean, \f$ n \f$ is the number of data points and \f$ s^2 = |
779 |
05 Mar 07 |
peter |
\frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - |
779 |
05 Mar 07 |
peter |
2 } \f$ |
4200 |
19 Aug 22 |
peter |
80 |
|
966 |
11 Oct 07 |
peter |
\param target Target defining the two groups |
966 |
11 Oct 07 |
peter |
\param value Vector with data points on which calculation is based |
779 |
05 Mar 07 |
peter |
@param dof double pointer in which approximation of degrees of |
779 |
05 Mar 07 |
peter |
freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. |
779 |
05 Mar 07 |
peter |
85 |
|
669 |
07 Oct 06 |
peter |
@return t-score. If absolute=true absolute value of t-score |
669 |
07 Oct 06 |
peter |
is returned |
669 |
07 Oct 06 |
peter |
88 |
*/ |
4200 |
19 Aug 22 |
peter |
89 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
90 |
const utility::VectorBase& value, double* dof) const; |
119 |
20 Jul 04 |
peter |
91 |
|
669 |
07 Oct 06 |
peter |
92 |
/** |
669 |
07 Oct 06 |
peter |
Calculates the weighted t-score, i.e. the ratio between |
669 |
07 Oct 06 |
peter |
difference in mean and standard deviation of this |
669 |
07 Oct 06 |
peter |
difference. \f$ t = \frac{ m_x - m_y }{ |
669 |
07 Oct 06 |
peter |
s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the |
669 |
07 Oct 06 |
peter |
weighted mean, n is the weighted version of number of data |
669 |
07 Oct 06 |
peter |
points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and |
669 |
07 Oct 06 |
peter |
\f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{ |
669 |
07 Oct 06 |
peter |
\sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 |
669 |
07 Oct 06 |
peter |
} \f$. See AveragerWeighted for details. |
4200 |
19 Aug 22 |
peter |
102 |
|
966 |
11 Oct 07 |
peter |
\param target Target defining the two groups |
966 |
11 Oct 07 |
peter |
\param value Vector with values/weights on which calculation is based |
779 |
05 Mar 07 |
peter |
@param dof double pointer in which approximation of degrees of |
779 |
05 Mar 07 |
peter |
freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. |
779 |
05 Mar 07 |
peter |
107 |
|
669 |
07 Oct 06 |
peter |
@return t-score. If absolute=true absolute value of t-score |
669 |
07 Oct 06 |
peter |
is returned |
669 |
07 Oct 06 |
peter |
110 |
*/ |
4200 |
19 Aug 22 |
peter |
111 |
double score(const classifier::Target& target, |
779 |
05 Mar 07 |
peter |
112 |
const classifier::DataLookupWeighted1D& value, |
4200 |
19 Aug 22 |
peter |
113 |
double* dof=0) const; |
623 |
05 Sep 06 |
peter |
114 |
|
779 |
05 Mar 07 |
peter |
115 |
/** |
779 |
05 Mar 07 |
peter |
Calculates the weighted t-score, i.e. the ratio between |
779 |
05 Mar 07 |
peter |
difference in mean and standard deviation of this |
779 |
05 Mar 07 |
peter |
difference. \f$ t = \frac{ m_x - m_y }{ |
779 |
05 Mar 07 |
peter |
s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the |
779 |
05 Mar 07 |
peter |
weighted mean, n is the weighted version of number of data |
779 |
05 Mar 07 |
peter |
points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and |
779 |
05 Mar 07 |
peter |
\f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{ |
779 |
05 Mar 07 |
peter |
\sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 |
779 |
05 Mar 07 |
peter |
} \f$. See AveragerWeighted for details. |
4200 |
19 Aug 22 |
peter |
125 |
|
779 |
05 Mar 07 |
peter |
@return t-score. If absolute=true absolute value of t-score |
779 |
05 Mar 07 |
peter |
is returned |
779 |
05 Mar 07 |
peter |
128 |
*/ |
4200 |
19 Aug 22 |
peter |
129 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
130 |
const classifier::DataLookupWeighted1D& value) const; |
779 |
05 Mar 07 |
peter |
131 |
|
779 |
05 Mar 07 |
peter |
132 |
/** |
779 |
05 Mar 07 |
peter |
Calculates the weighted t-score, i.e. the ratio between |
779 |
05 Mar 07 |
peter |
difference in mean and standard deviation of this |
779 |
05 Mar 07 |
peter |
difference. \f$ t = \frac{ m_x - m_y }{ |
779 |
05 Mar 07 |
peter |
\frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the |
779 |
05 Mar 07 |
peter |
weighted mean, n is the weighted version of number of data |
779 |
05 Mar 07 |
peter |
points and \f$ s2 \f$ is an estimation of the variance \f$ s^2 |
779 |
05 Mar 07 |
peter |
= \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x |
779 |
05 Mar 07 |
peter |
+ n_y - 2 } \f$. See AveragerWeighted for details. |
4200 |
19 Aug 22 |
peter |
141 |
|
779 |
05 Mar 07 |
peter |
@return t-score if absolute=true absolute value of t-score |
779 |
05 Mar 07 |
peter |
is returned |
779 |
05 Mar 07 |
peter |
144 |
*/ |
4200 |
19 Aug 22 |
peter |
145 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
146 |
const utility::VectorBase& value, |
4200 |
19 Aug 22 |
peter |
147 |
const utility::VectorBase& weight) const; |
112 |
07 Jul 04 |
peter |
148 |
|
779 |
05 Mar 07 |
peter |
149 |
/** |
779 |
05 Mar 07 |
peter |
Calculates the weighted t-score, i.e. the ratio between |
779 |
05 Mar 07 |
peter |
difference in mean and standard deviation of this |
779 |
05 Mar 07 |
peter |
difference. \f$ t = \frac{ m_x - m_y }{ |
779 |
05 Mar 07 |
peter |
\frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the |
779 |
05 Mar 07 |
peter |
weighted mean, n is the weighted version of number of data |
779 |
05 Mar 07 |
peter |
points and \f$ s2 \f$ is an estimation of the variance \f$ s^2 |
779 |
05 Mar 07 |
peter |
= \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x |
779 |
05 Mar 07 |
peter |
+ n_y - 2 } \f$. See AveragerWeighted for details. |
4200 |
19 Aug 22 |
peter |
158 |
|
966 |
11 Oct 07 |
peter |
\param target Target defining the two groups |
966 |
11 Oct 07 |
peter |
\param value Vector with data values on which calculation is based |
966 |
11 Oct 07 |
peter |
\param weight Vector with weight associated to \a value |
779 |
05 Mar 07 |
peter |
@param dof double pointer in which approximation of degrees of |
779 |
05 Mar 07 |
peter |
freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. |
179 |
04 Oct 04 |
peter |
164 |
|
779 |
05 Mar 07 |
peter |
@return t-score if absolute=true absolute value of t-score |
779 |
05 Mar 07 |
peter |
is returned |
779 |
05 Mar 07 |
peter |
167 |
*/ |
4200 |
19 Aug 22 |
peter |
168 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
169 |
const utility::VectorBase& value, |
1023 |
01 Feb 08 |
peter |
170 |
const utility::VectorBase& weight, |
4200 |
19 Aug 22 |
peter |
171 |
double* dof=0) const; |
779 |
05 Mar 07 |
peter |
172 |
|
822 |
18 Mar 07 |
peter |
173 |
/** |
822 |
18 Mar 07 |
peter |
Calcultate t-score from Averager like objects. Requirements for |
4200 |
19 Aug 22 |
peter |
T1 and T2 are: double mean(), double n(), double sum_xx_centered() |
4200 |
19 Aug 22 |
peter |
176 |
|
822 |
18 Mar 07 |
peter |
If \a dof is not a null pointer it is assigned to number of |
822 |
18 Mar 07 |
peter |
degrees of freedom. |
822 |
18 Mar 07 |
peter |
179 |
*/ |
4200 |
19 Aug 22 |
peter |
180 |
template<typename T1, typename T2> |
822 |
18 Mar 07 |
peter |
181 |
double score(const T1& pos, const T2& neg, double* dof=0) const; |
822 |
18 Mar 07 |
peter |
182 |
|
98 |
10 Jun 04 |
peter |
183 |
private: |
4200 |
19 Aug 22 |
peter |
184 |
|
98 |
10 Jun 04 |
peter |
185 |
}; |
98 |
10 Jun 04 |
peter |
186 |
|
4200 |
19 Aug 22 |
peter |
187 |
template<typename T1, typename T2> |
822 |
18 Mar 07 |
peter |
188 |
double tScore::score(const T1& pos, const T2& neg, double* dof) const |
822 |
18 Mar 07 |
peter |
189 |
{ |
822 |
18 Mar 07 |
peter |
190 |
double diff = pos.mean() - neg.mean(); |
822 |
18 Mar 07 |
peter |
191 |
if (dof) |
822 |
18 Mar 07 |
peter |
192 |
*dof=pos.n()+neg.n()-2; |
822 |
18 Mar 07 |
peter |
193 |
double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/ |
822 |
18 Mar 07 |
peter |
194 |
(pos.n()+neg.n()-2)); |
822 |
18 Mar 07 |
peter |
195 |
double t=diff/sqrt(s2/pos.n()+s2/neg.n()); |
822 |
18 Mar 07 |
peter |
196 |
if (t<0 && absolute_) |
822 |
18 Mar 07 |
peter |
197 |
return -t; |
822 |
18 Mar 07 |
peter |
198 |
return t; |
822 |
18 Mar 07 |
peter |
199 |
} |
822 |
18 Mar 07 |
peter |
200 |
|
683 |
11 Oct 06 |
jari |
201 |
}}} // of namespace statistics, yat, and theplu |
98 |
10 Jun 04 |
peter |
202 |
|
98 |
10 Jun 04 |
peter |
203 |
#endif |