779 |
05 Mar 07 |
peter |
1 |
#ifndef _theplu_yat_statistics_sam_score_ |
779 |
05 Mar 07 |
peter |
2 |
#define _theplu_yat_statistics_sam_score_ |
669 |
07 Oct 06 |
peter |
3 |
|
669 |
07 Oct 06 |
peter |
// $Id$ |
669 |
07 Oct 06 |
peter |
5 |
|
675 |
10 Oct 06 |
jari |
6 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2006 Jari Häkkinen, Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
669 |
07 Oct 06 |
peter |
10 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
675 |
10 Oct 06 |
jari |
12 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
675 |
10 Oct 06 |
jari |
17 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
675 |
10 Oct 06 |
jari |
General Public License for more details. |
675 |
10 Oct 06 |
jari |
22 |
|
675 |
10 Oct 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
675 |
10 Oct 06 |
jari |
25 |
*/ |
675 |
10 Oct 06 |
jari |
26 |
|
680 |
11 Oct 06 |
jari |
27 |
#include "Score.h" |
675 |
10 Oct 06 |
jari |
28 |
|
779 |
05 Mar 07 |
peter |
29 |
#include <cmath> |
779 |
05 Mar 07 |
peter |
30 |
|
669 |
07 Oct 06 |
peter |
31 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
32 |
namespace yat { |
669 |
07 Oct 06 |
peter |
33 |
namespace utility { |
1023 |
01 Feb 08 |
peter |
34 |
class VectorBase; |
669 |
07 Oct 06 |
peter |
35 |
} |
669 |
07 Oct 06 |
peter |
36 |
namespace classifier {
  // Forward declaration only: this header refers to the type solely by
  // const reference in a member-function declaration, so the full
  // definition is not required here. The name must match the one used
  // in SAMScore::score (DataLookupWeighted1D).
  class DataLookupWeighted1D;
}
4200 |
19 Aug 22 |
peter |
39 |
namespace statistics { |
669 |
07 Oct 06 |
peter |
40 |
|
669 |
07 Oct 06 |
peter |
41 |
/** |
669 |
07 Oct 06 |
peter |
@brief Class for score used in Significance Analysis of |
669 |
07 Oct 06 |
peter |
Microarrays (SAM). |
4200 |
19 Aug 22 |
peter |
44 |
|
669 |
07 Oct 06 |
peter |
The score is similar to the Student t-test but with an added |
669 |
07 Oct 06 |
peter |
fudge factor in denominator to avoid groups with small variance |
4200 |
19 Aug 22 |
peter |
getting a large score. \f$ \frac{m_x-m_y}{s+s_0} \f$ |
669 |
07 Oct 06 |
peter |
48 |
|
669 |
07 Oct 06 |
peter |
see http://www.pnas.org/cgi/content/abstract/98/9/5116 for |
669 |
07 Oct 06 |
peter |
details |
4200 |
19 Aug 22 |
peter |
51 |
*/ |
779 |
05 Mar 07 |
peter |
52 |
class SAMScore : public Score |
669 |
07 Oct 06 |
peter |
53 |
{ |
669 |
07 Oct 06 |
peter |
54 |
|
669 |
07 Oct 06 |
peter |
55 |
public: |
669 |
07 Oct 06 |
peter |
56 |
/// |
669 |
07 Oct 06 |
peter |
/// @param s0 \f$ s_0 \f$ is a fudge factor |
966 |
11 Oct 07 |
peter |
/// @param absolute if true max(score, -score) is used |
669 |
07 Oct 06 |
peter |
59 |
/// |
779 |
05 Mar 07 |
peter |
60 |
SAMScore(const double s0, bool absolute=true); |
669 |
07 Oct 06 |
peter |
61 |
|
669 |
07 Oct 06 |
peter |
62 |
/** |
669 |
07 Oct 06 |
peter |
\f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{1}{n_x}\sum |
669 |
07 Oct 06 |
peter |
x_i \f$, \f$ s^2 = \left(\frac{1}{n_x}+\frac{1}{n_y} \right) |
669 |
07 Oct 06 |
peter |
\frac{\sum (x_i-m_x)^2 + \sum(y_i-m_y)^2}{n_x+n_y-2} \f$, and |
669 |
07 Oct 06 |
peter |
\f$ s_0 \f$ is the fudge factor. |
669 |
07 Oct 06 |
peter |
67 |
|
669 |
07 Oct 06 |
peter |
@return SAM score. If absolute=true absolute value of t-score |
669 |
07 Oct 06 |
peter |
is returned |
669 |
07 Oct 06 |
peter |
70 |
*/ |
4200 |
19 Aug 22 |
peter |
71 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
72 |
const utility::VectorBase& value) const; |
669 |
07 Oct 06 |
peter |
73 |
|
669 |
07 Oct 06 |
peter |
74 |
/** |
669 |
07 Oct 06 |
peter |
\f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{\sum |
669 |
07 Oct 06 |
peter |
w_ix_i}{w_i} \f$, \f$ s_0 \f$ is the fudge factor, and \f$ s^2 |
669 |
07 Oct 06 |
peter |
= \left(\frac{1}{n_x}+\frac{1}{n_y} \right) \frac{\sum |
669 |
07 Oct 06 |
peter |
w_i(x_i-m_x)^2 + \sum w_i(y_i-m_y)^2}{n_x+n_y-2} \f$ where \f$ |
669 |
07 Oct 06 |
peter |
n \f$ is weighted version of number of data points \f$ |
669 |
07 Oct 06 |
peter |
\frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$. |
669 |
07 Oct 06 |
peter |
81 |
|
669 |
07 Oct 06 |
peter |
@return weighted version of SAM score. If absolute=true |
669 |
07 Oct 06 |
peter |
absolute value is returned |
669 |
07 Oct 06 |
peter |
84 |
*/ |
4200 |
19 Aug 22 |
peter |
85 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
86 |
const classifier::DataLookupWeighted1D& value) const; |
669 |
07 Oct 06 |
peter |
87 |
|
669 |
07 Oct 06 |
peter |
88 |
/** |
669 |
07 Oct 06 |
peter |
\f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{\sum |
669 |
07 Oct 06 |
peter |
w_ix_i}{w_i} \f$, \f$ s_0 \f$ is the fudge factor, and \f$ s^2 |
669 |
07 Oct 06 |
peter |
= \left(\frac{1}{n_x}+\frac{1}{n_y} \right) \frac{\sum |
669 |
07 Oct 06 |
peter |
w_i(x_i-m_x)^2 + \sum w_i(y_i-m_y)^2}{n_x+n_y-2} \f$ where \f$ |
669 |
07 Oct 06 |
peter |
n \f$ is weighted version of number of data points \f$ |
669 |
07 Oct 06 |
peter |
\frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$. |
669 |
07 Oct 06 |
peter |
95 |
|
669 |
07 Oct 06 |
peter |
@return weighted version of SAM score. If absolute=true |
669 |
07 Oct 06 |
peter |
absolute value is returned |
669 |
07 Oct 06 |
peter |
98 |
*/ |
4200 |
19 Aug 22 |
peter |
99 |
double score(const classifier::Target& target, |
4200 |
19 Aug 22 |
peter |
100 |
const utility::VectorBase& value, |
4200 |
19 Aug 22 |
peter |
101 |
const utility::VectorBase& weight) const; |
669 |
07 Oct 06 |
peter |
102 |
private: |
669 |
07 Oct 06 |
peter |
103 |
double s0_; |
779 |
05 Mar 07 |
peter |
104 |
|
4200 |
19 Aug 22 |
peter |
105 |
template<class T> |
779 |
05 Mar 07 |
peter |
106 |
double score(const T& positive, const T& negative) const |
779 |
05 Mar 07 |
peter |
107 |
{ |
4200 |
19 Aug 22 |
peter |
108 |
if(positive.n()+negative.n()<=2) |
779 |
05 Mar 07 |
peter |
109 |
return 0; |
779 |
05 Mar 07 |
peter |
110 |
double diff = positive.mean() - negative.mean(); |
4200 |
19 Aug 22 |
peter |
111 |
double s2 = ( (1.0/positive.n()+1.0/negative.n()) * |
779 |
05 Mar 07 |
peter |
112 |
(positive.sum_xx_centered()+negative.sum_xx_centered()) / |
779 |
05 Mar 07 |
peter |
113 |
(positive.n()+negative.n()-2) ); |
779 |
05 Mar 07 |
peter |
114 |
if (diff<0 && absolute_) |
779 |
05 Mar 07 |
peter |
115 |
return -diff/(sqrt(s2)+s0_); |
779 |
05 Mar 07 |
peter |
116 |
return diff/(sqrt(s2)+s0_); |
779 |
05 Mar 07 |
peter |
117 |
} |
669 |
07 Oct 06 |
peter |
118 |
}; |
669 |
07 Oct 06 |
peter |
119 |
|
683 |
11 Oct 06 |
jari |
120 |
}}} // of namespace statistics, yat, and theplu |
669 |
07 Oct 06 |
peter |
121 |
|
669 |
07 Oct 06 |
peter |
122 |
#endif |