680 |
11 Oct 06 |
jari |
1 |
#ifndef _theplu_yat_statistics_fisher_ |
3004 |
24 Mar 13 |
peter |
2 |
#define _theplu_yat_statistics_fisher_ |
186 |
07 Oct 04 |
peter |
3 |
|
616 |
31 Aug 06 |
jari |
// $Id$ |
616 |
31 Aug 06 |
jari |
5 |
|
675 |
10 Oct 06 |
jari |
6 |
/* |
831 |
27 Mar 07 |
peter |
Copyright (C) 2004, 2005 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2006 Jari Häkkinen, Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
3562 |
04 Jan 17 |
peter |
Copyright (C) 2009, 2011, 2013, 2014, 2015, 2017 Peter Johansson |
186 |
07 Oct 04 |
peter |
12 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
675 |
10 Oct 06 |
jari |
14 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
675 |
10 Oct 06 |
jari |
19 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
675 |
10 Oct 06 |
jari |
General Public License for more details. |
675 |
10 Oct 06 |
jari |
24 |
|
675 |
10 Oct 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
675 |
10 Oct 06 |
jari |
27 |
*/ |
675 |
10 Oct 06 |
jari |
28 |
|
3004 |
24 Mar 13 |
peter |
29 |
#include <yat/utility/deprecate.h> |
3004 |
24 Mar 13 |
peter |
30 |
|
186 |
07 Oct 04 |
peter |
31 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
32 |
namespace yat { |
3004 |
24 Mar 13 |
peter |
33 |
namespace statistics { |
3004 |
24 Mar 13 |
peter |
34 |
|
648 |
14 Sep 06 |
peter |
35 |
/** |
3004 |
24 Mar 13 |
peter |
@brief Fisher's exact test. |
648 |
14 Sep 06 |
peter |
37 |
|
648 |
14 Sep 06 |
peter |
Fisher's Exact test is a procedure that you can use for data |
648 |
14 Sep 06 |
peter |
in a two by two contingency table: \f[ \begin{tabular}{|c|c|} |
648 |
14 Sep 06 |
peter |
\hline a&b \tabularnewline \hline c&d \tabularnewline \hline |
648 |
14 Sep 06 |
peter |
\end{tabular} \f] Fisher's Exact Test is based on exact |
648 |
14 Sep 06 |
peter |
probabilities from a specific distribution (the hypergeometric |
648 |
14 Sep 06 |
peter |
distribution). There's really no lower bound on the amount of |
648 |
14 Sep 06 |
peter |
data that is needed for Fisher's Exact Test. You do have to |
648 |
14 Sep 06 |
peter |
have at least one data value in each row and one data value in |
648 |
14 Sep 06 |
peter |
each column. If an entire row or column is zero, then you |
648 |
14 Sep 06 |
peter |
don't really have a 2 by 2 table. But you can use Fisher's |
648 |
14 Sep 06 |
peter |
Exact Test when one of the cells in your table has a zero in |
648 |
14 Sep 06 |
peter |
it. Fisher's Exact Test is also very useful for highly |
648 |
14 Sep 06 |
peter |
imbalanced tables. If one or two of the cells in a two by two |
648 |
14 Sep 06 |
peter |
table have numbers in the thousands and one or two of the |
648 |
14 Sep 06 |
peter |
other cells has numbers less than 5, you can still use |
648 |
14 Sep 06 |
peter |
Fisher's Exact Test. For very large tables (where all four |
648 |
14 Sep 06 |
peter |
entries in the two by two table are large), your computer may |
648 |
14 Sep 06 |
peter |
take too much time to compute Fisher's Exact Test. In these |
648 |
14 Sep 06 |
peter |
situations, though, you might as well use the Chi-square test |
648 |
14 Sep 06 |
peter |
because a large sample approximation (that the Chi-square test |
648 |
14 Sep 06 |
peter |
relies on) is very reasonable. If all elements are larger than |
3004 |
24 Mar 13 |
peter |
10 a Chi-square test is reasonable to use. |
3004 |
24 Mar 13 |
peter |
60 |
|
648 |
14 Sep 06 |
peter |
@note The statistica assumes that each column and row sum, |
648 |
14 Sep 06 |
peter |
respectively, are fixed. Just because you have a 2x2 table, this |
2555 |
18 Aug 11 |
peter |
assumtion does not necessarily match you experimental setup. See |
648 |
14 Sep 06 |
peter |
e.g. Barnard's test for alternative. |
648 |
14 Sep 06 |
peter |
65 |
*/ |
3004 |
24 Mar 13 |
peter |
66 |
|
777 |
04 Mar 07 |
peter |
67 |
class Fisher |
186 |
07 Oct 04 |
peter |
68 |
{ |
3004 |
24 Mar 13 |
peter |
69 |
|
186 |
07 Oct 04 |
peter |
70 |
public: |
186 |
07 Oct 04 |
peter |
71 |
/// |
186 |
07 Oct 04 |
peter |
/// Default Constructor. |
3298 |
08 Aug 14 |
peter |
73 |
/// |
3298 |
08 Aug 14 |
peter |
/// \param yates if true Yates's correction is used for |
3298 |
08 Aug 14 |
peter |
/// chi-squared calculation |
186 |
07 Oct 04 |
peter |
76 |
/// |
3561 |
04 Jan 17 |
peter |
/// \since Constructor with argument was introduced in yat 0.13 |
3561 |
04 Jan 17 |
peter |
78 |
/// |
3298 |
08 Aug 14 |
peter |
79 |
Fisher(bool yates=false); |
186 |
07 Oct 04 |
peter |
80 |
|
186 |
07 Oct 04 |
peter |
81 |
/// |
3004 |
24 Mar 13 |
peter |
/// Destructor |
186 |
07 Oct 04 |
peter |
83 |
/// |
777 |
04 Mar 07 |
peter |
84 |
virtual ~Fisher(void); |
3004 |
24 Mar 13 |
peter |
85 |
|
3004 |
24 Mar 13 |
peter |
86 |
|
1746 |
23 Jan 09 |
peter |
87 |
/** |
1746 |
23 Jan 09 |
peter |
The Chi2 score is calculated as \f$ \sum |
1746 |
23 Jan 09 |
peter |
\frac{(O_i-E_i)^2}{E_i}\f$ where \a E is expected value and \a |
1746 |
23 Jan 09 |
peter |
O is observed value. |
1746 |
23 Jan 09 |
peter |
91 |
|
3298 |
08 Aug 14 |
peter |
If indicated in constructor, Yates's correction is used, i.e., |
3298 |
08 Aug 14 |
peter |
Chi2 is calculated as \f$ \frac{(|O_i-E_i|-0.5)^2}{E_i} \f$ |
3298 |
08 Aug 14 |
peter |
94 |
|
3298 |
08 Aug 14 |
peter |
95 |
|
3004 |
24 Mar 13 |
peter |
\see expected(double&, double&, double&, double&) |
3004 |
24 Mar 13 |
peter |
97 |
|
1746 |
23 Jan 09 |
peter |
\return Chi2 score |
1746 |
23 Jan 09 |
peter |
99 |
*/ |
449 |
15 Dec 05 |
peter |
100 |
double Chi2(void) const; |
449 |
15 Dec 05 |
peter |
101 |
|
777 |
04 Mar 07 |
peter |
102 |
/** |
777 |
04 Mar 07 |
peter |
Calculates the expected values under the null hypothesis. |
777 |
04 Mar 07 |
peter |
\f$ a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$, |
777 |
04 Mar 07 |
peter |
\f$ b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$, |
777 |
04 Mar 07 |
peter |
\f$ c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$, |
777 |
04 Mar 07 |
peter |
\f$ d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$, |
777 |
04 Mar 07 |
peter |
108 |
*/ |
449 |
15 Dec 05 |
peter |
109 |
void expected(double& a, double& b, double& c, double& d) const; |
447 |
15 Dec 05 |
peter |
110 |
|
447 |
15 Dec 05 |
peter |
111 |
/// |
777 |
04 Mar 07 |
peter |
/// If all elements in table is at least minimum_size(), a Chi2 |
777 |
04 Mar 07 |
peter |
/// approximation is used for p-value calculation. |
447 |
15 Dec 05 |
peter |
114 |
/// |
447 |
15 Dec 05 |
peter |
/// @return reference to minimum_size |
447 |
15 Dec 05 |
peter |
116 |
/// |
1271 |
09 Apr 08 |
peter |
117 |
unsigned int& minimum_size(void); |
447 |
15 Dec 05 |
peter |
118 |
|
447 |
15 Dec 05 |
peter |
119 |
/// |
777 |
04 Mar 07 |
peter |
/// If all elements in table is at least minimum_size(), a Chi2 |
777 |
04 Mar 07 |
peter |
/// approximation is used for p-value calculation. |
447 |
15 Dec 05 |
peter |
122 |
/// |
777 |
04 Mar 07 |
peter |
/// @return const reference to minimum_size |
447 |
15 Dec 05 |
peter |
124 |
/// |
1271 |
09 Apr 08 |
peter |
125 |
const unsigned int& minimum_size(void) const; |
186 |
07 Oct 04 |
peter |
126 |
|
1746 |
23 Jan 09 |
peter |
127 |
/** |
3004 |
24 Mar 13 |
peter |
Calculates probability to get oddsratio (or smaller). |
3004 |
24 Mar 13 |
peter |
129 |
|
1746 |
23 Jan 09 |
peter |
If all elements in table is at least minimum_size(), a Chi2 |
1746 |
23 Jan 09 |
peter |
approximation is used. |
3004 |
24 Mar 13 |
peter |
132 |
|
3004 |
24 Mar 13 |
peter |
\since New in yat 0.11 |
3004 |
24 Mar 13 |
peter |
134 |
*/ |
3004 |
24 Mar 13 |
peter |
135 |
double p_left(void) const; |
3004 |
24 Mar 13 |
peter |
136 |
|
3004 |
24 Mar 13 |
peter |
137 |
/** |
3004 |
24 Mar 13 |
peter |
Calculates probability to get oddsratio (or greater). |
3004 |
24 Mar 13 |
peter |
139 |
|
3004 |
24 Mar 13 |
peter |
If all elements in table is at least minimum_size(), a Chi2 |
3004 |
24 Mar 13 |
peter |
approximation is used. |
3004 |
24 Mar 13 |
peter |
142 |
|
3004 |
24 Mar 13 |
peter |
\since New in yat 0.11 |
3004 |
24 Mar 13 |
peter |
144 |
*/ |
3004 |
24 Mar 13 |
peter |
145 |
double p_right(void) const; |
3004 |
24 Mar 13 |
peter |
146 |
|
3004 |
24 Mar 13 |
peter |
147 |
/** |
3004 |
24 Mar 13 |
peter |
If all elements in table is at least minimum_size(), a Chi2 |
3004 |
24 Mar 13 |
peter |
approximation is used. |
3004 |
24 Mar 13 |
peter |
150 |
|
1746 |
23 Jan 09 |
peter |
Otherwise a two-sided p-value is calculated using the |
1746 |
23 Jan 09 |
peter |
hypergeometric distribution |
1746 |
23 Jan 09 |
peter |
\f$ \sum_k P(k) \f$ where summation runs over \a k such that |
3441 |
23 Nov 15 |
peter |
\f$ P(k) \le P(a) \f$. |
1746 |
23 Jan 09 |
peter |
155 |
|
1746 |
23 Jan 09 |
peter |
\return two-sided p-value |
1746 |
23 Jan 09 |
peter |
157 |
*/ |
3004 |
24 Mar 13 |
peter |
158 |
double p_value(void) const; |
3004 |
24 Mar 13 |
peter |
159 |
|
623 |
05 Sep 06 |
peter |
160 |
/// |
777 |
04 Mar 07 |
peter |
/// One-sided p-value is probability to get larger (or equal) oddsratio. |
623 |
05 Sep 06 |
peter |
162 |
/// |
777 |
04 Mar 07 |
peter |
/// If all elements in table is at least minimum_size(), a Chi2 |
777 |
04 Mar 07 |
peter |
/// approximation is used. |
623 |
05 Sep 06 |
peter |
165 |
/// |
777 |
04 Mar 07 |
peter |
/// @return One-sided p-value |
760 |
20 Feb 07 |
peter |
167 |
/// |
3004 |
24 Mar 13 |
peter |
/// \deprecated Provided for backward compatibility with the 0.10 |
3004 |
24 Mar 13 |
peter |
/// API. Use p_right() instead. |
3004 |
24 Mar 13 |
peter |
170 |
/// |
3004 |
24 Mar 13 |
peter |
171 |
double p_value_one_sided() const YAT_DEPRECATE; |
3004 |
24 Mar 13 |
peter |
172 |
|
777 |
04 Mar 07 |
peter |
173 |
/** |
3004 |
24 Mar 13 |
peter |
Function calculating odds ratio from 2x2 table |
777 |
04 Mar 07 |
peter |
\f[ \begin{tabular}{|c|c|} |
777 |
04 Mar 07 |
peter |
\hline a&b \tabularnewline \hline c&d \tabularnewline \hline |
777 |
04 Mar 07 |
peter |
\end{tabular} \f] as \f$ \frac{ad}{bc} \f$ |
186 |
07 Oct 04 |
peter |
178 |
|
1746 |
23 Jan 09 |
peter |
Object will remember the values of \a a, \a b, \a c, and \a d. |
1746 |
23 Jan 09 |
peter |
180 |
|
3004 |
24 Mar 13 |
peter |
@return odds ratio. |
186 |
07 Oct 04 |
peter |
182 |
|
777 |
04 Mar 07 |
peter |
@throw If table is invalid a runtime_error is thrown. A table |
777 |
04 Mar 07 |
peter |
is invalid if a row or column sum is zero. |
777 |
04 Mar 07 |
peter |
185 |
*/ |
3004 |
24 Mar 13 |
peter |
186 |
double oddsratio(const unsigned int a, const unsigned int b, |
1271 |
09 Apr 08 |
peter |
187 |
const unsigned int c, const unsigned int d); |
718 |
26 Dec 06 |
jari |
188 |
|
2523 |
18 Jul 11 |
peter |
189 |
/** |
2523 |
18 Jul 11 |
peter |
\return oddsratio loaded via oddsratio(4) |
2523 |
18 Jul 11 |
peter |
191 |
|
2523 |
18 Jul 11 |
peter |
\since New in yat 0.8 |
2523 |
18 Jul 11 |
peter |
193 |
*/ |
2523 |
18 Jul 11 |
peter |
194 |
double oddsratio(void) const; |
2523 |
18 Jul 11 |
peter |
195 |
|
186 |
07 Oct 04 |
peter |
196 |
private: |
3004 |
24 Mar 13 |
peter |
197 |
bool calculate_p_exact(void) const; |
447 |
15 Dec 05 |
peter |
198 |
|
449 |
15 Dec 05 |
peter |
// two-sided |
447 |
15 Dec 05 |
peter |
200 |
double p_value_approximative(void) const; |
3441 |
23 Nov 15 |
peter |
// two-sided |
447 |
15 Dec 05 |
peter |
202 |
double p_value_exact(void) const; |
3441 |
23 Nov 15 |
peter |
// calculate two-sided p-value to get k (or fewer) wins when |
3441 |
23 Nov 15 |
peter |
// drawing t samples out of of a population of n1 wins and n2 losses |
3441 |
23 Nov 15 |
peter |
205 |
double p_value_exact(unsigned int k, unsigned int n1, unsigned int n2, |
3441 |
23 Nov 15 |
peter |
206 |
unsigned int t) const; |
447 |
15 Dec 05 |
peter |
207 |
|
3441 |
23 Nov 15 |
peter |
208 |
double lower_tail(unsigned int k, unsigned int n1, unsigned int n2, |
3441 |
23 Nov 15 |
peter |
209 |
unsigned int t) const; |
3441 |
23 Nov 15 |
peter |
210 |
|
3441 |
23 Nov 15 |
peter |
// return P(X=k+1) / P(X=k) |
3441 |
23 Nov 15 |
peter |
212 |
double hypergeometric_ratio(unsigned int k, unsigned int n1, |
3441 |
23 Nov 15 |
peter |
213 |
unsigned int n2, unsigned int t) const; |
3441 |
23 Nov 15 |
peter |
214 |
double choose_ratio(unsigned int n, unsigned int k) const; |
3441 |
23 Nov 15 |
peter |
215 |
|
3441 |
23 Nov 15 |
peter |
216 |
double p_left_exact(void) const; |
3441 |
23 Nov 15 |
peter |
217 |
double p_right_exact(void) const; |
3441 |
23 Nov 15 |
peter |
218 |
|
3298 |
08 Aug 14 |
peter |
219 |
double yates(unsigned int o, double e) const; |
3298 |
08 Aug 14 |
peter |
220 |
|
1271 |
09 Apr 08 |
peter |
221 |
unsigned int a_; |
1271 |
09 Apr 08 |
peter |
222 |
unsigned int b_; |
1271 |
09 Apr 08 |
peter |
223 |
unsigned int c_; |
1271 |
09 Apr 08 |
peter |
224 |
unsigned int d_; |
1271 |
09 Apr 08 |
peter |
225 |
unsigned int minimum_size_; |
449 |
15 Dec 05 |
peter |
226 |
double oddsratio_; |
3298 |
08 Aug 14 |
peter |
227 |
bool yates_; |
186 |
07 Oct 04 |
peter |
228 |
}; |
186 |
07 Oct 04 |
peter |
229 |
|
683 |
11 Oct 06 |
jari |
230 |
}}} // of namespace statistics, yat, and theplu |
186 |
07 Oct 04 |
peter |
231 |
|
186 |
07 Oct 04 |
peter |
232 |
#endif |