3723 |
05 Feb 18 |
peter |
// $Id$ |
3723 |
05 Feb 18 |
peter |
2 |
|
3723 |
05 Feb 18 |
peter |
3 |
/* |
4207 |
26 Aug 22 |
peter |
Copyright (C) 2018, 2022 Peter Johansson |
3723 |
05 Feb 18 |
peter |
5 |
|
3723 |
05 Feb 18 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
3723 |
05 Feb 18 |
peter |
7 |
|
3723 |
05 Feb 18 |
peter |
The yat library is free software; you can redistribute it and/or |
3723 |
05 Feb 18 |
peter |
modify it under the terms of the GNU General Public License as |
3723 |
05 Feb 18 |
peter |
published by the Free Software Foundation; either version 3 of the |
3723 |
05 Feb 18 |
peter |
License, or (at your option) any later version. |
3723 |
05 Feb 18 |
peter |
12 |
|
3723 |
05 Feb 18 |
peter |
The yat library is distributed in the hope that it will be useful, |
3723 |
05 Feb 18 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
3723 |
05 Feb 18 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3723 |
05 Feb 18 |
peter |
General Public License for more details. |
3723 |
05 Feb 18 |
peter |
17 |
|
3723 |
05 Feb 18 |
peter |
You should have received a copy of the GNU General Public License |
3723 |
05 Feb 18 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
3723 |
05 Feb 18 |
peter |
20 |
*/ |
3723 |
05 Feb 18 |
peter |
21 |
|
3723 |
05 Feb 18 |
peter |
22 |
#include <config.h> |
3723 |
05 Feb 18 |
peter |
23 |
|
3723 |
05 Feb 18 |
peter |
24 |
#include "Chi2.h" |
3723 |
05 Feb 18 |
peter |
25 |
|
3725 |
08 Feb 18 |
peter |
26 |
#include "yat/random/random.h" |
3723 |
05 Feb 18 |
peter |
27 |
#include "yat/utility/Vector.h" |
3723 |
05 Feb 18 |
peter |
28 |
|
3724 |
06 Feb 18 |
peter |
29 |
#include <gsl/gsl_cdf.h> |
3724 |
06 Feb 18 |
peter |
30 |
|
3723 |
05 Feb 18 |
peter |
31 |
#include <cassert> |
3723 |
05 Feb 18 |
peter |
32 |
#include <cmath> |
3725 |
08 Feb 18 |
peter |
33 |
#include <functional> |
3725 |
08 Feb 18 |
peter |
34 |
#include <numeric> |
3723 |
05 Feb 18 |
peter |
35 |
|
3723 |
05 Feb 18 |
peter |
36 |
namespace theplu { |
3723 |
05 Feb 18 |
peter |
37 |
namespace yat { |
3723 |
05 Feb 18 |
peter |
38 |
namespace statistics { |
3723 |
05 Feb 18 |
peter |
39 |
|
4125 |
14 Jan 22 |
peter |
40 |
void Chi2::calculate_colsum(const utility::MatrixBase& data) |
3725 |
08 Feb 18 |
peter |
41 |
{ |
3725 |
08 Feb 18 |
peter |
42 |
colsum_.resize(data.columns()); |
3725 |
08 Feb 18 |
peter |
43 |
for (size_t i=0; i<colsum_.size(); ++i) |
3725 |
08 Feb 18 |
peter |
44 |
colsum_(i) = sum(data.column_const_view(i)); |
3725 |
08 Feb 18 |
peter |
45 |
} |
3725 |
08 Feb 18 |
peter |
46 |
|
3725 |
08 Feb 18 |
peter |
47 |
|
4125 |
14 Jan 22 |
peter |
48 |
void Chi2::calculate_expected(const utility::MatrixBase& data) |
3725 |
08 Feb 18 |
peter |
49 |
{ |
3725 |
08 Feb 18 |
peter |
50 |
expected_.resize(data.rows(), data.columns()); |
3725 |
08 Feb 18 |
peter |
51 |
|
3725 |
08 Feb 18 |
peter |
52 |
for (size_t i=0; i<expected_.rows(); ++i) |
3725 |
08 Feb 18 |
peter |
53 |
for (size_t j=0; j<expected_.columns(); ++j) |
3725 |
08 Feb 18 |
peter |
54 |
expected_(i, j) = rowsum_(i) * colsum_(j) / total_; |
3725 |
08 Feb 18 |
peter |
55 |
} |
3725 |
08 Feb 18 |
peter |
56 |
|
3725 |
08 Feb 18 |
peter |
57 |
|
4125 |
14 Jan 22 |
peter |
58 |
void Chi2::calculate_rowsum(const utility::MatrixBase& data) |
3725 |
08 Feb 18 |
peter |
59 |
{ |
3725 |
08 Feb 18 |
peter |
60 |
rowsum_.resize(data.rows()); |
3725 |
08 Feb 18 |
peter |
61 |
for (size_t i=0; i<rowsum_.size(); ++i) |
3725 |
08 Feb 18 |
peter |
62 |
rowsum_(i) = sum(data.row_const_view(i)); |
3725 |
08 Feb 18 |
peter |
63 |
} |
3725 |
08 Feb 18 |
peter |
64 |
|
3725 |
08 Feb 18 |
peter |
65 |
|
4125 |
14 Jan 22 |
peter |
66 |
double Chi2::chi2sum(const utility::MatrixBase& data, |
3725 |
08 Feb 18 |
peter |
67 |
const utility::Matrix& expected) const |
3725 |
08 Feb 18 |
peter |
68 |
{ |
3725 |
08 Feb 18 |
peter |
69 |
assert(data.rows() == expected.rows()); |
3725 |
08 Feb 18 |
peter |
70 |
assert(data.columns() == expected.columns()); |
3725 |
08 Feb 18 |
peter |
71 |
return std::inner_product(data.begin(), data.end(), |
3725 |
08 Feb 18 |
peter |
72 |
expected.begin(), |
3725 |
08 Feb 18 |
peter |
73 |
0.0, std::plus<double>(), Calculator()); |
3725 |
08 Feb 18 |
peter |
74 |
} |
3725 |
08 Feb 18 |
peter |
75 |
|
3725 |
08 Feb 18 |
peter |
76 |
|
3723 |
05 Feb 18 |
peter |
77 |
unsigned int Chi2::dof(void) const |
3723 |
05 Feb 18 |
peter |
78 |
{ |
3723 |
05 Feb 18 |
peter |
79 |
return (expected_.rows()-1) * (expected_.columns()-1); |
3723 |
05 Feb 18 |
peter |
80 |
} |
3723 |
05 Feb 18 |
peter |
81 |
|
3723 |
05 Feb 18 |
peter |
82 |
|
3725 |
08 Feb 18 |
peter |
83 |
size_t Chi2::draw(const utility::Vector& hist, unsigned long int N) const |
3725 |
08 Feb 18 |
peter |
84 |
{ |
3725 |
08 Feb 18 |
peter |
85 |
random::DiscreteUniform urand; |
3725 |
08 Feb 18 |
peter |
86 |
unsigned long int u = urand(N); |
3725 |
08 Feb 18 |
peter |
87 |
|
3725 |
08 Feb 18 |
peter |
88 |
for (size_t index = 0; index<hist.size(); ++index) { |
3725 |
08 Feb 18 |
peter |
89 |
if (u < hist(index)) |
3725 |
08 Feb 18 |
peter |
90 |
return index; |
3725 |
08 Feb 18 |
peter |
91 |
u -= hist(index); |
3725 |
08 Feb 18 |
peter |
92 |
} |
3725 |
08 Feb 18 |
peter |
93 |
return 0; |
3725 |
08 Feb 18 |
peter |
94 |
} |
3725 |
08 Feb 18 |
peter |
95 |
|
3725 |
08 Feb 18 |
peter |
96 |
|
3723 |
05 Feb 18 |
peter |
97 |
const utility::Matrix& Chi2::expected(void) const |
3723 |
05 Feb 18 |
peter |
98 |
{ |
3723 |
05 Feb 18 |
peter |
99 |
return expected_; |
3723 |
05 Feb 18 |
peter |
100 |
} |
3723 |
05 Feb 18 |
peter |
101 |
|
3723 |
05 Feb 18 |
peter |
102 |
|
3723 |
05 Feb 18 |
peter |
103 |
double Chi2::p_value(void) const |
3723 |
05 Feb 18 |
peter |
104 |
{ |
3724 |
06 Feb 18 |
peter |
105 |
return gsl_cdf_chisq_Q(chi2_, dof()); |
3723 |
05 Feb 18 |
peter |
106 |
} |
3723 |
05 Feb 18 |
peter |
107 |
|
3723 |
05 Feb 18 |
peter |
108 |
|
3725 |
08 Feb 18 |
peter |
109 |
utility::Matrix Chi2::generate_random_data(void) const |
3725 |
08 Feb 18 |
peter |
110 |
{ |
3725 |
08 Feb 18 |
peter |
111 |
utility::Matrix X(rowsum_.size(), colsum_.size()); |
3725 |
08 Feb 18 |
peter |
// The hyper geometric distribution corresponds to having n1 balls |
3725 |
08 Feb 18 |
peter |
// of colour1 and n2 balls of colour2, and we pick t balls |
3725 |
08 Feb 18 |
peter |
// randomly without replacement, number of drawn samples of |
3725 |
08 Feb 18 |
peter |
// colour1 follows the hyper geometric distribution. |
3725 |
08 Feb 18 |
peter |
116 |
|
3725 |
08 Feb 18 |
peter |
// Here we generalise that in both dimensions: we have rowsum_(i) |
3725 |
08 Feb 18 |
peter |
// balls of colour i and we place them randomly into boxes such |
3725 |
08 Feb 18 |
peter |
// that box j contains colsum_(j) balls. In the resulting Matrix |
3725 |
08 Feb 18 |
peter |
// X, X(i,j) will describe how balls of colour i we have in box j. |
3725 |
08 Feb 18 |
peter |
121 |
|
3725 |
08 Feb 18 |
peter |
// number of balls left with color i |
3725 |
08 Feb 18 |
peter |
123 |
utility::Vector n_color(colsum_); |
3725 |
08 Feb 18 |
peter |
// number of balls left (= sum(color)) |
3725 |
08 Feb 18 |
peter |
125 |
unsigned long int N(total_); |
3725 |
08 Feb 18 |
peter |
126 |
|
3725 |
08 Feb 18 |
peter |
127 |
for (size_t box=0; box<X.rows(); ++box) { |
3725 |
08 Feb 18 |
peter |
128 |
const unsigned long int n = rowsum_(box); |
3725 |
08 Feb 18 |
peter |
129 |
for (size_t i=0; i<n; ++i) { |
3725 |
08 Feb 18 |
peter |
130 |
size_t color = draw(n_color, N); |
3725 |
08 Feb 18 |
peter |
131 |
n_color(color) -= 1.0; |
3725 |
08 Feb 18 |
peter |
132 |
N -= 1.0; |
3725 |
08 Feb 18 |
peter |
133 |
X(box, color) += 1.0; |
3725 |
08 Feb 18 |
peter |
134 |
} |
3725 |
08 Feb 18 |
peter |
135 |
} |
3725 |
08 Feb 18 |
peter |
136 |
|
3725 |
08 Feb 18 |
peter |
137 |
return X; |
3725 |
08 Feb 18 |
peter |
138 |
} |
3725 |
08 Feb 18 |
peter |
139 |
|
3725 |
08 Feb 18 |
peter |
140 |
|
3725 |
08 Feb 18 |
peter |
141 |
double Chi2::p_value(size_t N) const |
3725 |
08 Feb 18 |
peter |
142 |
{ |
3725 |
08 Feb 18 |
peter |
143 |
size_t count = 0; |
3725 |
08 Feb 18 |
peter |
144 |
for (size_t i=0; i<N; ++i) { |
3725 |
08 Feb 18 |
peter |
145 |
utility::Matrix X = generate_random_data(); |
3725 |
08 Feb 18 |
peter |
146 |
double s = chi2sum(X, expected_); |
3725 |
08 Feb 18 |
peter |
147 |
if (s >= chi2_) |
3725 |
08 Feb 18 |
peter |
148 |
++count; |
3725 |
08 Feb 18 |
peter |
149 |
} |
3725 |
08 Feb 18 |
peter |
150 |
|
3725 |
08 Feb 18 |
peter |
151 |
return static_cast<double>(count) / N; |
3725 |
08 Feb 18 |
peter |
152 |
} |
3725 |
08 Feb 18 |
peter |
153 |
|
3725 |
08 Feb 18 |
peter |
154 |
|
3723 |
05 Feb 18 |
peter |
155 |
double Chi2::operator()(void) |
3723 |
05 Feb 18 |
peter |
156 |
{ |
3723 |
05 Feb 18 |
peter |
157 |
return chi2_; |
3723 |
05 Feb 18 |
peter |
158 |
} |
3723 |
05 Feb 18 |
peter |
159 |
|
3723 |
05 Feb 18 |
peter |
160 |
|
4125 |
14 Jan 22 |
peter |
161 |
double Chi2::operator()(const utility::MatrixBase& data) |
3723 |
05 Feb 18 |
peter |
162 |
{ |
3725 |
08 Feb 18 |
peter |
163 |
calculate_rowsum(data); |
3725 |
08 Feb 18 |
peter |
164 |
calculate_colsum(data); |
3723 |
05 Feb 18 |
peter |
// calculate total using the shortest vector |
3725 |
08 Feb 18 |
peter |
166 |
total_ = (rowsum_.size() < colsum_.size()) ? sum(rowsum_) : sum(colsum_); |
3723 |
05 Feb 18 |
peter |
167 |
|
3725 |
08 Feb 18 |
peter |
168 |
calculate_expected(data); |
3725 |
08 Feb 18 |
peter |
169 |
chi2_ = chi2sum(data, expected_); |
3725 |
08 Feb 18 |
peter |
170 |
return chi2_; |
3725 |
08 Feb 18 |
peter |
171 |
} |
3723 |
05 Feb 18 |
peter |
172 |
|
3723 |
05 Feb 18 |
peter |
173 |
|
3725 |
08 Feb 18 |
peter |
174 |
double Chi2::Calculator::operator()(double x, double m) const |
3725 |
08 Feb 18 |
peter |
175 |
{ |
3725 |
08 Feb 18 |
peter |
176 |
double diff = x - m; |
3725 |
08 Feb 18 |
peter |
177 |
return diff * diff / m; |
3723 |
05 Feb 18 |
peter |
178 |
} |
3723 |
05 Feb 18 |
peter |
179 |
|
3725 |
08 Feb 18 |
peter |
180 |
|
3723 |
05 Feb 18 |
peter |
181 |
}}} // of namespace statistics, yat, and theplu |