4184 |
30 Jun 22 |
peter |
// $Id$ |
4184 |
30 Jun 22 |
peter |
2 |
|
4184 |
30 Jun 22 |
peter |
3 |
/* |
4184 |
30 Jun 22 |
peter |
Copyright (C) 2022 Peter Johansson |
4184 |
30 Jun 22 |
peter |
5 |
|
4184 |
30 Jun 22 |
peter |
This file is part of the yat library, https://dev.thep.lu.se/yat |
4184 |
30 Jun 22 |
peter |
7 |
|
4184 |
30 Jun 22 |
peter |
The yat library is free software; you can redistribute it and/or |
4184 |
30 Jun 22 |
peter |
modify it under the terms of the GNU General Public License as |
4184 |
30 Jun 22 |
peter |
published by the Free Software Foundation; either version 3 of the |
4184 |
30 Jun 22 |
peter |
License, or (at your option) any later version. |
4184 |
30 Jun 22 |
peter |
12 |
|
4184 |
30 Jun 22 |
peter |
The yat library is distributed in the hope that it will be useful, |
4184 |
30 Jun 22 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
4184 |
30 Jun 22 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
4184 |
30 Jun 22 |
peter |
General Public License for more details. |
4184 |
30 Jun 22 |
peter |
17 |
|
4184 |
30 Jun 22 |
peter |
You should have received a copy of the GNU General Public License |
4184 |
30 Jun 22 |
peter |
along with yat. If not, see <https://www.gnu.org/licenses/>. |
4184 |
30 Jun 22 |
peter |
20 |
*/ |
4184 |
30 Jun 22 |
peter |
21 |
|
4184 |
30 Jun 22 |
peter |
22 |
#include <config.h> |
4184 |
30 Jun 22 |
peter |
23 |
|
4184 |
30 Jun 22 |
peter |
24 |
#include "NegativeBinomialMixture.h" |
4184 |
30 Jun 22 |
peter |
25 |
#include "GaussianMixture.h" |
4184 |
30 Jun 22 |
peter |
26 |
|
4184 |
30 Jun 22 |
peter |
27 |
#include "yat/utility/BFGS2.h" |
4184 |
30 Jun 22 |
peter |
28 |
|
4184 |
30 Jun 22 |
peter |
29 |
#include <gsl/gsl_randist.h> |
4184 |
30 Jun 22 |
peter |
30 |
#include <gsl/gsl_sf_gamma.h> |
4184 |
30 Jun 22 |
peter |
31 |
#include <gsl/gsl_sf_psi.h> |
4184 |
30 Jun 22 |
peter |
32 |
|
4184 |
30 Jun 22 |
peter |
33 |
#include <cassert> |
4184 |
30 Jun 22 |
peter |
34 |
#include <cmath> |
4184 |
30 Jun 22 |
peter |
35 |
|
4184 |
30 Jun 22 |
peter |
36 |
namespace theplu { |
4184 |
30 Jun 22 |
peter |
37 |
namespace yat { |
4184 |
30 Jun 22 |
peter |
38 |
namespace statistics { |
4184 |
30 Jun 22 |
peter |
39 |
|
4184 |
30 Jun 22 |
peter |
40 |
void NegativeBinomialMixture::add(unsigned long int k, unsigned long int n) |
4184 |
30 Jun 22 |
peter |
41 |
{ |
4184 |
30 Jun 22 |
peter |
42 |
auto it = count_.lower_bound(k); |
4184 |
30 Jun 22 |
peter |
43 |
if (it == count_.end() || k < it->first) |
4184 |
30 Jun 22 |
peter |
44 |
count_.insert(it, std::make_pair(k, n)); |
4184 |
30 Jun 22 |
peter |
45 |
else |
4184 |
30 Jun 22 |
peter |
46 |
it->second += n; |
4184 |
30 Jun 22 |
peter |
47 |
} |
4184 |
30 Jun 22 |
peter |
48 |
|
4184 |
30 Jun 22 |
peter |
49 |
|
4184 |
30 Jun 22 |
peter |
50 |
const utility::VectorBase& NegativeBinomialMixture::alpha(void) const |
4184 |
30 Jun 22 |
peter |
51 |
{ |
4184 |
30 Jun 22 |
peter |
52 |
return alpha_; |
4184 |
30 Jun 22 |
peter |
53 |
} |
4184 |
30 Jun 22 |
peter |
54 |
|
4184 |
30 Jun 22 |
peter |
55 |
|
4184 |
30 Jun 22 |
peter |
56 |
void NegativeBinomialMixture::clear(void) |
4184 |
30 Jun 22 |
peter |
57 |
{ |
4184 |
30 Jun 22 |
peter |
58 |
count_.clear(); |
4184 |
30 Jun 22 |
peter |
59 |
} |
4184 |
30 Jun 22 |
peter |
60 |
|
4184 |
30 Jun 22 |
peter |
61 |
|
4184 |
30 Jun 22 |
peter |
62 |
void NegativeBinomialMixture::fit(size_t n) |
4184 |
30 Jun 22 |
peter |
63 |
{ |
4184 |
30 Jun 22 |
peter |
64 |
if (count_.empty()) |
4184 |
30 Jun 22 |
peter |
65 |
return; |
4184 |
30 Jun 22 |
peter |
66 |
init_fit(n); |
4184 |
30 Jun 22 |
peter |
67 |
optimize(); |
4184 |
30 Jun 22 |
peter |
68 |
} |
4184 |
30 Jun 22 |
peter |
69 |
|
4184 |
30 Jun 22 |
peter |
70 |
|
4184 |
30 Jun 22 |
peter |
71 |
void NegativeBinomialMixture::fit(const utility::VectorBase& m, |
4184 |
30 Jun 22 |
peter |
72 |
const utility::VectorBase& alpha, |
4184 |
30 Jun 22 |
peter |
73 |
const utility::VectorBase& tau) |
4184 |
30 Jun 22 |
peter |
74 |
{ |
4184 |
30 Jun 22 |
peter |
75 |
m_ = m; |
4184 |
30 Jun 22 |
peter |
76 |
alpha_ = alpha; |
4184 |
30 Jun 22 |
peter |
77 |
tau_ = tau; |
4184 |
30 Jun 22 |
peter |
78 |
optimize(); |
4184 |
30 Jun 22 |
peter |
79 |
} |
4184 |
30 Jun 22 |
peter |
80 |
|
4184 |
30 Jun 22 |
peter |
81 |
|
4184 |
30 Jun 22 |
peter |
82 |
void NegativeBinomialMixture::init_fit(size_t n) |
4184 |
30 Jun 22 |
peter |
83 |
{ |
4184 |
30 Jun 22 |
peter |
84 |
m_.resize(n); |
4184 |
30 Jun 22 |
peter |
85 |
alpha_.resize(n, 1.1); |
4184 |
30 Jun 22 |
peter |
86 |
tau_.resize(n, 1.0/n); |
4184 |
30 Jun 22 |
peter |
87 |
|
4184 |
30 Jun 22 |
peter |
88 |
statistics::GaussianMixture gm; |
4184 |
30 Jun 22 |
peter |
89 |
for (const auto& x : count_) |
4184 |
30 Jun 22 |
peter |
90 |
gm.add(x.first, x.second); |
4184 |
30 Jun 22 |
peter |
91 |
gm.fit(n); |
4184 |
30 Jun 22 |
peter |
92 |
|
4184 |
30 Jun 22 |
peter |
93 |
for (size_t i=0; i<n; ++i) |
4184 |
30 Jun 22 |
peter |
94 |
m_(i) = gm.mean(i); |
4184 |
30 Jun 22 |
peter |
95 |
} |
4184 |
30 Jun 22 |
peter |
96 |
|
4184 |
30 Jun 22 |
peter |
97 |
|
4184 |
30 Jun 22 |
peter |
98 |
double NegativeBinomialMixture::logL(void) const |
4184 |
30 Jun 22 |
peter |
99 |
{ |
4184 |
30 Jun 22 |
peter |
100 |
double sum = 0; |
4184 |
30 Jun 22 |
peter |
101 |
for (const auto& c : count_) |
4184 |
30 Jun 22 |
peter |
102 |
sum = c.second * std::log(pdf(c.first)); |
4184 |
30 Jun 22 |
peter |
103 |
return sum; |
4184 |
30 Jun 22 |
peter |
104 |
} |
4184 |
30 Jun 22 |
peter |
105 |
|
4184 |
30 Jun 22 |
peter |
106 |
|
4184 |
30 Jun 22 |
peter |
107 |
const utility::VectorBase& NegativeBinomialMixture::m(void) const |
4184 |
30 Jun 22 |
peter |
108 |
{ |
4184 |
30 Jun 22 |
peter |
109 |
return m_; |
4184 |
30 Jun 22 |
peter |
110 |
} |
4184 |
30 Jun 22 |
peter |
111 |
|
4184 |
30 Jun 22 |
peter |
112 |
|
4184 |
30 Jun 22 |
peter |
113 |
void NegativeBinomialMixture::optimize(void) |
4184 |
30 Jun 22 |
peter |
114 |
{ |
4184 |
30 Jun 22 |
peter |
115 |
assert(count_.size()); |
4184 |
30 Jun 22 |
peter |
116 |
utility::BFGS2 solver(tau_.size()); |
4184 |
30 Jun 22 |
peter |
117 |
utility::Matrix H(tau_.size(), count_.size()); |
4184 |
30 Jun 22 |
peter |
118 |
bool changed = true; |
4184 |
30 Jun 22 |
peter |
119 |
for (size_t i=0; i<100 && changed; ++i) { |
4184 |
30 Jun 22 |
peter |
120 |
changed = expectation_step(H); |
4184 |
30 Jun 22 |
peter |
121 |
if (optimize_tau(H)) |
4184 |
30 Jun 22 |
peter |
122 |
changed = true; |
4184 |
30 Jun 22 |
peter |
123 |
if (optimize_param(H, solver)) |
4184 |
30 Jun 22 |
peter |
124 |
changed=true; |
4184 |
30 Jun 22 |
peter |
125 |
} |
4184 |
30 Jun 22 |
peter |
126 |
} |
4184 |
30 Jun 22 |
peter |
127 |
|
4184 |
30 Jun 22 |
peter |
128 |
|
4184 |
30 Jun 22 |
peter |
129 |
double NegativeBinomialMixture::p(size_t i) const |
4184 |
30 Jun 22 |
peter |
130 |
{ |
4184 |
30 Jun 22 |
peter |
131 |
return 1.0 - 1.0/alpha()(i); |
4184 |
30 Jun 22 |
peter |
132 |
} |
4184 |
30 Jun 22 |
peter |
133 |
|
4184 |
30 Jun 22 |
peter |
134 |
|
4184 |
30 Jun 22 |
peter |
135 |
double NegativeBinomialMixture::pdf(unsigned long int k, size_t i) const |
4184 |
30 Jun 22 |
peter |
136 |
{ |
4184 |
30 Jun 22 |
peter |
137 |
return gsl_ran_negative_binomial_pdf(k, 1.0-p(i), r(i)); |
4184 |
30 Jun 22 |
peter |
138 |
} |
4184 |
30 Jun 22 |
peter |
139 |
|
4184 |
30 Jun 22 |
peter |
140 |
|
4184 |
30 Jun 22 |
peter |
141 |
double NegativeBinomialMixture::pdf(unsigned long int k) const |
4184 |
30 Jun 22 |
peter |
142 |
{ |
4184 |
30 Jun 22 |
peter |
143 |
double sum = 0; |
4184 |
30 Jun 22 |
peter |
144 |
for (size_t i=0; i<tau_.size(); ++i) |
4184 |
30 Jun 22 |
peter |
145 |
sum += tau_(i) * pdf(k, i); |
4184 |
30 Jun 22 |
peter |
146 |
return sum; |
4184 |
30 Jun 22 |
peter |
147 |
} |
4184 |
30 Jun 22 |
peter |
148 |
|
4184 |
30 Jun 22 |
peter |
149 |
|
4184 |
30 Jun 22 |
peter |
150 |
double NegativeBinomialMixture::r(size_t i) const |
4184 |
30 Jun 22 |
peter |
151 |
{ |
4184 |
30 Jun 22 |
peter |
152 |
return m()(i) / (alpha()(i)-1.0); |
4184 |
30 Jun 22 |
peter |
153 |
} |
4184 |
30 Jun 22 |
peter |
154 |
|
4184 |
30 Jun 22 |
peter |
155 |
|
4184 |
30 Jun 22 |
peter |
156 |
const utility::VectorBase& NegativeBinomialMixture::tau(void) const |
4184 |
30 Jun 22 |
peter |
157 |
{ |
4184 |
30 Jun 22 |
peter |
158 |
return tau_; |
4184 |
30 Jun 22 |
peter |
159 |
} |
4184 |
30 Jun 22 |
peter |
160 |
|
4184 |
30 Jun 22 |
peter |
161 |
|
4184 |
30 Jun 22 |
peter |
// Calculate P(Z_i=j) given current param_ |
4184 |
30 Jun 22 |
peter |
163 |
bool NegativeBinomialMixture::expectation_step(utility::Matrix& H) |
4184 |
30 Jun 22 |
peter |
164 |
{ |
4184 |
30 Jun 22 |
peter |
// loop over values |
4184 |
30 Jun 22 |
peter |
166 |
size_t j = 0; |
4184 |
30 Jun 22 |
peter |
167 |
for (auto const& x : count_) { |
4184 |
30 Jun 22 |
peter |
168 |
utility::VectorView column = H.column_view(j); |
4184 |
30 Jun 22 |
peter |
// loop over models |
4184 |
30 Jun 22 |
peter |
170 |
for (size_t i=0; i<column.size(); ++i) |
4184 |
30 Jun 22 |
peter |
171 |
column(i) = tau_(i) * pdf(x.first, m()(i), alpha()(i)); |
4184 |
30 Jun 22 |
peter |
172 |
column *= 1.0 / sum(column); |
4184 |
30 Jun 22 |
peter |
173 |
++j; |
4184 |
30 Jun 22 |
peter |
174 |
} |
4184 |
30 Jun 22 |
peter |
175 |
return false; |
4184 |
30 Jun 22 |
peter |
176 |
} |
4184 |
30 Jun 22 |
peter |
177 |
|
4184 |
30 Jun 22 |
peter |
178 |
|
4184 |
30 Jun 22 |
peter |
179 |
double NegativeBinomialMixture::pdf(unsigned long int k, double m, |
4184 |
30 Jun 22 |
peter |
180 |
double alpha) const |
4184 |
30 Jun 22 |
peter |
181 |
{ |
4184 |
30 Jun 22 |
peter |
182 |
if (m <= 0) { |
4184 |
30 Jun 22 |
peter |
183 |
if (m == 0 && k==0) |
4184 |
30 Jun 22 |
peter |
184 |
return 1.0; |
4184 |
30 Jun 22 |
peter |
185 |
return 0.0; |
4184 |
30 Jun 22 |
peter |
186 |
} |
4184 |
30 Jun 22 |
peter |
187 |
|
4184 |
30 Jun 22 |
peter |
188 |
if (alpha <= 1.0) |
4184 |
30 Jun 22 |
peter |
189 |
return gsl_ran_poisson_pdf(k, m); |
4184 |
30 Jun 22 |
peter |
190 |
double p = 1 - 1.0 / alpha; |
4184 |
30 Jun 22 |
peter |
191 |
double r = m / (alpha - 1); |
4184 |
30 Jun 22 |
peter |
192 |
return gsl_ran_negative_binomial_pdf(k, 1.0-p, r); |
4184 |
30 Jun 22 |
peter |
193 |
} |
4184 |
30 Jun 22 |
peter |
194 |
|
4184 |
30 Jun 22 |
peter |
195 |
|
4184 |
30 Jun 22 |
peter |
196 |
bool NegativeBinomialMixture::optimize_tau(const utility::Matrix& H) |
4184 |
30 Jun 22 |
peter |
197 |
{ |
4184 |
30 Jun 22 |
peter |
198 |
utility::Vector prev_tau(tau_); |
4184 |
30 Jun 22 |
peter |
199 |
|
4184 |
30 Jun 22 |
peter |
200 |
tau_.all(0.0); |
4184 |
30 Jun 22 |
peter |
201 |
auto it = count_.begin(); |
4184 |
30 Jun 22 |
peter |
// tau is updated as the average |
4184 |
30 Jun 22 |
peter |
203 |
for (size_t i=0; i<H.columns(); ++i) { |
4184 |
30 Jun 22 |
peter |
204 |
tau_ += it->second * H.column_const_view(i); |
4184 |
30 Jun 22 |
peter |
205 |
++it; |
4184 |
30 Jun 22 |
peter |
206 |
} |
4184 |
30 Jun 22 |
peter |
207 |
assert(!std::isnan(sum(tau_))); |
4184 |
30 Jun 22 |
peter |
208 |
assert(sum(tau_)); |
4184 |
30 Jun 22 |
peter |
209 |
tau_ *= 1.0 / sum(tau_); |
4184 |
30 Jun 22 |
peter |
210 |
return norm2_squared(utility::Vector(tau_-prev_tau)) > 1e-9; |
4184 |
30 Jun 22 |
peter |
211 |
} |
4184 |
30 Jun 22 |
peter |
212 |
|
4184 |
30 Jun 22 |
peter |
213 |
|
4184 |
30 Jun 22 |
peter |
214 |
bool |
4184 |
30 Jun 22 |
peter |
215 |
NegativeBinomialMixture:: |
4184 |
30 Jun 22 |
peter |
216 |
optimize_param(const utility::Matrix& H, |
4184 |
30 Jun 22 |
peter |
217 |
utility::MultiMinimizerDerivative& solver) |
4184 |
30 Jun 22 |
peter |
218 |
{ |
4184 |
30 Jun 22 |
peter |
219 |
/* |
4184 |
30 Jun 22 |
peter |
Maximize Q given current H matrix |
4184 |
30 Jun 22 |
peter |
Q = sum_i sum_j P(Z_j=i | K_j=k_j; params) ln(L(param_i; k_j, i) |
4184 |
30 Jun 22 |
peter |
= sum_i sum_j H_ij ln(L(param_i; k_j, i) |
4184 |
30 Jun 22 |
peter |
where i runs over models and j over samples |
4184 |
30 Jun 22 |
peter |
224 |
|
4184 |
30 Jun 22 |
peter |
We can minimize each model indpendently |
4184 |
30 Jun 22 |
peter |
Q = sum_j H_ij log(L(param_i; k_j, i) = |
4184 |
30 Jun 22 |
peter |
= sum_j H_ij log(tau_i*Gamma(r_i+k_j)/(Gamma(k_j+1)*Gamma(r_i)) * |
4184 |
30 Jun 22 |
peter |
(1-p_i)^r_i * p_j^k_i) = |
4184 |
30 Jun 22 |
peter |
= sum_j H_ij [log(Gamma(r_i+k_j)-log(Gamma(r_i))+r_i*log(1-p_i)+k_j*log(p_i) + constant = |
4184 |
30 Jun 22 |
peter |
230 |
*/ |
4184 |
30 Jun 22 |
peter |
231 |
|
4184 |
30 Jun 22 |
peter |
232 |
utility::Vector old_m = m_; |
4184 |
30 Jun 22 |
peter |
233 |
utility::Vector old_alpha = alpha_; |
4184 |
30 Jun 22 |
peter |
234 |
utility::Vector param(2); |
4184 |
30 Jun 22 |
peter |
235 |
for (size_t i=0; i<tau_.size(); ++i) { |
4184 |
30 Jun 22 |
peter |
236 |
utility::VectorConstView h = H.row_const_view(i); |
4184 |
30 Jun 22 |
peter |
237 |
param(0) = m_(i); |
4184 |
30 Jun 22 |
peter |
238 |
param(1) = alpha_(i); |
4184 |
30 Jun 22 |
peter |
239 |
Q func(count_, h); |
4252 |
18 Nov 22 |
peter |
240 |
solver(param, func, utility::MultiMinimizerDerivative::Gradient(1e-3)); |
4184 |
30 Jun 22 |
peter |
241 |
m_(i) = param(0); |
4184 |
30 Jun 22 |
peter |
242 |
alpha_(i) = param(1); |
4184 |
30 Jun 22 |
peter |
243 |
} |
4184 |
30 Jun 22 |
peter |
244 |
|
4184 |
30 Jun 22 |
peter |
245 |
double norm2 = norm2_squared(utility::Vector(m_ - old_m)) + |
4184 |
30 Jun 22 |
peter |
246 |
norm2_squared(utility::Vector(alpha_ - old_alpha)); |
4184 |
30 Jun 22 |
peter |
247 |
return norm2 > 1e-9; |
4184 |
30 Jun 22 |
peter |
248 |
} |
4184 |
30 Jun 22 |
peter |
249 |
|
4184 |
30 Jun 22 |
peter |
250 |
|
4184 |
30 Jun 22 |
peter |
251 |
NegativeBinomialMixture:: |
4184 |
30 Jun 22 |
peter |
252 |
Q::Q(const std::map<unsigned long int, unsigned long int>& count, |
4184 |
30 Jun 22 |
peter |
253 |
const utility::VectorBase& h) |
4184 |
30 Jun 22 |
peter |
254 |
: count_(count), h_(h) |
4184 |
30 Jun 22 |
peter |
255 |
{ |
4184 |
30 Jun 22 |
peter |
256 |
} |
4184 |
30 Jun 22 |
peter |
257 |
|
4184 |
30 Jun 22 |
peter |
258 |
|
4184 |
30 Jun 22 |
peter |
259 |
double |
4184 |
30 Jun 22 |
peter |
260 |
NegativeBinomialMixture::Q::operator() (const utility::VectorBase& x) |
4184 |
30 Jun 22 |
peter |
261 |
{ |
4184 |
30 Jun 22 |
peter |
262 |
assert(x.size()==2); |
4184 |
30 Jun 22 |
peter |
263 |
double m = x(0); |
4184 |
30 Jun 22 |
peter |
264 |
double alpha = x(1); |
4184 |
30 Jun 22 |
peter |
265 |
double sum = 0; |
4184 |
30 Jun 22 |
peter |
266 |
|
4184 |
30 Jun 22 |
peter |
267 |
size_t i = 0; |
4184 |
30 Jun 22 |
peter |
268 |
for (const auto& x : count_) { |
4184 |
30 Jun 22 |
peter |
269 |
unsigned long int k = x.first; |
4184 |
30 Jun 22 |
peter |
270 |
unsigned long int N = x.second; |
4184 |
30 Jun 22 |
peter |
271 |
sum -= N * h_(i) * log_L(m, alpha, k); |
4184 |
30 Jun 22 |
peter |
272 |
++i; |
4184 |
30 Jun 22 |
peter |
273 |
} |
4184 |
30 Jun 22 |
peter |
274 |
return sum; |
4184 |
30 Jun 22 |
peter |
275 |
} |
4184 |
30 Jun 22 |
peter |
276 |
|
4184 |
30 Jun 22 |
peter |
277 |
|
4184 |
30 Jun 22 |
peter |
278 |
void NegativeBinomialMixture::Q::operator() |
4184 |
30 Jun 22 |
peter |
279 |
(const utility::VectorBase& x, utility::VectorMutable& gradient) |
4184 |
30 Jun 22 |
peter |
280 |
{ |
4184 |
30 Jun 22 |
peter |
281 |
assert(gradient.size() == 2); |
4184 |
30 Jun 22 |
peter |
282 |
assert(count_.size() == h_.size()); |
4184 |
30 Jun 22 |
peter |
283 |
gradient.all(0.0); |
4184 |
30 Jun 22 |
peter |
284 |
|
4184 |
30 Jun 22 |
peter |
285 |
assert(x.size()==2); |
4184 |
30 Jun 22 |
peter |
286 |
double m = x(0); |
4184 |
30 Jun 22 |
peter |
287 |
double alpha = x(1); |
4184 |
30 Jun 22 |
peter |
288 |
|
4184 |
30 Jun 22 |
peter |
289 |
size_t i = 0; |
4184 |
30 Jun 22 |
peter |
290 |
for (const auto& x : count_) { |
4184 |
30 Jun 22 |
peter |
291 |
unsigned long int k = x.first; |
4184 |
30 Jun 22 |
peter |
292 |
unsigned long int N = x.second; |
4184 |
30 Jun 22 |
peter |
293 |
gradient -= N * h_(i) * calc_gradient(m, alpha, k); |
4184 |
30 Jun 22 |
peter |
294 |
++i; |
4184 |
30 Jun 22 |
peter |
295 |
} |
4184 |
30 Jun 22 |
peter |
296 |
} |
4184 |
30 Jun 22 |
peter |
297 |
|
4184 |
30 Jun 22 |
peter |
298 |
|
4184 |
30 Jun 22 |
peter |
// return dlnL/dp |
4184 |
30 Jun 22 |
peter |
300 |
double |
4184 |
30 Jun 22 |
peter |
301 |
NegativeBinomialMixture::Q::calculate_dp(double p, double r, |
4184 |
30 Jun 22 |
peter |
302 |
unsigned long int k) const |
4184 |
30 Jun 22 |
peter |
303 |
{ |
4184 |
30 Jun 22 |
peter |
304 |
/* |
4184 |
30 Jun 22 |
peter |
lnL = ln(tau) + ln(Gamma(r+k)) - ln(Gamma(k+1)) - ln(Gamma(r)) + |
4184 |
30 Jun 22 |
peter |
r*ln(1-p) + k*ln(p) |
4184 |
30 Jun 22 |
peter |
307 |
|
4184 |
30 Jun 22 |
peter |
dlnL/dp = d r*ln(1-p)/dp + d k*ln(p)/dp = |
4184 |
30 Jun 22 |
peter |
= -r/(1-p) + k/p |
4184 |
30 Jun 22 |
peter |
310 |
*/ |
4184 |
30 Jun 22 |
peter |
311 |
return k/p - r/(1-p); |
4184 |
30 Jun 22 |
peter |
312 |
} |
4184 |
30 Jun 22 |
peter |
313 |
|
4184 |
30 Jun 22 |
peter |
314 |
|
4184 |
30 Jun 22 |
peter |
315 |
double |
4184 |
30 Jun 22 |
peter |
316 |
NegativeBinomialMixture::Q::calculate_dr(double p, double r, |
4184 |
30 Jun 22 |
peter |
317 |
unsigned long int k) const |
4184 |
30 Jun 22 |
peter |
318 |
{ |
4184 |
30 Jun 22 |
peter |
319 |
/* |
4184 |
30 Jun 22 |
peter |
lnL = ln(tau) + ln(Gamma(r+k)) - ln(Gamma(k+1)) - ln(Gamma(r)) + |
4184 |
30 Jun 22 |
peter |
r*ln(1-p) + k*ln(p) |
4184 |
30 Jun 22 |
peter |
322 |
|
4184 |
30 Jun 22 |
peter |
dlnL/dr = d ln(Gamma(r+k))/dr - d ln(Gamma(r))/dr + ln(1-p) |
4184 |
30 Jun 22 |
peter |
324 |
|
4184 |
30 Jun 22 |
peter |
325 |
*/ |
4184 |
30 Jun 22 |
peter |
326 |
return gsl_sf_psi(r+k) - gsl_sf_psi(r) + std::log(1.0-p); |
4184 |
30 Jun 22 |
peter |
327 |
} |
4184 |
30 Jun 22 |
peter |
328 |
|
4184 |
30 Jun 22 |
peter |
329 |
|
4184 |
30 Jun 22 |
peter |
330 |
double |
4184 |
30 Jun 22 |
peter |
331 |
NegativeBinomialMixture::Q::log_L(double m, double alpha, |
4184 |
30 Jun 22 |
peter |
332 |
unsigned long int k) const |
4184 |
30 Jun 22 |
peter |
333 |
{ |
4184 |
30 Jun 22 |
peter |
334 |
if (alpha > 1.001) { |
4184 |
30 Jun 22 |
peter |
// logL = log(Gamma(r+k)) - log(Gamma(k+1)) - log(Gamma(r)) + |
4184 |
30 Jun 22 |
peter |
// r*log(1-p) + k*log(p) |
4184 |
30 Jun 22 |
peter |
337 |
double r = m / (alpha-1.0); |
4184 |
30 Jun 22 |
peter |
338 |
double p = 1.0 - 1.0/alpha; |
4184 |
30 Jun 22 |
peter |
339 |
return gsl_sf_lngamma(r+k) - gsl_sf_lngamma(k+1) - gsl_sf_lngamma(r) + |
4184 |
30 Jun 22 |
peter |
340 |
r*log(1.0-p) + k*log(p); |
4184 |
30 Jun 22 |
peter |
341 |
} |
4184 |
30 Jun 22 |
peter |
342 |
|
4184 |
30 Jun 22 |
peter |
// alpha = 1+epsilon |
4184 |
30 Jun 22 |
peter |
// r = m / (alpha-1) = m / epsilon |
4184 |
30 Jun 22 |
peter |
// p = (alpha-1) / alpha = epsilon / (1+epsilon) |
4184 |
30 Jun 22 |
peter |
// 1-p = (1+epsilon-epsilon)/(1+epsilon) = 1/(1+epsilon) |
4184 |
30 Jun 22 |
peter |
347 |
|
4184 |
30 Jun 22 |
peter |
// Taylor expand around alpha=1, f(1) + f'(1) * (alpha-1) |
4184 |
30 Jun 22 |
peter |
// where f(1) is the Poisson logL |
4184 |
30 Jun 22 |
peter |
// logL = k*log(m) - m - log(Gamma(k+1)) + |
4184 |
30 Jun 22 |
peter |
// + [(k-1)*k/(2*m) + m/2 - k] (alpha-1) |
4184 |
30 Jun 22 |
peter |
352 |
|
4184 |
30 Jun 22 |
peter |
353 |
double res = k*std::log(m) - m - gsl_sf_lngamma(k+1); |
4184 |
30 Jun 22 |
peter |
354 |
if (alpha > 1.0) |
4184 |
30 Jun 22 |
peter |
355 |
res += (alpha-1.0) * 0.5*((k-1)*k/m + m - 2*k); |
4184 |
30 Jun 22 |
peter |
356 |
else // penalizing alpha < 1 |
4184 |
30 Jun 22 |
peter |
357 |
res -= 1000*std::pow(alpha-1.0, 2); |
4184 |
30 Jun 22 |
peter |
358 |
|
4184 |
30 Jun 22 |
peter |
359 |
return res; |
4184 |
30 Jun 22 |
peter |
360 |
} |
4184 |
30 Jun 22 |
peter |
361 |
|
4184 |
30 Jun 22 |
peter |
362 |
|
4184 |
30 Jun 22 |
peter |
363 |
utility::Vector |
4184 |
30 Jun 22 |
peter |
364 |
NegativeBinomialMixture::Q::calc_gradient(double m, double alpha, |
4184 |
30 Jun 22 |
peter |
365 |
unsigned long int k) const |
4184 |
30 Jun 22 |
peter |
366 |
{ |
4184 |
30 Jun 22 |
peter |
367 |
utility::Vector res(2, 0); |
4184 |
30 Jun 22 |
peter |
368 |
if (alpha > 1.001) { |
4184 |
30 Jun 22 |
peter |
369 |
double r = m / (alpha-1.0); |
4184 |
30 Jun 22 |
peter |
370 |
double p = 1.0 - 1.0/alpha; |
4184 |
30 Jun 22 |
peter |
371 |
double dp = calculate_dp(p, r, k); |
4184 |
30 Jun 22 |
peter |
372 |
double dr = calculate_dr(p, r, k); |
4184 |
30 Jun 22 |
peter |
// df/dm = df/dr * dr/dm + df/dp * dp/dm |
4184 |
30 Jun 22 |
peter |
// = df/dr * 1/(alpha-1) |
4184 |
30 Jun 22 |
peter |
375 |
res(0) = dr * 1.0 / (alpha-1.0); |
4184 |
30 Jun 22 |
peter |
// df/dalpha = df/dp dp/dalpha + df/dr dr/dalpha = |
4184 |
30 Jun 22 |
peter |
// = df/dp 1/alpha^2 - df/dr m/(alpha-1)^2 |
4184 |
30 Jun 22 |
peter |
378 |
res(1) = dp / std::pow(alpha,2) - dr*m/std::pow(alpha-1, 2); |
4184 |
30 Jun 22 |
peter |
379 |
} |
4184 |
30 Jun 22 |
peter |
380 |
else { |
4184 |
30 Jun 22 |
peter |
// Taylor expansion at alpha=1: |
4184 |
30 Jun 22 |
peter |
// logL = k*log(m) - m - log(Gamma(k+1)) + |
4184 |
30 Jun 22 |
peter |
// + [(k-1)*k/(2*m) + m/2 - k] (alpha-1) |
4184 |
30 Jun 22 |
peter |
384 |
|
4184 |
30 Jun 22 |
peter |
// df/dm |
4184 |
30 Jun 22 |
peter |
386 |
res(0) = k/m - 1; |
4184 |
30 Jun 22 |
peter |
387 |
if (alpha > 1.0) |
4184 |
30 Jun 22 |
peter |
388 |
res(0) += (alpha-1.0) * 0.5 * (1.0 - (k-1)*k/(m*m)); |
4184 |
30 Jun 22 |
peter |
389 |
|
4184 |
30 Jun 22 |
peter |
//df/dalpha |
4184 |
30 Jun 22 |
peter |
391 |
res(1) += (k-1)*k/(2*m) + m/2 - k; |
4184 |
30 Jun 22 |
peter |
392 |
if (alpha < 1) // penalizing alpha < 1 |
4184 |
30 Jun 22 |
peter |
393 |
res(1) -= 2*1000*(alpha-1); |
4184 |
30 Jun 22 |
peter |
394 |
|
4184 |
30 Jun 22 |
peter |
395 |
} |
4184 |
30 Jun 22 |
peter |
396 |
return res; |
4184 |
30 Jun 22 |
peter |
397 |
} |
4184 |
30 Jun 22 |
peter |
398 |
}}} |