4184 |
30 Jun 22 |
peter |
1 |
#ifndef theplu_yat_statistics_negative_binomial_mixture_ |
4184 |
30 Jun 22 |
peter |
2 |
#define theplu_yat_statistics_negative_binomial_mixture_ |
4184 |
30 Jun 22 |
peter |
3 |
|
4184 |
30 Jun 22 |
peter |
// $Id$ |
4184 |
30 Jun 22 |
peter |
5 |
|
4184 |
30 Jun 22 |
peter |
6 |
/* |
4184 |
30 Jun 22 |
peter |
Copyright (C) 2022 Peter Johansson |
4184 |
30 Jun 22 |
peter |
8 |
|
4184 |
30 Jun 22 |
peter |
This file is part of the yat library, https://dev.thep.lu.se/yat |
4184 |
30 Jun 22 |
peter |
10 |
|
4184 |
30 Jun 22 |
peter |
The yat library is free software; you can redistribute it and/or |
4184 |
30 Jun 22 |
peter |
modify it under the terms of the GNU General Public License as |
4184 |
30 Jun 22 |
peter |
published by the Free Software Foundation; either version 3 of the |
4184 |
30 Jun 22 |
peter |
License, or (at your option) any later version. |
4184 |
30 Jun 22 |
peter |
15 |
|
4184 |
30 Jun 22 |
peter |
The yat library is distributed in the hope that it will be useful, |
4184 |
30 Jun 22 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
4184 |
30 Jun 22 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
4184 |
30 Jun 22 |
peter |
General Public License for more details. |
4184 |
30 Jun 22 |
peter |
20 |
|
4184 |
30 Jun 22 |
peter |
You should have received a copy of the GNU General Public License |
4184 |
30 Jun 22 |
peter |
along with yat. If not, see <https://www.gnu.org/licenses/>. |
4184 |
30 Jun 22 |
peter |
23 |
*/ |
4184 |
30 Jun 22 |
peter |
24 |
|
4184 |
30 Jun 22 |
peter |
25 |
|
4184 |
30 Jun 22 |
peter |
26 |
#include <yat/utility/Matrix.h> |
4184 |
30 Jun 22 |
peter |
27 |
#include <yat/utility/Vector.h> |
4184 |
30 Jun 22 |
peter |
28 |
|
4184 |
30 Jun 22 |
peter |
29 |
#include <map> |
4184 |
30 Jun 22 |
peter |
30 |
|
4184 |
30 Jun 22 |
peter |
31 |
namespace theplu { |
4184 |
30 Jun 22 |
peter |
32 |
namespace yat { |
4184 |
30 Jun 22 |
peter |
33 |
namespace utility { |
4184 |
30 Jun 22 |
peter |
34 |
class Matrix; |
4184 |
30 Jun 22 |
peter |
35 |
class MultiMinimizerDerivative; |
4184 |
30 Jun 22 |
peter |
36 |
} |
4184 |
30 Jun 22 |
peter |
37 |
namespace statistics { |
4184 |
30 Jun 22 |
peter |
38 |
|
4184 |
30 Jun 22 |
peter |
39 |
|
4184 |
30 Jun 22 |
peter |
40 |
/** |
4184 |
30 Jun 22 |
peter |
Data is modeled as a mixture of negative binomial distributions and |
4184 |
30 Jun 22 |
peter |
the likelihood is calculated as |
4184 |
30 Jun 22 |
peter |
43 |
|
4184 |
30 Jun 22 |
peter |
\f$ L = \sum_i \tau_i L_i = |
4192 |
11 Aug 22 |
peter |
\sum_i \tau_i {k+r_i-1 \choose r_i-1} (1-p_i)^{r_i} p_i^k \f$ |
4184 |
30 Jun 22 |
peter |
where \f$ \sum_i \tau_i = 1.0 \f$. |
4184 |
30 Jun 22 |
peter |
47 |
|
4184 |
30 Jun 22 |
peter |
\since New in yat 0.20 |
4184 |
30 Jun 22 |
peter |
49 |
*/ |
4184 |
30 Jun 22 |
peter |
50 |
class NegativeBinomialMixture |
4184 |
30 Jun 22 |
peter |
51 |
{ |
4184 |
30 Jun 22 |
peter |
52 |
public: |
4184 |
30 Jun 22 |
peter |
53 |
/** |
4184 |
30 Jun 22 |
peter |
Add \a n data points with value \a k |
4184 |
30 Jun 22 |
peter |
55 |
*/ |
4184 |
30 Jun 22 |
peter |
56 |
void add(unsigned long int k, unsigned long int n=1); |
4184 |
30 Jun 22 |
peter |
57 |
|
4184 |
30 Jun 22 |
peter |
58 |
/** |
4184 |
30 Jun 22 |
peter |
The \a alpha values are defined as \f$ alpha = \frac{1}{1-p} \f$ |
4184 |
30 Jun 22 |
peter |
60 |
*/ |
4184 |
30 Jun 22 |
peter |
61 |
const utility::VectorBase& alpha(void) const; |
4184 |
30 Jun 22 |
peter |
62 |
|
4184 |
30 Jun 22 |
peter |
63 |
/** |
4184 |
30 Jun 22 |
peter |
\brief remove all data |
4184 |
30 Jun 22 |
peter |
65 |
*/ |
4184 |
30 Jun 22 |
peter |
66 |
void clear(void); |
4184 |
30 Jun 22 |
peter |
67 |
|
4184 |
30 Jun 22 |
peter |
68 |
/** |
4184 |
30 Jun 22 |
peter |
Fit the data to a model with \a n sub-distributions. |
4184 |
30 Jun 22 |
peter |
70 |
*/ |
4184 |
30 Jun 22 |
peter |
71 |
void fit(size_t n); |
4184 |
30 Jun 22 |
peter |
72 |
|
4184 |
30 Jun 22 |
peter |
73 |
/** |
4184 |
30 Jun 22 |
peter |
Fit the data initiating the search for the optimal model |
4184 |
30 Jun 22 |
peter |
parameters at the values passed. |
4184 |
30 Jun 22 |
peter |
76 |
*/ |
4184 |
30 Jun 22 |
peter |
77 |
void fit(const utility::VectorBase& m, |
4184 |
30 Jun 22 |
peter |
78 |
const utility::VectorBase& alpha, |
4184 |
30 Jun 22 |
peter |
79 |
const utility::VectorBase& tau); |
4184 |
30 Jun 22 |
peter |
80 |
|
4184 |
30 Jun 22 |
peter |
81 |
/** |
4184 |
30 Jun 22 |
peter |
\return log likelihood of getting data given inferred model |
4184 |
30 Jun 22 |
peter |
83 |
*/ |
4184 |
30 Jun 22 |
peter |
84 |
double logL(void) const; |
4184 |
30 Jun 22 |
peter |
85 |
|
4184 |
30 Jun 22 |
peter |
86 |
/** |
4184 |
30 Jun 22 |
peter |
Probabily to get \a k from the inferred model. |
4184 |
30 Jun 22 |
peter |
88 |
*/ |
4184 |
30 Jun 22 |
peter |
89 |
double pdf(unsigned long int k) const; |
4184 |
30 Jun 22 |
peter |
90 |
|
4184 |
30 Jun 22 |
peter |
91 |
/** |
4184 |
30 Jun 22 |
peter |
Probability to get \a k from the \a ith model |
4184 |
30 Jun 22 |
peter |
93 |
*/ |
4184 |
30 Jun 22 |
peter |
94 |
double pdf(unsigned long int k, size_t i) const; |
4184 |
30 Jun 22 |
peter |
95 |
|
4184 |
30 Jun 22 |
peter |
96 |
/** |
4184 |
30 Jun 22 |
peter |
The mean is calculated as \f$ m = \frac{pr}{1-p} \f$ |
4184 |
30 Jun 22 |
peter |
98 |
|
4184 |
30 Jun 22 |
peter |
\return the mean of each sub-distribution |
4184 |
30 Jun 22 |
peter |
100 |
*/ |
4184 |
30 Jun 22 |
peter |
101 |
const utility::VectorBase& m(void) const; |
4184 |
30 Jun 22 |
peter |
102 |
|
4184 |
30 Jun 22 |
peter |
103 |
/** |
4192 |
11 Aug 22 |
peter |
\return tau values in model, describing how much each |
4184 |
30 Jun 22 |
peter |
sub-distribution contributes to the total |
4184 |
30 Jun 22 |
peter |
distribution. |
4184 |
30 Jun 22 |
peter |
107 |
*/ |
4184 |
30 Jun 22 |
peter |
108 |
const utility::VectorBase& tau(void) const; |
4184 |
30 Jun 22 |
peter |
109 |
private: |
4184 |
30 Jun 22 |
peter |
110 |
std::map<unsigned long int, unsigned long int> count_; |
4184 |
30 Jun 22 |
peter |
111 |
|
4184 |
30 Jun 22 |
peter |
112 |
utility::Vector tau_; |
4184 |
30 Jun 22 |
peter |
113 |
|
4184 |
30 Jun 22 |
peter |
114 |
utility::Vector m_; |
4184 |
30 Jun 22 |
peter |
115 |
utility::Vector alpha_; |
4184 |
30 Jun 22 |
peter |
116 |
|
4184 |
30 Jun 22 |
peter |
117 |
|
4184 |
30 Jun 22 |
peter |
// p = (alpha-1.0) / alpha = 1.0 - 1.0/alpha |
4184 |
30 Jun 22 |
peter |
119 |
double p(size_t i) const; |
4184 |
30 Jun 22 |
peter |
// r = m*(1-p)/p = m / alpha^2 |
4184 |
30 Jun 22 |
peter |
121 |
double r(size_t i) const; |
4184 |
30 Jun 22 |
peter |
122 |
|
4184 |
30 Jun 22 |
peter |
123 |
void init_fit(size_t n); |
4184 |
30 Jun 22 |
peter |
124 |
void optimize(void); |
4184 |
30 Jun 22 |
peter |
125 |
|
4184 |
30 Jun 22 |
peter |
126 |
bool expectation_step(utility::Matrix& H); |
4184 |
30 Jun 22 |
peter |
127 |
bool optimize_tau(const utility::Matrix& H); |
4184 |
30 Jun 22 |
peter |
128 |
bool optimize_param(const utility::Matrix& H, |
4184 |
30 Jun 22 |
peter |
129 |
utility::MultiMinimizerDerivative& solver); |
4184 |
30 Jun 22 |
peter |
130 |
|
4184 |
30 Jun 22 |
peter |
131 |
double pdf(unsigned long int k, double m, double alpha) const; |
4184 |
30 Jun 22 |
peter |
132 |
|
4184 |
30 Jun 22 |
peter |
133 |
class Q |
4184 |
30 Jun 22 |
peter |
134 |
{ |
4184 |
30 Jun 22 |
peter |
135 |
public: |
4184 |
30 Jun 22 |
peter |
136 |
Q(const std::map<unsigned long int, unsigned long int>& count, |
4184 |
30 Jun 22 |
peter |
137 |
const utility::VectorBase& h); |
4184 |
30 Jun 22 |
peter |
138 |
double operator()(const utility::VectorBase& x); |
4184 |
30 Jun 22 |
peter |
139 |
void operator()(const utility::VectorBase& x, |
4184 |
30 Jun 22 |
peter |
140 |
utility::VectorMutable& gradient); |
4184 |
30 Jun 22 |
peter |
141 |
|
4184 |
30 Jun 22 |
peter |
142 |
private: |
4184 |
30 Jun 22 |
peter |
143 |
std::map<unsigned long int, unsigned long int> count_; |
4184 |
30 Jun 22 |
peter |
144 |
const utility::VectorBase& h_; |
4184 |
30 Jun 22 |
peter |
145 |
|
4184 |
30 Jun 22 |
peter |
146 |
double log_L(double m, double alpha, unsigned long int k) const; |
4184 |
30 Jun 22 |
peter |
147 |
|
4184 |
30 Jun 22 |
peter |
// return <dlogL/dm, dlogL/dalpha> |
4184 |
30 Jun 22 |
peter |
149 |
utility::Vector |
4184 |
30 Jun 22 |
peter |
150 |
calc_gradient(double m, double alpha, unsigned long int k) const; |
4184 |
30 Jun 22 |
peter |
151 |
|
4184 |
30 Jun 22 |
peter |
152 |
double calculate_dp(double p, double r, unsigned long int k) const; |
4184 |
30 Jun 22 |
peter |
153 |
double calculate_dr(double p, double r, unsigned long int k) const; |
4184 |
30 Jun 22 |
peter |
154 |
}; |
4184 |
30 Jun 22 |
peter |
155 |
}; |
4184 |
30 Jun 22 |
peter |
156 |
|
4184 |
30 Jun 22 |
peter |
157 |
}}} |
4184 |
30 Jun 22 |
peter |
158 |
#endif |