2649 |
14 Nov 11 |
peter |
// $Id$ |
2649 |
14 Nov 11 |
peter |
2 |
|
2649 |
14 Nov 11 |
peter |
3 |
/* |
4089 |
07 Sep 21 |
peter |
Copyright (C) 2011, 2012, 2020, 2021 Peter Johansson |
2649 |
14 Nov 11 |
peter |
5 |
|
2649 |
14 Nov 11 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2649 |
14 Nov 11 |
peter |
7 |
|
2649 |
14 Nov 11 |
peter |
The yat library is free software; you can redistribute it and/or |
2649 |
14 Nov 11 |
peter |
modify it under the terms of the GNU General Public License as |
2649 |
14 Nov 11 |
peter |
published by the Free Software Foundation; either version 3 of the |
2649 |
14 Nov 11 |
peter |
License, or (at your option) any later version. |
2649 |
14 Nov 11 |
peter |
12 |
|
2649 |
14 Nov 11 |
peter |
The yat library is distributed in the hope that it will be useful, |
2649 |
14 Nov 11 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2649 |
14 Nov 11 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2649 |
14 Nov 11 |
peter |
General Public License for more details. |
2649 |
14 Nov 11 |
peter |
17 |
|
2649 |
14 Nov 11 |
peter |
You should have received a copy of the GNU General Public License |
2649 |
14 Nov 11 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2649 |
14 Nov 11 |
peter |
20 |
*/ |
2649 |
14 Nov 11 |
peter |
21 |
|
2881 |
18 Nov 12 |
peter |
22 |
#include <config.h> |
2881 |
18 Nov 12 |
peter |
23 |
|
2649 |
14 Nov 11 |
peter |
24 |
#include "Kendall.h" |
2649 |
14 Nov 11 |
peter |
25 |
|
4083 |
27 Aug 21 |
peter |
26 |
#include <yat/utility/Ranking.h> |
4083 |
27 Aug 21 |
peter |
27 |
#include <yat/utility/stl_utility.h> |
4083 |
27 Aug 21 |
peter |
28 |
|
2662 |
21 Nov 11 |
peter |
29 |
#include <gsl/gsl_cdf.h> |
2662 |
21 Nov 11 |
peter |
30 |
|
4083 |
27 Aug 21 |
peter |
31 |
#include <boost/scoped_ptr.hpp> |
4083 |
27 Aug 21 |
peter |
32 |
|
2662 |
21 Nov 11 |
peter |
33 |
#include <algorithm> |
2662 |
21 Nov 11 |
peter |
34 |
#include <cassert> |
2652 |
15 Nov 11 |
peter |
35 |
#include <cmath> |
4083 |
27 Aug 21 |
peter |
36 |
#include <iterator> |
2649 |
14 Nov 11 |
peter |
37 |
#include <limits> |
2663 |
21 Nov 11 |
peter |
38 |
#include <map> |
4083 |
27 Aug 21 |
peter |
39 |
#include <set> |
4083 |
27 Aug 21 |
peter |
40 |
#include <utility> |
2651 |
15 Nov 11 |
peter |
41 |
#include <vector> |
2649 |
14 Nov 11 |
peter |
42 |
|
2649 |
14 Nov 11 |
peter |
43 |
namespace theplu { |
2649 |
14 Nov 11 |
peter |
44 |
namespace yat { |
2649 |
14 Nov 11 |
peter |
45 |
namespace statistics { |
2649 |
14 Nov 11 |
peter |
46 |
|
2662 |
21 Nov 11 |
peter |
47 |
|
2662 |
21 Nov 11 |
peter |
48 |
/** |
2662 |
21 Nov 11 |
peter |
Calculate sum over all pair |
2662 |
21 Nov 11 |
peter |
50 |
|
2662 |
21 Nov 11 |
peter |
\sum_ij f((first[i]-first[j])(first2[i]-first2[j])) |
2662 |
21 Nov 11 |
peter |
where f(x) is -1 if x<0, |
2662 |
21 Nov 11 |
peter |
0 if x=0, |
2662 |
21 Nov 11 |
peter |
and +1 if x>0. |
2662 |
21 Nov 11 |
peter |
55 |
*/ |
2662 |
21 Nov 11 |
peter |
56 |
template<typename Iterator1, typename Iterator2> |
2662 |
21 Nov 11 |
peter |
57 |
long int count(Iterator1 first1, Iterator1 last1, Iterator2 first2) |
2662 |
21 Nov 11 |
peter |
58 |
{ |
2662 |
21 Nov 11 |
peter |
59 |
long int count=0; |
2662 |
21 Nov 11 |
peter |
60 |
for (Iterator1 i=first1 ; i!=last1; ++i) |
2662 |
21 Nov 11 |
peter |
61 |
for (Iterator1 j=first1; j<i; ++j) { |
2662 |
21 Nov 11 |
peter |
62 |
if (*i==*j || first2[i-first1]==first2[j-first1]) |
2662 |
21 Nov 11 |
peter |
63 |
continue; |
2662 |
21 Nov 11 |
peter |
64 |
if ((*i > *j) == (first2[i-first1] > first2[j-first1])) |
2662 |
21 Nov 11 |
peter |
65 |
++count; |
2662 |
21 Nov 11 |
peter |
66 |
else |
2662 |
21 Nov 11 |
peter |
67 |
--count; |
2662 |
21 Nov 11 |
peter |
68 |
} |
2662 |
21 Nov 11 |
peter |
69 |
return count; |
2662 |
21 Nov 11 |
peter |
70 |
} |
2662 |
21 Nov 11 |
peter |
71 |
|
2662 |
21 Nov 11 |
peter |
72 |
|
2662 |
21 Nov 11 |
peter |
73 |
|
2649 |
14 Nov 11 |
peter |
74 |
class Kendall::Pimpl |
2649 |
14 Nov 11 |
peter |
75 |
{ |
4083 |
27 Aug 21 |
peter |
76 |
class Count |
2653 |
15 Nov 11 |
peter |
77 |
{ |
4083 |
27 Aug 21 |
peter |
78 |
public: |
4083 |
27 Aug 21 |
peter |
79 |
Count(const std::multiset<std::pair<double, double> >& data); |
4083 |
27 Aug 21 |
peter |
80 |
long int count(void) const; |
4083 |
27 Aug 21 |
peter |
81 |
double score(void) const; |
4083 |
27 Aug 21 |
peter |
82 |
class Ties |
4083 |
27 Aug 21 |
peter |
83 |
{ |
4083 |
27 Aug 21 |
peter |
84 |
public: |
4083 |
27 Aug 21 |
peter |
85 |
Ties(void); |
4083 |
27 Aug 21 |
peter |
86 |
void add(size_t n); |
4083 |
27 Aug 21 |
peter |
87 |
bool have_ties(void) const { return n_pairs_; } |
4083 |
27 Aug 21 |
peter |
// \return \sum x * (x-1) |
4083 |
27 Aug 21 |
peter |
89 |
unsigned long int n_pairs(void) const { return n_pairs_;} |
4083 |
27 Aug 21 |
peter |
// \return \sum x * (x-1) * (x-2) |
4083 |
27 Aug 21 |
peter |
91 |
unsigned long int n_triples(void) const { return n_triples_; } |
4083 |
27 Aug 21 |
peter |
// \return \sum x * (x-1) * (2*x+5) |
4083 |
27 Aug 21 |
peter |
93 |
unsigned long int v_correction(void) const { return v_correction_; } |
4083 |
27 Aug 21 |
peter |
94 |
private: |
4083 |
27 Aug 21 |
peter |
95 |
unsigned long int n_pairs_; |
4083 |
27 Aug 21 |
peter |
96 |
unsigned long int n_triples_; |
4083 |
27 Aug 21 |
peter |
97 |
unsigned long int v_correction_; |
4083 |
27 Aug 21 |
peter |
98 |
}; |
2651 |
15 Nov 11 |
peter |
99 |
|
4083 |
27 Aug 21 |
peter |
100 |
const Ties& x_ties(void) const; |
4083 |
27 Aug 21 |
peter |
101 |
const Ties& y_ties(void) const; |
2651 |
15 Nov 11 |
peter |
102 |
|
4083 |
27 Aug 21 |
peter |
103 |
private: |
4083 |
27 Aug 21 |
peter |
104 |
Ties x_ties_; |
4083 |
27 Aug 21 |
peter |
105 |
Ties y_ties_; |
2653 |
15 Nov 11 |
peter |
106 |
|
4083 |
27 Aug 21 |
peter |
// # pairs such that (x_i-x_j)(y_i-y_j) > 0 |
4083 |
27 Aug 21 |
peter |
108 |
long int concordant_; |
4083 |
27 Aug 21 |
peter |
// # pairs such that (x_i-x_j)(y_i-y_j) < 0 |
4083 |
27 Aug 21 |
peter |
110 |
long int discordant_; |
4083 |
27 Aug 21 |
peter |
// # pairs such that x_i!=x_j && y_i==y_j |
4083 |
27 Aug 21 |
peter |
112 |
long int extraX_; |
4083 |
27 Aug 21 |
peter |
// # pairs such that x_i==x_j && y_i!=y_j |
4083 |
27 Aug 21 |
peter |
114 |
long int extraY_; |
4083 |
27 Aug 21 |
peter |
// # pairs such that x_i==x_j && y_i==y_j |
4083 |
27 Aug 21 |
peter |
//long int spare_; |
2653 |
15 Nov 11 |
peter |
117 |
|
4083 |
27 Aug 21 |
peter |
118 |
template<typename Iterator> |
4083 |
27 Aug 21 |
peter |
119 |
void calculate_ties(Iterator first, Iterator last, Ties& ties) |
4083 |
27 Aug 21 |
peter |
120 |
{ |
4117 |
22 Oct 21 |
peter |
121 |
assert(std::is_sorted(first, last)); |
4083 |
27 Aug 21 |
peter |
122 |
while (first != last) { |
4083 |
27 Aug 21 |
peter |
123 |
Iterator upper = first; |
4083 |
27 Aug 21 |
peter |
124 |
size_t n = 1; |
4083 |
27 Aug 21 |
peter |
125 |
++upper; |
4083 |
27 Aug 21 |
peter |
126 |
while (upper!=last && *upper==*first) { |
4083 |
27 Aug 21 |
peter |
127 |
++n; |
4083 |
27 Aug 21 |
peter |
128 |
++upper; |
4083 |
27 Aug 21 |
peter |
129 |
} |
4083 |
27 Aug 21 |
peter |
130 |
ties.add(n); |
4083 |
27 Aug 21 |
peter |
131 |
first = upper; |
2761 |
08 Jul 12 |
peter |
132 |
} |
2761 |
08 Jul 12 |
peter |
133 |
} |
4083 |
27 Aug 21 |
peter |
134 |
}; |
2651 |
15 Nov 11 |
peter |
135 |
|
4083 |
27 Aug 21 |
peter |
136 |
public: |
4083 |
27 Aug 21 |
peter |
137 |
Pimpl(void); |
4083 |
27 Aug 21 |
peter |
138 |
Pimpl(const Pimpl& other); |
4083 |
27 Aug 21 |
peter |
139 |
Pimpl& operator=(const Pimpl& rhs); |
4083 |
27 Aug 21 |
peter |
140 |
void add(double x, double y); |
4083 |
27 Aug 21 |
peter |
141 |
size_t n(void) const; |
4083 |
27 Aug 21 |
peter |
/// \return one-sided p-value |
4083 |
27 Aug 21 |
peter |
143 |
double p_approx(bool right) const; |
4083 |
27 Aug 21 |
peter |
144 |
double p_exact(bool right, bool left) const; |
4083 |
27 Aug 21 |
peter |
145 |
void reset(void); |
4083 |
27 Aug 21 |
peter |
146 |
double score(void) const; |
2653 |
15 Nov 11 |
peter |
147 |
private: |
4083 |
27 Aug 21 |
peter |
// return # concordant pairs minus # discordant pairs |
4083 |
27 Aug 21 |
peter |
149 |
long int count(void) const; |
4083 |
27 Aug 21 |
peter |
// return estimated variance of score |
4083 |
27 Aug 21 |
peter |
151 |
double variance(void) const; |
4083 |
27 Aug 21 |
peter |
// data always sort wrt first and then second (if first are equal) |
4083 |
27 Aug 21 |
peter |
153 |
std::multiset<std::pair<double, double> > data_; |
4083 |
27 Aug 21 |
peter |
// calculated in score(void) |
4083 |
27 Aug 21 |
peter |
155 |
boost::scoped_ptr<Count> count_; |
2649 |
14 Nov 11 |
peter |
156 |
}; |
2649 |
14 Nov 11 |
peter |
157 |
|
2649 |
14 Nov 11 |
peter |
158 |
|
2649 |
14 Nov 11 |
peter |
// Kendall class |
2649 |
14 Nov 11 |
peter |
160 |
Kendall::Kendall(void) |
2649 |
14 Nov 11 |
peter |
161 |
: pimpl_(new Pimpl) |
2649 |
14 Nov 11 |
peter |
162 |
{ |
2649 |
14 Nov 11 |
peter |
163 |
} |
2649 |
14 Nov 11 |
peter |
164 |
|
2649 |
14 Nov 11 |
peter |
165 |
|
2760 |
08 Jul 12 |
peter |
166 |
Kendall::Kendall(const Kendall& rhs) |
2760 |
08 Jul 12 |
peter |
167 |
: pimpl_(new Pimpl(*rhs.pimpl_)) |
2760 |
08 Jul 12 |
peter |
168 |
{ |
2760 |
08 Jul 12 |
peter |
169 |
} |
2760 |
08 Jul 12 |
peter |
170 |
|
2760 |
08 Jul 12 |
peter |
171 |
|
4083 |
27 Aug 21 |
peter |
172 |
Kendall::Kendall(Kendall&& rhs) |
4083 |
27 Aug 21 |
peter |
173 |
: pimpl_(rhs.pimpl_) |
4083 |
27 Aug 21 |
peter |
174 |
{ |
4083 |
27 Aug 21 |
peter |
175 |
rhs.pimpl_ = nullptr; |
4083 |
27 Aug 21 |
peter |
176 |
} |
4083 |
27 Aug 21 |
peter |
177 |
|
4083 |
27 Aug 21 |
peter |
178 |
|
2649 |
14 Nov 11 |
peter |
179 |
Kendall::~Kendall(void) |
2649 |
14 Nov 11 |
peter |
180 |
{ |
2649 |
14 Nov 11 |
peter |
181 |
delete pimpl_; |
2649 |
14 Nov 11 |
peter |
182 |
} |
2649 |
14 Nov 11 |
peter |
183 |
|
2649 |
14 Nov 11 |
peter |
184 |
|
2649 |
14 Nov 11 |
peter |
185 |
void Kendall::add(double x, double y) |
2649 |
14 Nov 11 |
peter |
186 |
{ |
2649 |
14 Nov 11 |
peter |
187 |
pimpl_->add(x, y); |
2649 |
14 Nov 11 |
peter |
188 |
} |
2649 |
14 Nov 11 |
peter |
189 |
|
2649 |
14 Nov 11 |
peter |
190 |
|
2758 |
08 Jul 12 |
peter |
191 |
size_t Kendall::n(void) const |
2758 |
08 Jul 12 |
peter |
192 |
{ |
2758 |
08 Jul 12 |
peter |
193 |
return pimpl_->n(); |
2758 |
08 Jul 12 |
peter |
194 |
} |
2758 |
08 Jul 12 |
peter |
195 |
|
2758 |
08 Jul 12 |
peter |
196 |
|
2649 |
14 Nov 11 |
peter |
197 |
double Kendall::score(void) const |
2649 |
14 Nov 11 |
peter |
198 |
{ |
2649 |
14 Nov 11 |
peter |
199 |
return pimpl_->score(); |
2649 |
14 Nov 11 |
peter |
200 |
} |
2649 |
14 Nov 11 |
peter |
201 |
|
2649 |
14 Nov 11 |
peter |
202 |
|
2649 |
14 Nov 11 |
peter |
203 |
double Kendall::p_left(bool exact) const |
2649 |
14 Nov 11 |
peter |
204 |
{ |
2662 |
21 Nov 11 |
peter |
205 |
if (!exact) |
2662 |
21 Nov 11 |
peter |
206 |
return pimpl_->p_approx(false); |
2649 |
14 Nov 11 |
peter |
207 |
return pimpl_->p_exact(false, true); |
2649 |
14 Nov 11 |
peter |
208 |
} |
2649 |
14 Nov 11 |
peter |
209 |
|
2649 |
14 Nov 11 |
peter |
210 |
|
2649 |
14 Nov 11 |
peter |
211 |
double Kendall::p_right(bool exact) const |
2649 |
14 Nov 11 |
peter |
212 |
{ |
2662 |
21 Nov 11 |
peter |
213 |
if (!exact) |
2662 |
21 Nov 11 |
peter |
214 |
return pimpl_->p_approx(true); |
2649 |
14 Nov 11 |
peter |
215 |
return pimpl_->p_exact(true, false); |
2649 |
14 Nov 11 |
peter |
216 |
} |
2649 |
14 Nov 11 |
peter |
217 |
|
2649 |
14 Nov 11 |
peter |
218 |
|
2649 |
14 Nov 11 |
peter |
219 |
double Kendall::p_value(bool exact) const |
2649 |
14 Nov 11 |
peter |
220 |
{ |
2649 |
14 Nov 11 |
peter |
221 |
if (exact) |
2649 |
14 Nov 11 |
peter |
222 |
return pimpl_->p_exact(true, true); |
2649 |
14 Nov 11 |
peter |
223 |
if (score()>0.0) |
2662 |
21 Nov 11 |
peter |
224 |
return 2*p_right(false); |
2662 |
21 Nov 11 |
peter |
225 |
return 2*p_left(false); |
2649 |
14 Nov 11 |
peter |
226 |
} |
2649 |
14 Nov 11 |
peter |
227 |
|
2649 |
14 Nov 11 |
peter |
228 |
|
2649 |
14 Nov 11 |
peter |
229 |
void Kendall::reset(void) |
2649 |
14 Nov 11 |
peter |
230 |
{ |
2649 |
14 Nov 11 |
peter |
231 |
pimpl_->reset(); |
2649 |
14 Nov 11 |
peter |
232 |
} |
2649 |
14 Nov 11 |
peter |
233 |
|
2760 |
08 Jul 12 |
peter |
234 |
|
2760 |
08 Jul 12 |
peter |
235 |
Kendall& Kendall::operator=(const Kendall& rhs) |
2760 |
08 Jul 12 |
peter |
236 |
{ |
2760 |
08 Jul 12 |
peter |
237 |
if (&rhs == this) |
2760 |
08 Jul 12 |
peter |
238 |
return *this; |
2760 |
08 Jul 12 |
peter |
239 |
|
2760 |
08 Jul 12 |
peter |
240 |
assert(pimpl_); |
2760 |
08 Jul 12 |
peter |
241 |
assert(rhs.pimpl_); |
2760 |
08 Jul 12 |
peter |
242 |
*pimpl_ = *rhs.pimpl_; |
2760 |
08 Jul 12 |
peter |
243 |
return *this; |
2760 |
08 Jul 12 |
peter |
244 |
} |
2760 |
08 Jul 12 |
peter |
245 |
|
4083 |
27 Aug 21 |
peter |
246 |
|
4083 |
27 Aug 21 |
peter |
247 |
Kendall& Kendall::operator=(Kendall&& rhs) |
4083 |
27 Aug 21 |
peter |
248 |
{ |
4083 |
27 Aug 21 |
peter |
249 |
std::swap(pimpl_, rhs.pimpl_); |
4083 |
27 Aug 21 |
peter |
250 |
return *this; |
4083 |
27 Aug 21 |
peter |
251 |
} |
4083 |
27 Aug 21 |
peter |
252 |
|
4083 |
27 Aug 21 |
peter |
253 |
|
4083 |
27 Aug 21 |
peter |
254 |
Kendall::Pimpl::Count::Count(const std::multiset<std::pair<double,double>>& data) |
4083 |
27 Aug 21 |
peter |
255 |
{ |
4083 |
27 Aug 21 |
peter |
// We follow 3 Algorithm SDTau for some-duplicate datasets in |
4083 |
27 Aug 21 |
peter |
// 'Fast Algorithms For The Calculation Of Kendall's Tau' |
4083 |
27 Aug 21 |
peter |
// by David Christen (Computational Statistics, March 2005) |
4083 |
27 Aug 21 |
peter |
259 |
|
4083 |
27 Aug 21 |
peter |
// data is sorted w.r.t. ::first |
4083 |
27 Aug 21 |
peter |
261 |
calculate_ties(utility::pair_first_iterator(data.begin()), |
4083 |
27 Aug 21 |
peter |
262 |
utility::pair_first_iterator(data.end()), |
4083 |
27 Aug 21 |
peter |
263 |
x_ties_); |
4083 |
27 Aug 21 |
peter |
264 |
|
4110 |
27 Sep 21 |
peter |
265 |
std::vector<double> vec(utility::pair_second_iterator(data.begin()), |
4110 |
27 Sep 21 |
peter |
266 |
utility::pair_second_iterator(data.end())); |
4110 |
27 Sep 21 |
peter |
267 |
std::sort(vec.begin(), vec.end()); |
4110 |
27 Sep 21 |
peter |
268 |
calculate_ties(vec.begin(), vec.end(), y_ties_); |
4110 |
27 Sep 21 |
peter |
269 |
|
4083 |
27 Aug 21 |
peter |
270 |
/* |
4083 |
27 Aug 21 |
peter |
y1 < y2 y2 == y2 y2 > y2 |
4083 |
27 Aug 21 |
peter |
x1 < x2 C eX D |
4083 |
27 Aug 21 |
peter |
x1 == x2 eY spare - |
4083 |
27 Aug 21 |
peter |
x1 > x2 - - - |
4083 |
27 Aug 21 |
peter |
275 |
|
4083 |
27 Aug 21 |
peter |
We categorise pairs into five categories: |
4083 |
27 Aug 21 |
peter |
C: Concordant |
4083 |
27 Aug 21 |
peter |
D: Discordant |
4083 |
27 Aug 21 |
peter |
eX: extra X; Ys and only Ys are equal |
4083 |
27 Aug 21 |
peter |
eY: extra Y; Xs and only Xs are equal |
4083 |
27 Aug 21 |
peter |
spare: both Xs and Yy are equal |
4083 |
27 Aug 21 |
peter |
282 |
|
4083 |
27 Aug 21 |
peter |
Due to symmetry reasons and because data container is sorted, we |
4083 |
27 Aug 21 |
peter |
can ignore lower part of the matrix above. |
4083 |
27 Aug 21 |
peter |
285 |
*/ |
4083 |
27 Aug 21 |
peter |
286 |
|
4083 |
27 Aug 21 |
peter |
287 |
concordant_ = 0; |
4083 |
27 Aug 21 |
peter |
288 |
discordant_ = 0; |
4083 |
27 Aug 21 |
peter |
289 |
extraX_ = 0; |
4083 |
27 Aug 21 |
peter |
290 |
extraY_ = 0; |
4083 |
27 Aug 21 |
peter |
291 |
|
4083 |
27 Aug 21 |
peter |
292 |
unsigned long int eY = 0; |
4083 |
27 Aug 21 |
peter |
// size of the current equal range, i.e., number of data points |
4083 |
27 Aug 21 |
peter |
// for X_i : X_j == X_i, Y_j == Y_i, j <= i including the current |
4083 |
27 Aug 21 |
peter |
// point |
4083 |
27 Aug 21 |
peter |
296 |
unsigned long int ties = 1; // because loop below skip first entry |
4083 |
27 Aug 21 |
peter |
297 |
utility::Ranking<double> Y; |
4083 |
27 Aug 21 |
peter |
298 |
|
4083 |
27 Aug 21 |
peter |
// loop over data, which is sorted w.r.t. ::first |
4083 |
27 Aug 21 |
peter |
300 |
auto previous = data.cbegin(); |
4083 |
27 Aug 21 |
peter |
301 |
assert(previous != data.cend()); |
4083 |
27 Aug 21 |
peter |
302 |
Y.insert(previous->second); |
4083 |
27 Aug 21 |
peter |
303 |
auto it = std::next(previous); |
4083 |
27 Aug 21 |
peter |
304 |
while (it!=data.cend()) { |
4083 |
27 Aug 21 |
peter |
305 |
assert(previous->first <= it->first); |
4083 |
27 Aug 21 |
peter |
// X not equal |
4083 |
27 Aug 21 |
peter |
307 |
if (it->first != previous->first) { |
4083 |
27 Aug 21 |
peter |
308 |
eY = 0; |
4083 |
27 Aug 21 |
peter |
309 |
ties = 1; |
4083 |
27 Aug 21 |
peter |
310 |
} |
4083 |
27 Aug 21 |
peter |
// y also equal |
4083 |
27 Aug 21 |
peter |
312 |
else if (it->second == previous->second) |
4083 |
27 Aug 21 |
peter |
313 |
++ties; |
4083 |
27 Aug 21 |
peter |
314 |
else { // x equal, y not equal |
4083 |
27 Aug 21 |
peter |
315 |
eY += ties; |
4083 |
27 Aug 21 |
peter |
316 |
ties = 1; |
4083 |
27 Aug 21 |
peter |
317 |
} |
4083 |
27 Aug 21 |
peter |
318 |
|
4083 |
27 Aug 21 |
peter |
319 |
auto lower = Y.insert(it->second); |
4083 |
27 Aug 21 |
peter |
320 |
if (lower!=Y.begin() && *std::prev(lower) == it->second) |
4083 |
27 Aug 21 |
peter |
321 |
lower = Y.lower_bound(it->second); |
4083 |
27 Aug 21 |
peter |
// number of element in Y smaller than it->second |
4083 |
27 Aug 21 |
peter |
323 |
int n_smaller = Y.ranking(lower); |
4083 |
27 Aug 21 |
peter |
// number of element in Y equal to it->second |
4083 |
27 Aug 21 |
peter |
325 |
int n_equal = 1; |
4083 |
27 Aug 21 |
peter |
326 |
assert(lower != Y.cend()); |
4083 |
27 Aug 21 |
peter |
327 |
auto upper = std::next(lower); |
4083 |
27 Aug 21 |
peter |
328 |
while (upper!=Y.cend() && *upper==*lower) { |
4083 |
27 Aug 21 |
peter |
329 |
++upper; |
4083 |
27 Aug 21 |
peter |
330 |
++n_equal; |
4083 |
27 Aug 21 |
peter |
331 |
} |
4083 |
27 Aug 21 |
peter |
332 |
size_t i = Y.size(); |
4083 |
27 Aug 21 |
peter |
333 |
|
4083 |
27 Aug 21 |
peter |
// n_smaller (y<yi) is the union of concordant (y<yi,x<xi) |
4083 |
27 Aug 21 |
peter |
// and eY (y<yi,x==xi) |
4083 |
27 Aug 21 |
peter |
336 |
int C = n_smaller - eY; |
4083 |
27 Aug 21 |
peter |
337 |
|
4083 |
27 Aug 21 |
peter |
338 |
int eX = n_equal - ties; |
4083 |
27 Aug 21 |
peter |
339 |
|
4083 |
27 Aug 21 |
peter |
340 |
int D = i - (C + eX + eY + ties); |
4083 |
27 Aug 21 |
peter |
341 |
|
4083 |
27 Aug 21 |
peter |
342 |
extraY_ += eY; |
4083 |
27 Aug 21 |
peter |
343 |
extraX_ += eX; |
4083 |
27 Aug 21 |
peter |
344 |
concordant_ += C; |
4083 |
27 Aug 21 |
peter |
345 |
discordant_ += D; |
4083 |
27 Aug 21 |
peter |
346 |
previous = it; |
4083 |
27 Aug 21 |
peter |
347 |
++it; |
4083 |
27 Aug 21 |
peter |
348 |
} |
4083 |
27 Aug 21 |
peter |
349 |
|
4083 |
27 Aug 21 |
peter |
350 |
} |
4083 |
27 Aug 21 |
peter |
351 |
|
4083 |
27 Aug 21 |
peter |
352 |
|
4083 |
27 Aug 21 |
peter |
353 |
long int Kendall::Pimpl::Count::count(void) const |
4083 |
27 Aug 21 |
peter |
354 |
{ |
4083 |
27 Aug 21 |
peter |
355 |
return concordant_ - discordant_; |
4083 |
27 Aug 21 |
peter |
356 |
} |
4083 |
27 Aug 21 |
peter |
357 |
|
4083 |
27 Aug 21 |
peter |
358 |
|
4083 |
27 Aug 21 |
peter |
359 |
double Kendall::Pimpl::Count::score(void) const |
4083 |
27 Aug 21 |
peter |
360 |
{ |
4083 |
27 Aug 21 |
peter |
361 |
double numerator = count(); |
4083 |
27 Aug 21 |
peter |
362 |
double denominator = concordant_ + discordant_; |
4083 |
27 Aug 21 |
peter |
363 |
if (extraX_ || extraY_) { |
4083 |
27 Aug 21 |
peter |
364 |
denominator = |
4083 |
27 Aug 21 |
peter |
365 |
std::sqrt((denominator + extraX_)*(denominator + extraY_)); |
4083 |
27 Aug 21 |
peter |
366 |
} |
4083 |
27 Aug 21 |
peter |
367 |
return numerator / denominator; |
4083 |
27 Aug 21 |
peter |
368 |
} |
4083 |
27 Aug 21 |
peter |
369 |
|
4083 |
27 Aug 21 |
peter |
370 |
|
4083 |
27 Aug 21 |
peter |
371 |
const Kendall::Pimpl::Count::Ties& Kendall::Pimpl::Count::x_ties(void) const |
4083 |
27 Aug 21 |
peter |
372 |
{ |
4083 |
27 Aug 21 |
peter |
373 |
return x_ties_; |
4083 |
27 Aug 21 |
peter |
374 |
} |
4083 |
27 Aug 21 |
peter |
375 |
|
4083 |
27 Aug 21 |
peter |
376 |
|
4083 |
27 Aug 21 |
peter |
377 |
const Kendall::Pimpl::Count::Ties& Kendall::Pimpl::Count::y_ties(void) const |
4083 |
27 Aug 21 |
peter |
378 |
{ |
4083 |
27 Aug 21 |
peter |
379 |
return y_ties_; |
4083 |
27 Aug 21 |
peter |
380 |
} |
4083 |
27 Aug 21 |
peter |
381 |
|
4083 |
27 Aug 21 |
peter |
382 |
|
4083 |
27 Aug 21 |
peter |
383 |
double Kendall::Pimpl::variance(void) const |
4083 |
27 Aug 21 |
peter |
384 |
{ |
4083 |
27 Aug 21 |
peter |
385 |
/* |
4083 |
27 Aug 21 |
peter |
According to wikipedia, |
4083 |
27 Aug 21 |
peter |
z = k / sqrt(v) |
4083 |
27 Aug 21 |
peter |
is approximately standard normal |
4083 |
27 Aug 21 |
peter |
v = (v0 - vt - vu)/18 + v1 + v2 |
4083 |
27 Aug 21 |
peter |
v0 = n(n-1)(2n+5) |
4083 |
27 Aug 21 |
peter |
vt = \sum t(t-1)(2t+5) |
4083 |
27 Aug 21 |
peter |
vu = \sum u(u-1)(2u+5) |
4083 |
27 Aug 21 |
peter |
v1 = \sum t(t-1)) * \sum u(u-1) / (2n(n-1)) |
4083 |
27 Aug 21 |
peter |
v2 = sum t(t-1)(t-2) \sum u(u-1)(u-2) / (9n(n-1)(n-2)) |
4083 |
27 Aug 21 |
peter |
395 |
|
4110 |
27 Sep 21 |
peter |
where t is number of equal x values in group i and similarly u for |
4083 |
27 Aug 21 |
peter |
y. |
4083 |
27 Aug 21 |
peter |
398 |
*/ |
4083 |
27 Aug 21 |
peter |
399 |
double n = data_.size(); |
4083 |
27 Aug 21 |
peter |
400 |
double v0 = n*(n-1)*(2*n+5); |
4083 |
27 Aug 21 |
peter |
401 |
double vt = 0; |
4083 |
27 Aug 21 |
peter |
402 |
double vu = 0; |
4083 |
27 Aug 21 |
peter |
403 |
double v1 = 0; |
4083 |
27 Aug 21 |
peter |
404 |
double v2 = 0; |
4083 |
27 Aug 21 |
peter |
405 |
assert(count_); |
4083 |
27 Aug 21 |
peter |
406 |
auto& x_ties = count_->x_ties(); |
4083 |
27 Aug 21 |
peter |
407 |
auto& y_ties = count_->y_ties(); |
4083 |
27 Aug 21 |
peter |
// all correction terms above are zero in absence of ties |
4083 |
27 Aug 21 |
peter |
409 |
bool x_have_ties = x_ties.have_ties(); |
4083 |
27 Aug 21 |
peter |
410 |
bool y_have_ties = y_ties.have_ties(); |
4083 |
27 Aug 21 |
peter |
411 |
if (x_have_ties || y_have_ties) { |
4083 |
27 Aug 21 |
peter |
412 |
if (x_have_ties) |
4083 |
27 Aug 21 |
peter |
413 |
vt = x_ties.v_correction(); |
4083 |
27 Aug 21 |
peter |
414 |
if (y_have_ties) { |
4083 |
27 Aug 21 |
peter |
415 |
vu = y_ties.v_correction(); |
4083 |
27 Aug 21 |
peter |
416 |
if (x_have_ties) { |
4083 |
27 Aug 21 |
peter |
417 |
v1 = x_ties.n_pairs() * (y_ties.n_pairs() / (2*n*(n-1))); |
4083 |
27 Aug 21 |
peter |
418 |
v2 = x_ties.n_triples(); |
4083 |
27 Aug 21 |
peter |
419 |
if (v2) |
4083 |
27 Aug 21 |
peter |
420 |
v2 *= y_ties.n_triples() / (9*n*(n-1)*(n-2)); |
4083 |
27 Aug 21 |
peter |
421 |
} |
4083 |
27 Aug 21 |
peter |
422 |
} |
4083 |
27 Aug 21 |
peter |
423 |
} |
4083 |
27 Aug 21 |
peter |
424 |
return (v0 - vt - vu)/18 + v1 + v2; |
4083 |
27 Aug 21 |
peter |
425 |
} |
4083 |
27 Aug 21 |
peter |
426 |
|
4083 |
27 Aug 21 |
peter |
427 |
|
4083 |
27 Aug 21 |
peter |
428 |
Kendall::Pimpl::Count::Ties::Ties(void) |
4083 |
27 Aug 21 |
peter |
429 |
: n_pairs_(0), n_triples_(0), v_correction_(0) |
4083 |
27 Aug 21 |
peter |
430 |
{} |
4083 |
27 Aug 21 |
peter |
431 |
|
4083 |
27 Aug 21 |
peter |
432 |
|
4083 |
27 Aug 21 |
peter |
433 |
void Kendall::Pimpl::Count::Ties::add(size_t n) |
4083 |
27 Aug 21 |
peter |
434 |
{ |
4083 |
27 Aug 21 |
peter |
435 |
unsigned long int factor = n * (n-1); |
4083 |
27 Aug 21 |
peter |
436 |
n_pairs_ += factor; |
4083 |
27 Aug 21 |
peter |
437 |
n_triples_ += factor * (n-2); |
4083 |
27 Aug 21 |
peter |
438 |
v_correction_ += factor * (2*n+5); |
4083 |
27 Aug 21 |
peter |
439 |
} |
4083 |
27 Aug 21 |
peter |
440 |
|
4083 |
27 Aug 21 |
peter |
441 |
|
4083 |
27 Aug 21 |
peter |
442 |
Kendall::Pimpl::Pimpl(void) |
4083 |
27 Aug 21 |
peter |
443 |
{} |
4083 |
27 Aug 21 |
peter |
444 |
|
4083 |
27 Aug 21 |
peter |
445 |
|
4083 |
27 Aug 21 |
peter |
446 |
Kendall::Pimpl::Pimpl(const Pimpl& other) |
4083 |
27 Aug 21 |
peter |
447 |
: data_(other.data_) |
4083 |
27 Aug 21 |
peter |
448 |
{} |
4083 |
27 Aug 21 |
peter |
449 |
|
4083 |
27 Aug 21 |
peter |
450 |
|
4083 |
27 Aug 21 |
peter |
451 |
Kendall::Pimpl& Kendall::Pimpl::operator=(const Pimpl& rhs) |
4083 |
27 Aug 21 |
peter |
452 |
{ |
4083 |
27 Aug 21 |
peter |
453 |
data_ = rhs.data_; |
4083 |
27 Aug 21 |
peter |
454 |
count_.reset(); |
4083 |
27 Aug 21 |
peter |
455 |
return *this; |
4083 |
27 Aug 21 |
peter |
456 |
} |
4083 |
27 Aug 21 |
peter |
457 |
|
4083 |
27 Aug 21 |
peter |
458 |
|
4083 |
27 Aug 21 |
peter |
459 |
void Kendall::Pimpl::add(double x, double y) |
4083 |
27 Aug 21 |
peter |
460 |
{ |
4083 |
27 Aug 21 |
peter |
461 |
data_.insert(std::make_pair(x, y)); |
4083 |
27 Aug 21 |
peter |
462 |
count_.reset(); |
4083 |
27 Aug 21 |
peter |
463 |
} |
4083 |
27 Aug 21 |
peter |
464 |
|
4083 |
27 Aug 21 |
peter |
465 |
|
4083 |
27 Aug 21 |
peter |
466 |
size_t Kendall::Pimpl::n(void) const |
4083 |
27 Aug 21 |
peter |
467 |
{ |
4083 |
27 Aug 21 |
peter |
468 |
return data_.size(); |
4083 |
27 Aug 21 |
peter |
469 |
} |
4083 |
27 Aug 21 |
peter |
470 |
|
4083 |
27 Aug 21 |
peter |
471 |
|
4083 |
27 Aug 21 |
peter |
472 |
double Kendall::Pimpl::p_approx(bool right) const |
4083 |
27 Aug 21 |
peter |
473 |
{ |
4083 |
27 Aug 21 |
peter |
474 |
double k = count(); |
4083 |
27 Aug 21 |
peter |
475 |
if (!right) |
4083 |
27 Aug 21 |
peter |
476 |
k = -k; |
4083 |
27 Aug 21 |
peter |
477 |
return gsl_cdf_gaussian_Q(k, std::sqrt(variance())); |
4083 |
27 Aug 21 |
peter |
478 |
} |
4083 |
27 Aug 21 |
peter |
479 |
|
4083 |
27 Aug 21 |
peter |
480 |
|
4083 |
27 Aug 21 |
peter |
481 |
double Kendall::Pimpl::p_exact(bool right, bool left) const |
4083 |
27 Aug 21 |
peter |
482 |
{ |
4083 |
27 Aug 21 |
peter |
483 |
long int upper = 0; |
4083 |
27 Aug 21 |
peter |
484 |
long int lower = 0; |
4083 |
27 Aug 21 |
peter |
485 |
if (right) { |
4083 |
27 Aug 21 |
peter |
486 |
if (left) { |
4083 |
27 Aug 21 |
peter |
487 |
upper = std::max(count(), -count()); |
4083 |
27 Aug 21 |
peter |
488 |
lower = -upper; |
4083 |
27 Aug 21 |
peter |
489 |
} |
4083 |
27 Aug 21 |
peter |
490 |
else { |
4083 |
27 Aug 21 |
peter |
491 |
upper = count(); |
4083 |
27 Aug 21 |
peter |
492 |
lower = std::numeric_limits<long int>::min(); |
4083 |
27 Aug 21 |
peter |
493 |
} |
4083 |
27 Aug 21 |
peter |
494 |
} |
4083 |
27 Aug 21 |
peter |
495 |
else { |
4083 |
27 Aug 21 |
peter |
496 |
assert(left && "left or right must be true"); |
4083 |
27 Aug 21 |
peter |
497 |
upper = std::numeric_limits<long int>::max(); |
4083 |
27 Aug 21 |
peter |
498 |
lower = count(); |
4083 |
27 Aug 21 |
peter |
499 |
} |
4083 |
27 Aug 21 |
peter |
500 |
|
4083 |
27 Aug 21 |
peter |
// create a copy of the data, sort it with respect to ::second and |
4083 |
27 Aug 21 |
peter |
// then iterate through the permutations of second while keeping |
4083 |
27 Aug 21 |
peter |
// first constant. It means we need to do one extra initial sort, |
4083 |
27 Aug 21 |
peter |
// but OTOH the permuted data is always almost sorted. |
4083 |
27 Aug 21 |
peter |
505 |
std::vector<std::pair<double,double>> data(data_.begin(), data_.end()); |
4083 |
27 Aug 21 |
peter |
506 |
using utility::pair_second_iterator; |
4083 |
27 Aug 21 |
peter |
507 |
std::sort(pair_second_iterator(data.begin()), |
4083 |
27 Aug 21 |
peter |
508 |
pair_second_iterator(data.end())); |
4083 |
27 Aug 21 |
peter |
509 |
unsigned int n = 0; |
4083 |
27 Aug 21 |
peter |
510 |
unsigned int total = 0; |
4083 |
27 Aug 21 |
peter |
511 |
do { |
4083 |
27 Aug 21 |
peter |
512 |
std::multiset<std::pair<double,double>> |
4083 |
27 Aug 21 |
peter |
513 |
dataset(data.begin(), data.end()); |
4083 |
27 Aug 21 |
peter |
514 |
Count count(dataset); |
4083 |
27 Aug 21 |
peter |
515 |
if (count.count() <= lower || count.count() >= upper) |
4083 |
27 Aug 21 |
peter |
516 |
++n; |
4083 |
27 Aug 21 |
peter |
517 |
++total; |
4083 |
27 Aug 21 |
peter |
518 |
} |
4083 |
27 Aug 21 |
peter |
519 |
while (std::next_permutation(pair_second_iterator(data.begin()), |
4083 |
27 Aug 21 |
peter |
520 |
pair_second_iterator(data.end()))); |
4083 |
27 Aug 21 |
peter |
521 |
|
4083 |
27 Aug 21 |
peter |
522 |
return static_cast<double>(n)/static_cast<double>(total); |
4083 |
27 Aug 21 |
peter |
523 |
} |
4083 |
27 Aug 21 |
peter |
524 |
|
4083 |
27 Aug 21 |
peter |
525 |
|
4083 |
27 Aug 21 |
peter |
526 |
void Kendall::Pimpl::reset(void) |
4083 |
27 Aug 21 |
peter |
527 |
{ |
4083 |
27 Aug 21 |
peter |
528 |
Pimpl tmp; |
4083 |
27 Aug 21 |
peter |
529 |
*this = tmp; |
4083 |
27 Aug 21 |
peter |
530 |
} |
4083 |
27 Aug 21 |
peter |
531 |
|
4083 |
27 Aug 21 |
peter |
532 |
|
4083 |
27 Aug 21 |
peter |
533 |
double Kendall::Pimpl::score(void) const |
4083 |
27 Aug 21 |
peter |
534 |
{ |
4083 |
27 Aug 21 |
peter |
535 |
count(); |
4083 |
27 Aug 21 |
peter |
536 |
assert(count_.get()); |
4083 |
27 Aug 21 |
peter |
537 |
return count_->score(); |
4083 |
27 Aug 21 |
peter |
538 |
} |
4083 |
27 Aug 21 |
peter |
539 |
|
4083 |
27 Aug 21 |
peter |
540 |
|
4083 |
27 Aug 21 |
peter |
541 |
long int Kendall::Pimpl::count(void) const |
4083 |
27 Aug 21 |
peter |
542 |
{ |
4083 |
27 Aug 21 |
peter |
543 |
if (!count_) |
4083 |
27 Aug 21 |
peter |
// const_cast to allow lazy eval is more restrictive than |
4083 |
27 Aug 21 |
peter |
// making count_ mutable. |
4083 |
27 Aug 21 |
peter |
546 |
const_cast<Pimpl*>(this)->count_.reset(new Count(data_)); |
4083 |
27 Aug 21 |
peter |
547 |
return count_->count(); |
4083 |
27 Aug 21 |
peter |
548 |
} |
4083 |
27 Aug 21 |
peter |
549 |
|
2649 |
14 Nov 11 |
peter |
550 |
}}} // of namespace statistics, yat, and theplu |