2635 |
09 Nov 11 |
peter |
// $Id$ |
2635 |
09 Nov 11 |
peter |
2 |
|
2635 |
09 Nov 11 |
peter |
3 |
/* |
2919 |
19 Dec 12 |
peter |
Copyright (C) 2011, 2012 Peter Johansson |
2635 |
09 Nov 11 |
peter |
5 |
|
2635 |
09 Nov 11 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2635 |
09 Nov 11 |
peter |
7 |
|
2635 |
09 Nov 11 |
peter |
The yat library is free software; you can redistribute it and/or |
2635 |
09 Nov 11 |
peter |
modify it under the terms of the GNU General Public License as |
2635 |
09 Nov 11 |
peter |
published by the Free Software Foundation; either version 3 of the |
2635 |
09 Nov 11 |
peter |
License, or (at your option) any later version. |
2635 |
09 Nov 11 |
peter |
12 |
|
2635 |
09 Nov 11 |
peter |
The yat library is distributed in the hope that it will be useful, |
2635 |
09 Nov 11 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2635 |
09 Nov 11 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2635 |
09 Nov 11 |
peter |
General Public License for more details. |
2635 |
09 Nov 11 |
peter |
17 |
|
2635 |
09 Nov 11 |
peter |
You should have received a copy of the GNU General Public License |
2635 |
09 Nov 11 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2635 |
09 Nov 11 |
peter |
20 |
*/ |
2635 |
09 Nov 11 |
peter |
21 |
|
2881 |
18 Nov 12 |
peter |
22 |
#include <config.h> |
2881 |
18 Nov 12 |
peter |
23 |
|
2635 |
09 Nov 11 |
peter |
24 |
#include "Spearman.h" |
2635 |
09 Nov 11 |
peter |
25 |
|
2638 |
10 Nov 11 |
peter |
26 |
#include "Averager.h" |
2638 |
10 Nov 11 |
peter |
27 |
#include "AveragerPair.h" |
2642 |
10 Nov 11 |
peter |
28 |
#include "utility.h" |
2638 |
10 Nov 11 |
peter |
29 |
|
2642 |
10 Nov 11 |
peter |
30 |
#include <gsl/gsl_cdf.h> |
2642 |
10 Nov 11 |
peter |
31 |
|
2642 |
10 Nov 11 |
peter |
32 |
#include <algorithm> |
2638 |
10 Nov 11 |
peter |
33 |
#include <cassert> |
2642 |
10 Nov 11 |
peter |
34 |
#include <cmath> |
2642 |
10 Nov 11 |
peter |
35 |
#include <limits> |
2638 |
10 Nov 11 |
peter |
36 |
#include <list> |
2638 |
10 Nov 11 |
peter |
37 |
#include <map> |
2638 |
10 Nov 11 |
peter |
38 |
|
2635 |
09 Nov 11 |
peter |
39 |
namespace theplu { |
2635 |
09 Nov 11 |
peter |
40 |
namespace yat { |
2642 |
10 Nov 11 |
peter |
41 |
namespace statistics { |
2635 |
09 Nov 11 |
peter |
42 |
|
2638 |
10 Nov 11 |
peter |
// forward declaration |
2638 |
10 Nov 11 |
peter |
44 |
class Entry; |
2638 |
10 Nov 11 |
peter |
// for convenience |
2638 |
10 Nov 11 |
peter |
46 |
typedef std::list<Entry> List; |
2638 |
10 Nov 11 |
peter |
47 |
|
2638 |
10 Nov 11 |
peter |
// struct for storing one pair of data |
2638 |
10 Nov 11 |
peter |
49 |
struct Entry |
2635 |
09 Nov 11 |
peter |
50 |
{ |
2638 |
10 Nov 11 |
peter |
// ranks are double so we can handle ties |
2638 |
10 Nov 11 |
peter |
52 |
double x_rank_; |
2638 |
10 Nov 11 |
peter |
53 |
double y_rank_; |
2642 |
10 Nov 11 |
peter |
54 |
double& rank(bool x) |
2642 |
10 Nov 11 |
peter |
55 |
{ |
2642 |
10 Nov 11 |
peter |
56 |
if (x) |
2642 |
10 Nov 11 |
peter |
57 |
return x_rank_; |
2638 |
10 Nov 11 |
peter |
58 |
return y_rank_; |
2638 |
10 Nov 11 |
peter |
59 |
} |
2638 |
10 Nov 11 |
peter |
60 |
std::multimap<double, List::iterator>::iterator x_map_; |
2638 |
10 Nov 11 |
peter |
61 |
std::multimap<double, List::iterator>::iterator y_map_; |
2638 |
10 Nov 11 |
peter |
62 |
}; |
2635 |
09 Nov 11 |
peter |
63 |
|
2638 |
10 Nov 11 |
peter |
64 |
|
2638 |
10 Nov 11 |
peter |
/// internal class in Spearman |
2638 |
10 Nov 11 |
peter |
66 |
class Spearman::Pimpl |
2638 |
10 Nov 11 |
peter |
67 |
{ |
2638 |
10 Nov 11 |
peter |
68 |
public: |
2642 |
10 Nov 11 |
peter |
69 |
Pimpl(void) |
2645 |
14 Nov 11 |
peter |
70 |
: have_ties_(false) |
2642 |
10 Nov 11 |
peter |
71 |
{ |
2642 |
10 Nov 11 |
peter |
72 |
} |
2642 |
10 Nov 11 |
peter |
73 |
|
2642 |
10 Nov 11 |
peter |
74 |
|
2638 |
10 Nov 11 |
peter |
75 |
void add(double x, double y) |
2638 |
10 Nov 11 |
peter |
76 |
{ |
2638 |
10 Nov 11 |
peter |
77 |
list_.push_back(Entry()); |
2638 |
10 Nov 11 |
peter |
78 |
List::iterator iter = list_.end(); |
2638 |
10 Nov 11 |
peter |
// take a step back to element we just added |
2638 |
10 Nov 11 |
peter |
80 |
--iter; |
2638 |
10 Nov 11 |
peter |
81 |
Entry& e = *iter; |
2638 |
10 Nov 11 |
peter |
82 |
e.x_map_ = x_.insert(std::make_pair(x, iter)); |
2638 |
10 Nov 11 |
peter |
83 |
e.y_map_ = y_.insert(std::make_pair(y, iter)); |
2638 |
10 Nov 11 |
peter |
84 |
assert(x_.size()==y_.size()); |
2642 |
10 Nov 11 |
peter |
// invalidate cache |
2645 |
14 Nov 11 |
peter |
86 |
ap_.reset(); |
2638 |
10 Nov 11 |
peter |
87 |
} |
2638 |
10 Nov 11 |
peter |
88 |
|
2638 |
10 Nov 11 |
peter |
89 |
|
2642 |
10 Nov 11 |
peter |
90 |
size_t n(void) const |
2638 |
10 Nov 11 |
peter |
91 |
{ |
2642 |
10 Nov 11 |
peter |
92 |
assert(list_.size()==x_.size()); |
2642 |
10 Nov 11 |
peter |
93 |
assert(list_.size()==y_.size()); |
2642 |
10 Nov 11 |
peter |
94 |
return list_.size(); |
2642 |
10 Nov 11 |
peter |
95 |
} |
2642 |
10 Nov 11 |
peter |
96 |
|
2642 |
10 Nov 11 |
peter |
97 |
|
2642 |
10 Nov 11 |
peter |
98 |
double p_exact(bool right, bool left) |
2642 |
10 Nov 11 |
peter |
99 |
{ |
2642 |
10 Nov 11 |
peter |
100 |
assert(right || left); |
2645 |
14 Nov 11 |
peter |
// be sure ap_ is updated |
2645 |
14 Nov 11 |
peter |
102 |
score(); |
2645 |
14 Nov 11 |
peter |
103 |
|
2645 |
14 Nov 11 |
peter |
// let's define two values upper and lower, and we will count |
2645 |
14 Nov 11 |
peter |
// outcomes with score either >=upper or <=lower. To avoid |
2645 |
14 Nov 11 |
peter |
// rounding error and avoid calculating sqrt N times we compare |
2645 |
14 Nov 11 |
peter |
// only the numerator in correlation calculation wich is |
2645 |
14 Nov 11 |
peter |
// AveragerPair::sum_xy_centered. |
2645 |
14 Nov 11 |
peter |
109 |
|
2642 |
10 Nov 11 |
peter |
// two-sided case |
2645 |
14 Nov 11 |
peter |
111 |
double upper = std::abs(ap_.sum_xy_centered()); |
2642 |
10 Nov 11 |
peter |
112 |
double lower = -upper; |
2642 |
10 Nov 11 |
peter |
113 |
if (right && !left) { |
2645 |
14 Nov 11 |
peter |
114 |
upper = ap_.sum_xy_centered(); |
2645 |
14 Nov 11 |
peter |
115 |
lower = -std::numeric_limits<double>::infinity(); |
2642 |
10 Nov 11 |
peter |
116 |
} |
2642 |
10 Nov 11 |
peter |
117 |
else if (left && !right) { |
2645 |
14 Nov 11 |
peter |
118 |
upper = std::numeric_limits<double>::infinity(); |
2645 |
14 Nov 11 |
peter |
119 |
lower = ap_.sum_xy_centered(); |
2642 |
10 Nov 11 |
peter |
120 |
} |
2642 |
10 Nov 11 |
peter |
121 |
|
2645 |
14 Nov 11 |
peter |
// avoid rounding errors |
2645 |
14 Nov 11 |
peter |
123 |
upper -= 10*std::numeric_limits<double>::epsilon(); |
2645 |
14 Nov 11 |
peter |
124 |
lower += 10*std::numeric_limits<double>::epsilon(); |
2645 |
14 Nov 11 |
peter |
125 |
|
2642 |
10 Nov 11 |
peter |
126 |
unsigned long int count = 0; |
2642 |
10 Nov 11 |
peter |
127 |
unsigned long int total = 0; |
2642 |
10 Nov 11 |
peter |
128 |
std::vector<double> x; |
2642 |
10 Nov 11 |
peter |
129 |
x.reserve(n()); |
2642 |
10 Nov 11 |
peter |
130 |
std::vector<double> y; |
2642 |
10 Nov 11 |
peter |
131 |
y.reserve(n()); |
2638 |
10 Nov 11 |
peter |
132 |
for (List::const_iterator i=list_.begin(); i!=list_.end(); ++i) { |
2642 |
10 Nov 11 |
peter |
133 |
x.push_back(i->x_rank_); |
2642 |
10 Nov 11 |
peter |
134 |
y.push_back(i->y_rank_); |
2638 |
10 Nov 11 |
peter |
135 |
} |
2642 |
10 Nov 11 |
peter |
136 |
std::sort(x.begin(), x.end()); |
2642 |
10 Nov 11 |
peter |
137 |
std::sort(y.begin(), y.end()); |
2642 |
10 Nov 11 |
peter |
138 |
|
2642 |
10 Nov 11 |
peter |
139 |
while (true) { |
2642 |
10 Nov 11 |
peter |
140 |
AveragerPair ap; |
2642 |
10 Nov 11 |
peter |
141 |
statistics::add(ap, x.begin(), x.end(), y.begin()); |
2645 |
14 Nov 11 |
peter |
142 |
double r = ap.sum_xy_centered(); |
2642 |
10 Nov 11 |
peter |
143 |
if (r>=upper || r<=lower) |
2642 |
10 Nov 11 |
peter |
144 |
++count; |
2642 |
10 Nov 11 |
peter |
145 |
++total; |
2642 |
10 Nov 11 |
peter |
146 |
if (!std::next_permutation(y.begin(), y.end())) |
2642 |
10 Nov 11 |
peter |
147 |
break; |
2642 |
10 Nov 11 |
peter |
148 |
} |
2642 |
10 Nov 11 |
peter |
149 |
|
2642 |
10 Nov 11 |
peter |
150 |
return static_cast<double>(count)/static_cast<double>(total); |
2638 |
10 Nov 11 |
peter |
151 |
} |
2638 |
10 Nov 11 |
peter |
152 |
|
2638 |
10 Nov 11 |
peter |
153 |
|
2642 |
10 Nov 11 |
peter |
154 |
void reset(void) |
2642 |
10 Nov 11 |
peter |
155 |
{ |
2642 |
10 Nov 11 |
peter |
156 |
list_.clear(); |
2642 |
10 Nov 11 |
peter |
157 |
x_.clear(); |
2642 |
10 Nov 11 |
peter |
158 |
y_.clear(); |
2645 |
14 Nov 11 |
peter |
159 |
ap_.reset(); |
2642 |
10 Nov 11 |
peter |
160 |
have_ties_ = false; |
2642 |
10 Nov 11 |
peter |
161 |
} |
2642 |
10 Nov 11 |
peter |
162 |
|
2642 |
10 Nov 11 |
peter |
163 |
|
2642 |
10 Nov 11 |
peter |
164 |
double score(void) |
2642 |
10 Nov 11 |
peter |
165 |
{ |
2645 |
14 Nov 11 |
peter |
166 |
if (ap_.n()==0 && !list_.empty()) { |
2642 |
10 Nov 11 |
peter |
167 |
update_ranks(); |
2642 |
10 Nov 11 |
peter |
168 |
for (List::const_iterator i=list_.begin(); i!=list_.end(); ++i) |
2645 |
14 Nov 11 |
peter |
169 |
ap_.add(i->x_rank_, i->y_rank_); |
2642 |
10 Nov 11 |
peter |
170 |
} |
2645 |
14 Nov 11 |
peter |
171 |
return ap_.correlation(); |
2642 |
10 Nov 11 |
peter |
172 |
} |
2642 |
10 Nov 11 |
peter |
173 |
|
2642 |
10 Nov 11 |
peter |
174 |
|
2638 |
10 Nov 11 |
peter |
175 |
void update_ranks(void) |
2638 |
10 Nov 11 |
peter |
176 |
{ |
2638 |
10 Nov 11 |
peter |
177 |
update_ranks(x_, true); |
2638 |
10 Nov 11 |
peter |
178 |
update_ranks(y_, false); |
2638 |
10 Nov 11 |
peter |
179 |
} |
2638 |
10 Nov 11 |
peter |
180 |
|
2638 |
10 Nov 11 |
peter |
181 |
|
2638 |
10 Nov 11 |
peter |
182 |
void update_ranks(std::multimap<double, List::iterator>& m, bool x) |
2638 |
10 Nov 11 |
peter |
183 |
{ |
2638 |
10 Nov 11 |
peter |
184 |
typedef std::multimap<double, List::iterator>::iterator iterator; |
2638 |
10 Nov 11 |
peter |
185 |
iterator lower=m.begin(); |
2638 |
10 Nov 11 |
peter |
186 |
size_t n=0; |
2638 |
10 Nov 11 |
peter |
187 |
while (lower!=m.end()) { |
2638 |
10 Nov 11 |
peter |
188 |
iterator upper = lower; |
2638 |
10 Nov 11 |
peter |
189 |
Averager averager; |
2638 |
10 Nov 11 |
peter |
190 |
while (upper->first == lower->first) { |
2638 |
10 Nov 11 |
peter |
191 |
++n; |
2638 |
10 Nov 11 |
peter |
192 |
averager.add(n); |
2638 |
10 Nov 11 |
peter |
193 |
++upper; |
2638 |
10 Nov 11 |
peter |
194 |
} |
2638 |
10 Nov 11 |
peter |
195 |
for (; lower!=upper; ++lower) |
2638 |
10 Nov 11 |
peter |
196 |
lower->second->rank(x) = averager.mean(); |
2638 |
10 Nov 11 |
peter |
197 |
} |
2638 |
10 Nov 11 |
peter |
198 |
} |
2638 |
10 Nov 11 |
peter |
199 |
|
2645 |
14 Nov 11 |
peter |
200 |
AveragerPair ap_; |
2638 |
10 Nov 11 |
peter |
201 |
List list_; |
2638 |
10 Nov 11 |
peter |
202 |
std::multimap<double, List::iterator> x_; |
2638 |
10 Nov 11 |
peter |
203 |
std::multimap<double, List::iterator> y_; |
2642 |
10 Nov 11 |
peter |
204 |
bool have_ties_; |
2642 |
10 Nov 11 |
peter |
205 |
|
2642 |
10 Nov 11 |
peter |
206 |
private: |
2642 |
10 Nov 11 |
peter |
207 |
Pimpl& operator=(const Pimpl& rhs); |
2642 |
10 Nov 11 |
peter |
208 |
Pimpl(Pimpl&); |
2638 |
10 Nov 11 |
peter |
209 |
}; |
2638 |
10 Nov 11 |
peter |
210 |
|
2638 |
10 Nov 11 |
peter |
211 |
|
2640 |
10 Nov 11 |
peter |
// Spearman class |
2640 |
10 Nov 11 |
peter |
213 |
|
2638 |
10 Nov 11 |
peter |
214 |
Spearman::Spearman(void) |
2638 |
10 Nov 11 |
peter |
215 |
: pimpl_(new Pimpl) |
2638 |
10 Nov 11 |
peter |
216 |
{ |
2635 |
09 Nov 11 |
peter |
217 |
} |
2635 |
09 Nov 11 |
peter |
218 |
|
2635 |
09 Nov 11 |
peter |
219 |
|
2638 |
10 Nov 11 |
peter |
220 |
Spearman::~Spearman(void) |
2638 |
10 Nov 11 |
peter |
221 |
{ |
2638 |
10 Nov 11 |
peter |
222 |
delete pimpl_; |
2638 |
10 Nov 11 |
peter |
223 |
} |
2638 |
10 Nov 11 |
peter |
224 |
|
2638 |
10 Nov 11 |
peter |
225 |
|
2640 |
10 Nov 11 |
peter |
226 |
void Spearman::add(double x, double y) |
2638 |
10 Nov 11 |
peter |
227 |
{ |
2638 |
10 Nov 11 |
peter |
228 |
pimpl_->add(x, y); |
2638 |
10 Nov 11 |
peter |
229 |
} |
2638 |
10 Nov 11 |
peter |
230 |
|
2638 |
10 Nov 11 |
peter |
231 |
|
2648 |
14 Nov 11 |
peter |
232 |
size_t Spearman::n(void) const |
2635 |
09 Nov 11 |
peter |
233 |
{ |
2648 |
14 Nov 11 |
peter |
234 |
return pimpl_->n(); |
2635 |
09 Nov 11 |
peter |
235 |
} |
2635 |
09 Nov 11 |
peter |
236 |
|
2637 |
10 Nov 11 |
peter |
237 |
|
2637 |
10 Nov 11 |
peter |
238 |
double Spearman::p_left(bool exact) const |
2637 |
10 Nov 11 |
peter |
239 |
{ |
2642 |
10 Nov 11 |
peter |
240 |
if (pimpl_->n()<2) |
2642 |
10 Nov 11 |
peter |
241 |
return std::numeric_limits<double>::quiet_NaN(); |
2642 |
10 Nov 11 |
peter |
242 |
if (!exact) |
2642 |
10 Nov 11 |
peter |
243 |
return pearson_p_value(-score(), pimpl_->n()); |
2642 |
10 Nov 11 |
peter |
244 |
return pimpl_->p_exact(false, true); |
2637 |
10 Nov 11 |
peter |
245 |
} |
2637 |
10 Nov 11 |
peter |
246 |
|
2637 |
10 Nov 11 |
peter |
247 |
|
2637 |
10 Nov 11 |
peter |
248 |
double Spearman::p_right(bool exact) const |
2637 |
10 Nov 11 |
peter |
249 |
{ |
2642 |
10 Nov 11 |
peter |
250 |
if (pimpl_->n()<2) |
2642 |
10 Nov 11 |
peter |
251 |
return std::numeric_limits<double>::quiet_NaN(); |
2642 |
10 Nov 11 |
peter |
252 |
if (!exact) { |
2642 |
10 Nov 11 |
peter |
253 |
return pearson_p_value(score(), pimpl_->n()); |
2642 |
10 Nov 11 |
peter |
254 |
} |
2642 |
10 Nov 11 |
peter |
255 |
return pimpl_->p_exact(true, false); |
2637 |
10 Nov 11 |
peter |
256 |
} |
2637 |
10 Nov 11 |
peter |
257 |
|
2637 |
10 Nov 11 |
peter |
258 |
|
2637 |
10 Nov 11 |
peter |
259 |
double Spearman::p_value(bool exact) const |
2637 |
10 Nov 11 |
peter |
260 |
{ |
2642 |
10 Nov 11 |
peter |
261 |
if (exact) |
2642 |
10 Nov 11 |
peter |
262 |
return pimpl_->p_exact(true, true); |
2642 |
10 Nov 11 |
peter |
263 |
if (score()>0.0) |
2642 |
10 Nov 11 |
peter |
264 |
return 2*p_right(exact); |
2642 |
10 Nov 11 |
peter |
265 |
return 2*p_left(exact); |
2637 |
10 Nov 11 |
peter |
266 |
} |
2637 |
10 Nov 11 |
peter |
267 |
|
2637 |
10 Nov 11 |
peter |
268 |
|
2637 |
10 Nov 11 |
peter |
269 |
void Spearman::reset(void) |
2637 |
10 Nov 11 |
peter |
270 |
{ |
2642 |
10 Nov 11 |
peter |
271 |
pimpl_->reset(); |
2637 |
10 Nov 11 |
peter |
272 |
} |
2637 |
10 Nov 11 |
peter |
273 |
|
2648 |
14 Nov 11 |
peter |
274 |
|
2648 |
14 Nov 11 |
peter |
275 |
double Spearman::score(void) const |
2648 |
14 Nov 11 |
peter |
276 |
{ |
2648 |
14 Nov 11 |
peter |
277 |
return pimpl_->score(); |
2648 |
14 Nov 11 |
peter |
278 |
} |
2648 |
14 Nov 11 |
peter |
279 |
|
2635 |
09 Nov 11 |
peter |
280 |
}}} // of namespace statistics, yat, and theplu |