2998 |
14 Mar 13 |
peter |
// $Id$ |
2998 |
14 Mar 13 |
peter |
2 |
|
2998 |
14 Mar 13 |
peter |
3 |
/* |
2998 |
14 Mar 13 |
peter |
Copyright (C) 2013 Peter Johansson |
2998 |
14 Mar 13 |
peter |
5 |
|
2998 |
14 Mar 13 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
2998 |
14 Mar 13 |
peter |
7 |
|
2998 |
14 Mar 13 |
peter |
The yat library is free software; you can redistribute it and/or |
2998 |
14 Mar 13 |
peter |
modify it under the terms of the GNU General Public License as |
2998 |
14 Mar 13 |
peter |
published by the Free Software Foundation; either version 3 of the |
2998 |
14 Mar 13 |
peter |
License, or (at your option) any later version. |
2998 |
14 Mar 13 |
peter |
12 |
|
2998 |
14 Mar 13 |
peter |
The yat library is distributed in the hope that it will be useful, |
2998 |
14 Mar 13 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
2998 |
14 Mar 13 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2998 |
14 Mar 13 |
peter |
General Public License for more details. |
2998 |
14 Mar 13 |
peter |
17 |
|
2998 |
14 Mar 13 |
peter |
You should have received a copy of the GNU General Public License |
2998 |
14 Mar 13 |
peter |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
2998 |
14 Mar 13 |
peter |
20 |
*/ |
2998 |
14 Mar 13 |
peter |
21 |
|
2998 |
14 Mar 13 |
peter |
22 |
#include <config.h> |
2998 |
14 Mar 13 |
peter |
23 |
|
2998 |
14 Mar 13 |
peter |
24 |
#include "KolmogorovSmirnovOneSample.h" |
2998 |
14 Mar 13 |
peter |
25 |
|
2998 |
14 Mar 13 |
peter |
26 |
#include "yat/utility/Exception.h" |
2998 |
14 Mar 13 |
peter |
27 |
|
2998 |
14 Mar 13 |
peter |
28 |
#include <cassert> |
2998 |
14 Mar 13 |
peter |
29 |
#include <cmath> |
2998 |
14 Mar 13 |
peter |
30 |
#include <sstream> |
2998 |
14 Mar 13 |
peter |
31 |
|
2998 |
14 Mar 13 |
peter |
32 |
#include <iostream> |
2998 |
14 Mar 13 |
peter |
33 |
|
2998 |
14 Mar 13 |
peter |
34 |
namespace theplu { |
2998 |
14 Mar 13 |
peter |
35 |
namespace yat { |
2998 |
14 Mar 13 |
peter |
36 |
namespace statistics { |
2998 |
14 Mar 13 |
peter |
37 |
|
2998 |
14 Mar 13 |
peter |
38 |
KolmogorovSmirnovOneSample::KolmogorovSmirnovOneSample(void) |
2998 |
14 Mar 13 |
peter |
39 |
: sum_w_(0) |
2998 |
14 Mar 13 |
peter |
40 |
{ |
2998 |
14 Mar 13 |
peter |
41 |
} |
2998 |
14 Mar 13 |
peter |
42 |
|
2998 |
14 Mar 13 |
peter |
43 |
|
2998 |
14 Mar 13 |
peter |
44 |
void KolmogorovSmirnovOneSample::add(double value, double weight) |
2998 |
14 Mar 13 |
peter |
45 |
{ |
2998 |
14 Mar 13 |
peter |
46 |
if (weight==0) |
2998 |
14 Mar 13 |
peter |
47 |
return; |
2998 |
14 Mar 13 |
peter |
48 |
cached_ = false; |
2998 |
14 Mar 13 |
peter |
49 |
data_.insert(std::make_pair(value, weight)); |
2998 |
14 Mar 13 |
peter |
50 |
sum_w_ += weight; |
2998 |
14 Mar 13 |
peter |
51 |
} |
2998 |
14 Mar 13 |
peter |
52 |
|
2998 |
14 Mar 13 |
peter |
53 |
|
2998 |
14 Mar 13 |
peter |
54 |
double KolmogorovSmirnovOneSample::p_value(void) const |
2998 |
14 Mar 13 |
peter |
55 |
{ |
2998 |
14 Mar 13 |
peter |
56 |
double res=0; |
2998 |
14 Mar 13 |
peter |
57 |
double res2=0; |
2998 |
14 Mar 13 |
peter |
58 |
double s2 = score() * score() * sum_w_; |
2998 |
14 Mar 13 |
peter |
59 |
int sign = 1; |
2998 |
14 Mar 13 |
peter |
60 |
for (size_t k = 1; k<100; ++k) { |
2998 |
14 Mar 13 |
peter |
61 |
res += sign * 2 * exp(-2.0 * k * k * s2); |
2998 |
14 Mar 13 |
peter |
62 |
sign *= -1; |
2998 |
14 Mar 13 |
peter |
// ignore remaining terms as they are small |
2998 |
14 Mar 13 |
peter |
64 |
if (res==res2) |
2998 |
14 Mar 13 |
peter |
65 |
return res; |
2998 |
14 Mar 13 |
peter |
66 |
res2 = res; |
2998 |
14 Mar 13 |
peter |
67 |
} |
2998 |
14 Mar 13 |
peter |
68 |
|
2998 |
14 Mar 13 |
peter |
69 |
return res; |
2998 |
14 Mar 13 |
peter |
70 |
} |
2998 |
14 Mar 13 |
peter |
71 |
|
2998 |
14 Mar 13 |
peter |
72 |
|
2998 |
14 Mar 13 |
peter |
73 |
void KolmogorovSmirnovOneSample::remove(double value, double weight) |
2998 |
14 Mar 13 |
peter |
74 |
{ |
2998 |
14 Mar 13 |
peter |
75 |
if (!weight) |
2998 |
14 Mar 13 |
peter |
76 |
return; |
2998 |
14 Mar 13 |
peter |
77 |
typedef std::multimap<double, double>::iterator iterator; |
2998 |
14 Mar 13 |
peter |
78 |
std::pair<iterator, iterator> range = data_.equal_range(value); |
2998 |
14 Mar 13 |
peter |
79 |
for ( ; range.first!=range.second; ++range.first) { |
2998 |
14 Mar 13 |
peter |
80 |
if (range.first->second == weight) { |
2998 |
14 Mar 13 |
peter |
81 |
data_.erase(range.first); |
2998 |
14 Mar 13 |
peter |
82 |
sum_w_ -= weight; |
2998 |
14 Mar 13 |
peter |
83 |
cached_ = false; |
2998 |
14 Mar 13 |
peter |
84 |
return; |
2998 |
14 Mar 13 |
peter |
85 |
} |
2998 |
14 Mar 13 |
peter |
86 |
} |
2998 |
14 Mar 13 |
peter |
87 |
std::ostringstream ss; |
2998 |
14 Mar 13 |
peter |
88 |
ss << "KolmogorovSmirnovOneSample::remove: " << value << " " << weight; |
2998 |
14 Mar 13 |
peter |
89 |
throw utility::runtime_error(ss.str()); |
2998 |
14 Mar 13 |
peter |
90 |
} |
2998 |
14 Mar 13 |
peter |
91 |
|
2998 |
14 Mar 13 |
peter |
92 |
|
2998 |
14 Mar 13 |
peter |
93 |
void KolmogorovSmirnovOneSample::reset(void) |
2998 |
14 Mar 13 |
peter |
94 |
{ |
2998 |
14 Mar 13 |
peter |
95 |
data_.clear(); |
2998 |
14 Mar 13 |
peter |
96 |
sum_w_ = 0; |
2998 |
14 Mar 13 |
peter |
97 |
} |
2998 |
14 Mar 13 |
peter |
98 |
|
2998 |
14 Mar 13 |
peter |
99 |
|
2998 |
14 Mar 13 |
peter |
100 |
double KolmogorovSmirnovOneSample::score(void) const |
2998 |
14 Mar 13 |
peter |
101 |
{ |
2998 |
14 Mar 13 |
peter |
102 |
return std::abs(signed_score()); |
2998 |
14 Mar 13 |
peter |
103 |
} |
2998 |
14 Mar 13 |
peter |
104 |
|
2998 |
14 Mar 13 |
peter |
105 |
|
2998 |
14 Mar 13 |
peter |
106 |
double KolmogorovSmirnovOneSample::signed_score(void) const |
2998 |
14 Mar 13 |
peter |
107 |
{ |
2998 |
14 Mar 13 |
peter |
108 |
if (cached_) |
2998 |
14 Mar 13 |
peter |
109 |
return score_; |
2998 |
14 Mar 13 |
peter |
110 |
score_ = 0.0; |
2998 |
14 Mar 13 |
peter |
111 |
if (data_.empty()) |
2998 |
14 Mar 13 |
peter |
112 |
return 0.0; |
2998 |
14 Mar 13 |
peter |
113 |
|
2998 |
14 Mar 13 |
peter |
114 |
typedef std::multimap<double, double>::const_iterator iterator; |
2998 |
14 Mar 13 |
peter |
115 |
iterator iter = data_.begin(); |
2998 |
14 Mar 13 |
peter |
116 |
double w = 0; |
3002 |
20 Mar 13 |
peter |
// Finding the max value of |F(x)-x|, where F(x) is empirical |
3002 |
20 Mar 13 |
peter |
// cumulative distribution. Since F is stepwise constant and |
3002 |
20 Mar 13 |
peter |
// right-continuous by definition we only need to look at the end |
3002 |
20 Mar 13 |
peter |
// of every step, i.e., at x_i and x_i - dx. |
2998 |
14 Mar 13 |
peter |
121 |
while (iter != data_.end()) { |
2998 |
14 Mar 13 |
peter |
122 |
double x = iter->first; |
3002 |
20 Mar 13 |
peter |
// calculate |F(x-dx) - (x-dx)| |
3002 |
20 Mar 13 |
peter |
124 |
double s = w / sum_w_ - x; |
3002 |
20 Mar 13 |
peter |
125 |
if (std::abs(s) > std::abs(score_)) |
3002 |
20 Mar 13 |
peter |
126 |
score_ = s; |
2998 |
14 Mar 13 |
peter |
127 |
while (iter!=data_.end() && iter->first==x) { |
2998 |
14 Mar 13 |
peter |
128 |
w += iter->second; |
2998 |
14 Mar 13 |
peter |
129 |
++iter; |
2998 |
14 Mar 13 |
peter |
130 |
} |
3002 |
20 Mar 13 |
peter |
// calculate |F(x) - x| |
3002 |
20 Mar 13 |
peter |
132 |
s = w / sum_w_ - x; |
2998 |
14 Mar 13 |
peter |
133 |
if (std::abs(s) > std::abs(score_)) |
2998 |
14 Mar 13 |
peter |
134 |
score_ = s; |
2998 |
14 Mar 13 |
peter |
135 |
} |
2998 |
14 Mar 13 |
peter |
136 |
|
2998 |
14 Mar 13 |
peter |
137 |
cached_ = true; |
2998 |
14 Mar 13 |
peter |
138 |
return score_; |
2998 |
14 Mar 13 |
peter |
139 |
} |
2998 |
14 Mar 13 |
peter |
140 |
|
2998 |
14 Mar 13 |
peter |
141 |
}}} // of namespace theplu yat statistics |