1708 |
13 Jan 09 |
jari |
// $Id$ |
1708 |
13 Jan 09 |
jari |
2 |
|
1708 |
13 Jan 09 |
jari |
3 |
/* |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2009 Jari Häkkinen, Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2010, 2012 Peter Johansson |
1708 |
13 Jan 09 |
jari |
6 |
|
1708 |
13 Jan 09 |
jari |
This file is part of the yat library, http://dev.thep.lu.se/yat |
1708 |
13 Jan 09 |
jari |
8 |
|
1708 |
13 Jan 09 |
jari |
The yat library is free software; you can redistribute it and/or |
1708 |
13 Jan 09 |
jari |
modify it under the terms of the GNU General Public License as |
1708 |
13 Jan 09 |
jari |
published by the Free Software Foundation; either version 3 of the |
1708 |
13 Jan 09 |
jari |
License, or (at your option) any later version. |
1708 |
13 Jan 09 |
jari |
13 |
|
1708 |
13 Jan 09 |
jari |
The yat library is distributed in the hope that it will be useful, |
1708 |
13 Jan 09 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
1708 |
13 Jan 09 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
1708 |
13 Jan 09 |
jari |
General Public License for more details. |
1708 |
13 Jan 09 |
jari |
18 |
|
1708 |
13 Jan 09 |
jari |
You should have received a copy of the GNU General Public License |
1708 |
13 Jan 09 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
1708 |
13 Jan 09 |
jari |
21 |
*/ |
1708 |
13 Jan 09 |
jari |
22 |
|
2881 |
18 Nov 12 |
peter |
23 |
#include <config.h> |
2881 |
18 Nov 12 |
peter |
24 |
|
1708 |
13 Jan 09 |
jari |
25 |
#include "qQuantileNormalizer.h" |
1708 |
13 Jan 09 |
jari |
26 |
|
1708 |
13 Jan 09 |
jari |
27 |
#include "yat/regression/CSplineInterpolation.h" |
1708 |
13 Jan 09 |
jari |
28 |
#include "yat/statistics/Averager.h" |
1738 |
20 Jan 09 |
peter |
29 |
#include "yat/statistics/AveragerWeighted.h" |
1738 |
20 Jan 09 |
peter |
30 |
#include "yat/utility/DataWeight.h" |
2210 |
05 Mar 10 |
peter |
31 |
#include "yat/utility/Exception.h" |
1712 |
13 Jan 09 |
jari |
32 |
#include "yat/utility/Vector.h" |
1712 |
13 Jan 09 |
jari |
33 |
#include "yat/utility/VectorBase.h" |
1738 |
20 Jan 09 |
peter |
34 |
#include "yat/utility/WeightIterator.h" |
1708 |
13 Jan 09 |
jari |
35 |
|
1708 |
13 Jan 09 |
jari |
36 |
#include <algorithm> |
1708 |
13 Jan 09 |
jari |
37 |
#include <cassert> |
1738 |
20 Jan 09 |
peter |
38 |
#include <numeric> |
1738 |
20 Jan 09 |
peter |
39 |
#include <sstream> |
1738 |
20 Jan 09 |
peter |
40 |
#include <stdexcept> |
1738 |
20 Jan 09 |
peter |
41 |
#include <string> |
1738 |
20 Jan 09 |
peter |
42 |
#include <vector> |
1708 |
13 Jan 09 |
jari |
43 |
|
1708 |
13 Jan 09 |
jari |
44 |
namespace theplu { |
1708 |
13 Jan 09 |
jari |
45 |
namespace yat { |
1708 |
13 Jan 09 |
jari |
46 |
namespace normalizer { |
1708 |
13 Jan 09 |
jari |
47 |
|
1708 |
13 Jan 09 |
jari |
48 |
|
4200 |
19 Aug 22 |
peter |
49 |
void |
1736 |
19 Jan 09 |
peter |
50 |
qQuantileNormalizer::Partitioner::init(const utility::VectorBase& sortedvec, |
1736 |
19 Jan 09 |
peter |
51 |
unsigned int N) |
1708 |
13 Jan 09 |
jari |
52 |
{ |
1708 |
13 Jan 09 |
jari |
53 |
assert(N>1); |
1736 |
19 Jan 09 |
peter |
54 |
assert(N<=sortedvec.size()); |
1736 |
19 Jan 09 |
peter |
55 |
double range=static_cast<double>(sortedvec.size())/N; |
1708 |
13 Jan 09 |
jari |
56 |
assert(range); |
1713 |
13 Jan 09 |
jari |
57 |
unsigned int start=0; |
1709 |
13 Jan 09 |
jari |
58 |
for (unsigned int i=0; i<N; ++i) { |
1713 |
13 Jan 09 |
jari |
59 |
unsigned int end = ( i==(N-1) ? sortedvec.size() : |
1713 |
13 Jan 09 |
jari |
60 |
static_cast<unsigned int>((i+1)*range) ); |
1708 |
13 Jan 09 |
jari |
61 |
statistics::Averager av; |
1713 |
13 Jan 09 |
jari |
62 |
for (unsigned int r=start; r<end; ++r) |
1708 |
13 Jan 09 |
jari |
63 |
av.add(sortedvec(r)); |
1735 |
16 Jan 09 |
jari |
64 |
average_(i) = av.mean(); |
1813 |
20 Feb 09 |
peter |
65 |
quantiles_(i) = 0.5*(end+start); |
1713 |
13 Jan 09 |
jari |
66 |
start=end; |
1708 |
13 Jan 09 |
jari |
67 |
} |
1813 |
20 Feb 09 |
peter |
// rescale quantiles to be in range (0,1) |
1813 |
20 Feb 09 |
peter |
69 |
quantiles_ *= 1.0/sortedvec.size(); |
1708 |
13 Jan 09 |
jari |
70 |
} |
1708 |
13 Jan 09 |
jari |
71 |
|
1708 |
13 Jan 09 |
jari |
72 |
|
1738 |
20 Jan 09 |
peter |
73 |
void qQuantileNormalizer::Partitioner::init |
1738 |
20 Jan 09 |
peter |
74 |
(const std::vector<utility::DataWeight>& sortedvec, unsigned int N) |
1738 |
20 Jan 09 |
peter |
75 |
{ |
1738 |
20 Jan 09 |
peter |
76 |
assert(N>1); |
1738 |
20 Jan 09 |
peter |
77 |
assert(N<=sortedvec.size()); |
1738 |
20 Jan 09 |
peter |
78 |
double total_w = std::accumulate(utility::weight_iterator(sortedvec.begin()), |
4200 |
19 Aug 22 |
peter |
79 |
utility::weight_iterator(sortedvec.end()), |
1738 |
20 Jan 09 |
peter |
80 |
0.0); |
1738 |
20 Jan 09 |
peter |
81 |
|
1738 |
20 Jan 09 |
peter |
82 |
assert(total_w); |
1738 |
20 Jan 09 |
peter |
83 |
double sum_w = 0; |
1738 |
20 Jan 09 |
peter |
84 |
std::vector<utility::DataWeight>::const_iterator iter(sortedvec.begin()); |
1738 |
20 Jan 09 |
peter |
85 |
for (unsigned int i=0; i<N; ++i) { |
1738 |
20 Jan 09 |
peter |
86 |
statistics::AveragerWeighted av; |
4200 |
19 Aug 22 |
peter |
87 |
double end_sum_w = (i+1) * total_w / N - sum_w; |
1738 |
20 Jan 09 |
peter |
88 |
if (i!=N-1) { |
1778 |
06 Feb 09 |
peter |
89 |
while(av.sum_w() + iter->weight() <= end_sum_w) { |
1738 |
20 Jan 09 |
peter |
90 |
av.add(iter->data(), iter->weight()); |
1738 |
20 Jan 09 |
peter |
91 |
++iter; |
1738 |
20 Jan 09 |
peter |
92 |
} |
1738 |
20 Jan 09 |
peter |
93 |
} |
1738 |
20 Jan 09 |
peter |
// use all remaining data for last bin (to avoid problems |
1738 |
20 Jan 09 |
peter |
// due to rounding errors) |
4200 |
19 Aug 22 |
peter |
96 |
else |
1738 |
20 Jan 09 |
peter |
97 |
add(av, iter, sortedvec.end()); |
1738 |
20 Jan 09 |
peter |
98 |
|
1738 |
20 Jan 09 |
peter |
99 |
if (av.sum_w() == 0) { |
1738 |
20 Jan 09 |
peter |
100 |
std::stringstream ss; |
1738 |
20 Jan 09 |
peter |
101 |
ss << "yat::normalizer::qQuantileNormalizer: relative weight too " |
2103 |
06 Nov 09 |
peter |
102 |
<< "large. See qQuantileNormalizer constructor documentation " |
2103 |
06 Nov 09 |
peter |
103 |
<< "for details on weights.\n"; |
2210 |
05 Mar 10 |
peter |
104 |
throw utility::runtime_error(ss.str()); |
1738 |
20 Jan 09 |
peter |
105 |
} |
1738 |
20 Jan 09 |
peter |
106 |
average_(i) = av.mean(); |
1813 |
20 Feb 09 |
peter |
107 |
quantiles_(i) = (sum_w + 0.5*av.sum_w())/total_w; |
1738 |
20 Jan 09 |
peter |
108 |
sum_w += av.sum_w(); |
1738 |
20 Jan 09 |
peter |
109 |
} |
1738 |
20 Jan 09 |
peter |
110 |
} |
1738 |
20 Jan 09 |
peter |
111 |
|
1738 |
20 Jan 09 |
peter |
112 |
|
1716 |
13 Jan 09 |
jari |
113 |
const utility::Vector& qQuantileNormalizer::Partitioner::averages(void) const |
1708 |
13 Jan 09 |
jari |
114 |
{ |
1708 |
13 Jan 09 |
jari |
115 |
return average_; |
1708 |
13 Jan 09 |
jari |
116 |
} |
1708 |
13 Jan 09 |
jari |
117 |
|
1708 |
13 Jan 09 |
jari |
118 |
|
1813 |
20 Feb 09 |
peter |
119 |
const utility::Vector& qQuantileNormalizer::Partitioner::quantiles(void) const |
1708 |
13 Jan 09 |
jari |
120 |
{ |
1813 |
20 Feb 09 |
peter |
121 |
return quantiles_; |
1708 |
13 Jan 09 |
jari |
122 |
} |
1708 |
13 Jan 09 |
jari |
123 |
|
1708 |
13 Jan 09 |
jari |
124 |
|
1716 |
13 Jan 09 |
jari |
125 |
size_t qQuantileNormalizer::Partitioner::size(void) const |
1708 |
13 Jan 09 |
jari |
126 |
{ |
1709 |
13 Jan 09 |
jari |
127 |
return average_.size(); |
1708 |
13 Jan 09 |
jari |
128 |
} |
1708 |
13 Jan 09 |
jari |
129 |
|
1708 |
13 Jan 09 |
jari |
130 |
}}} // end of namespace normalizer, yat and thep |