4185 |
04 Jul 22 |
peter |
// $Id$ |
4185 |
04 Jul 22 |
peter |
2 |
|
4185 |
04 Jul 22 |
peter |
3 |
/* |
4185 |
04 Jul 22 |
peter |
Copyright (C) 2022 Peter Johansson |
4185 |
04 Jul 22 |
peter |
5 |
|
4185 |
04 Jul 22 |
peter |
This file is part of the yat library, https://dev.thep.lu.se/yat |
4185 |
04 Jul 22 |
peter |
7 |
|
4185 |
04 Jul 22 |
peter |
The yat library is free software; you can redistribute it and/or |
4185 |
04 Jul 22 |
peter |
modify it under the terms of the GNU General Public License as |
4185 |
04 Jul 22 |
peter |
published by the Free Software Foundation; either version 3 of the |
4185 |
04 Jul 22 |
peter |
License, or (at your option) any later version. |
4185 |
04 Jul 22 |
peter |
12 |
|
4185 |
04 Jul 22 |
peter |
The yat library is distributed in the hope that it will be useful, |
4185 |
04 Jul 22 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
4185 |
04 Jul 22 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
4185 |
04 Jul 22 |
peter |
General Public License for more details. |
4185 |
04 Jul 22 |
peter |
17 |
|
4185 |
04 Jul 22 |
peter |
You should have received a copy of the GNU General Public License |
4185 |
04 Jul 22 |
peter |
along with yat. If not, see <https://www.gnu.org/licenses/>. |
4185 |
04 Jul 22 |
peter |
20 |
*/ |
4185 |
04 Jul 22 |
peter |
21 |
|
4185 |
04 Jul 22 |
peter |
22 |
#include <config.h> |
4185 |
04 Jul 22 |
peter |
23 |
|
4185 |
04 Jul 22 |
peter |
24 |
#include "QuantileNormalizer2.h" |
4185 |
04 Jul 22 |
peter |
25 |
|
4185 |
04 Jul 22 |
peter |
26 |
#include "yat/statistics/Averager.h" |
4185 |
04 Jul 22 |
peter |
27 |
#include "yat/utility/MatrixMutable.h" |
4185 |
04 Jul 22 |
peter |
28 |
|
4185 |
04 Jul 22 |
peter |
29 |
#include <vector> |
4185 |
04 Jul 22 |
peter |
30 |
#include <cassert> |
4185 |
04 Jul 22 |
peter |
31 |
|
4188 |
18 Jul 22 |
peter |
32 |
#include <iostream> |
4185 |
04 Jul 22 |
peter |
33 |
using std::vector; |
4185 |
04 Jul 22 |
peter |
34 |
|
4185 |
04 Jul 22 |
peter |
35 |
namespace theplu { |
4185 |
04 Jul 22 |
peter |
36 |
namespace yat { |
4185 |
04 Jul 22 |
peter |
37 |
namespace normalizer { |
4185 |
04 Jul 22 |
peter |
38 |
|
4189 |
18 Jul 22 |
peter |
39 |
using regression::LinearInterpolation; |
4189 |
18 Jul 22 |
peter |
40 |
|
4189 |
18 Jul 22 |
peter |
41 |
QuantileNormalizer2::QuantileNormalizer2(void) |
4189 |
18 Jul 22 |
peter |
42 |
{} |
4189 |
18 Jul 22 |
peter |
43 |
|
4189 |
18 Jul 22 |
peter |
44 |
|
4189 |
18 Jul 22 |
peter |
// interpolator_ is built lazily when needed |
4189 |
18 Jul 22 |
peter |
46 |
QuantileNormalizer2::QuantileNormalizer2(const QuantileNormalizer2& other) |
4189 |
18 Jul 22 |
peter |
47 |
: distribution_(other.distribution_) |
4189 |
18 Jul 22 |
peter |
48 |
{} |
4189 |
18 Jul 22 |
peter |
49 |
|
4189 |
18 Jul 22 |
peter |
50 |
|
4189 |
18 Jul 22 |
peter |
51 |
QuantileNormalizer2::QuantileNormalizer2(QuantileNormalizer2&& other) |
4189 |
18 Jul 22 |
peter |
52 |
: distribution_(std::move(other.distribution_)), |
4189 |
18 Jul 22 |
peter |
53 |
interpolator_(std::move(other.interpolator_)) |
4189 |
18 Jul 22 |
peter |
54 |
{} |
4189 |
18 Jul 22 |
peter |
55 |
|
4189 |
18 Jul 22 |
peter |
56 |
|
4189 |
18 Jul 22 |
peter |
57 |
QuantileNormalizer2& |
4189 |
18 Jul 22 |
peter |
58 |
QuantileNormalizer2::operator=(const QuantileNormalizer2& rhs) |
4189 |
18 Jul 22 |
peter |
59 |
{ |
4189 |
18 Jul 22 |
peter |
60 |
QuantileNormalizer2 tmp(rhs); |
4189 |
18 Jul 22 |
peter |
61 |
swap(tmp, *this); |
4189 |
18 Jul 22 |
peter |
62 |
return *this; |
4189 |
18 Jul 22 |
peter |
63 |
} |
4189 |
18 Jul 22 |
peter |
64 |
|
4189 |
18 Jul 22 |
peter |
65 |
|
4189 |
18 Jul 22 |
peter |
66 |
QuantileNormalizer2& |
4189 |
18 Jul 22 |
peter |
67 |
QuantileNormalizer2::operator=(QuantileNormalizer2&& rhs) |
4189 |
18 Jul 22 |
peter |
68 |
{ |
4189 |
18 Jul 22 |
peter |
69 |
QuantileNormalizer2 tmp(std::move(rhs)); |
4189 |
18 Jul 22 |
peter |
70 |
swap(tmp, *this); |
4189 |
18 Jul 22 |
peter |
71 |
return *this; |
4189 |
18 Jul 22 |
peter |
72 |
} |
4189 |
18 Jul 22 |
peter |
73 |
|
4189 |
18 Jul 22 |
peter |
74 |
|
4185 |
04 Jul 22 |
peter |
75 |
void QuantileNormalizer2::operator()(const utility::MatrixBase& input, |
4185 |
04 Jul 22 |
peter |
76 |
utility::MatrixMutable& result) |
4185 |
04 Jul 22 |
peter |
77 |
{ |
4185 |
04 Jul 22 |
peter |
78 |
assert(input.rows()==result.rows()); |
4185 |
04 Jul 22 |
peter |
79 |
assert(input.columns()==result.columns()); |
4185 |
04 Jul 22 |
peter |
80 |
|
4185 |
04 Jul 22 |
peter |
81 |
vector<vector<size_t> > index; |
4185 |
04 Jul 22 |
peter |
82 |
create_index(input, index); |
4185 |
04 Jul 22 |
peter |
83 |
train(input, index); |
4185 |
04 Jul 22 |
peter |
84 |
normalize(result, index); |
4185 |
04 Jul 22 |
peter |
85 |
} |
4185 |
04 Jul 22 |
peter |
86 |
|
4185 |
04 Jul 22 |
peter |
87 |
|
4185 |
04 Jul 22 |
peter |
88 |
void QuantileNormalizer2::create_index(const utility::MatrixBase& X, |
4185 |
04 Jul 22 |
peter |
89 |
vector<vector<size_t>>& index) const |
4185 |
04 Jul 22 |
peter |
90 |
{ |
4185 |
04 Jul 22 |
peter |
91 |
index.resize(X.columns()); |
4185 |
04 Jul 22 |
peter |
92 |
for (size_t column=0; column<X.columns(); ++column) { |
4185 |
04 Jul 22 |
peter |
93 |
assert(column<index.size()); |
4185 |
04 Jul 22 |
peter |
94 |
utility::sort_index(index[column], X.column_const_view(column)); |
4185 |
04 Jul 22 |
peter |
95 |
} |
4185 |
04 Jul 22 |
peter |
96 |
} |
4185 |
04 Jul 22 |
peter |
97 |
|
4185 |
04 Jul 22 |
peter |
98 |
|
4189 |
18 Jul 22 |
peter |
99 |
LinearInterpolation& QuantileNormalizer2::interpolator(void) const |
4185 |
04 Jul 22 |
peter |
100 |
{ |
4189 |
18 Jul 22 |
peter |
101 |
if (!interpolator_) { |
4189 |
18 Jul 22 |
peter |
102 |
utility::Vector x(distribution_.size()); |
4189 |
18 Jul 22 |
peter |
103 |
for (size_t i=0; i<x.size(); ++i) |
4189 |
18 Jul 22 |
peter |
104 |
x(i) = (i+0.5)/x.size(); |
4189 |
18 Jul 22 |
peter |
105 |
interpolator_.reset(new LinearInterpolation(x, distribution_)); |
4189 |
18 Jul 22 |
peter |
106 |
} |
4189 |
18 Jul 22 |
peter |
107 |
assert(interpolator_); |
4189 |
18 Jul 22 |
peter |
108 |
return *interpolator_; |
4185 |
04 Jul 22 |
peter |
109 |
} |
4185 |
04 Jul 22 |
peter |
110 |
|
4185 |
04 Jul 22 |
peter |
111 |
|
4185 |
04 Jul 22 |
peter |
112 |
void QuantileNormalizer2::normalize(const utility::VectorBase& input, |
4185 |
04 Jul 22 |
peter |
113 |
utility::VectorMutable& result) const |
4185 |
04 Jul 22 |
peter |
114 |
{ |
4185 |
04 Jul 22 |
peter |
115 |
assert(input.size()==result.size()); |
4185 |
04 Jul 22 |
peter |
116 |
|
4185 |
04 Jul 22 |
peter |
117 |
vector<size_t> index; |
4185 |
04 Jul 22 |
peter |
118 |
utility::sort_index(index, input); |
4189 |
18 Jul 22 |
peter |
119 |
if (distribution_.size() == input.size()) |
4189 |
18 Jul 22 |
peter |
120 |
normalize(result, index); |
4189 |
18 Jul 22 |
peter |
121 |
else |
4189 |
18 Jul 22 |
peter |
122 |
normalize(result, index, interpolator()); |
4185 |
04 Jul 22 |
peter |
123 |
} |
4185 |
04 Jul 22 |
peter |
124 |
|
4185 |
04 Jul 22 |
peter |
125 |
|
4185 |
04 Jul 22 |
peter |
126 |
void |
4185 |
04 Jul 22 |
peter |
127 |
QuantileNormalizer2::normalize(utility::VectorMutable& result, |
4185 |
04 Jul 22 |
peter |
128 |
const vector<size_t>& index) const |
4185 |
04 Jul 22 |
peter |
129 |
{ |
4185 |
04 Jul 22 |
peter |
130 |
assert(result.size() == index.size()); |
4189 |
18 Jul 22 |
peter |
131 |
assert(result.size() == distribution_.size()); |
4189 |
18 Jul 22 |
peter |
132 |
for (size_t rank=0; rank<result.size(); ++rank) |
4189 |
18 Jul 22 |
peter |
133 |
result(index[rank]) = distribution_(rank); |
4189 |
18 Jul 22 |
peter |
134 |
} |
4188 |
18 Jul 22 |
peter |
135 |
|
4189 |
18 Jul 22 |
peter |
136 |
|
4189 |
18 Jul 22 |
peter |
137 |
void |
4189 |
18 Jul 22 |
peter |
138 |
QuantileNormalizer2::normalize(utility::VectorMutable& result, |
4189 |
18 Jul 22 |
peter |
139 |
const vector<size_t>& index, |
4189 |
18 Jul 22 |
peter |
140 |
LinearInterpolation& interpolator) const |
4189 |
18 Jul 22 |
peter |
141 |
{ |
4189 |
18 Jul 22 |
peter |
142 |
for (size_t rank=0; rank<result.size(); ++rank) { |
4189 |
18 Jul 22 |
peter |
143 |
double z = (rank+0.5) / result.size(); |
4189 |
18 Jul 22 |
peter |
144 |
result(index[rank]) = interpolator.evaluate(z); |
4188 |
18 Jul 22 |
peter |
145 |
} |
4185 |
04 Jul 22 |
peter |
146 |
} |
4185 |
04 Jul 22 |
peter |
147 |
|
4185 |
04 Jul 22 |
peter |
148 |
|
4189 |
18 Jul 22 |
peter |
149 |
void QuantileNormalizer2::normalize(const utility::MatrixBase& input, |
4189 |
18 Jul 22 |
peter |
150 |
utility::MatrixMutable& result) const |
4189 |
18 Jul 22 |
peter |
151 |
{ |
4189 |
18 Jul 22 |
peter |
152 |
assert(input.rows()==result.rows()); |
4189 |
18 Jul 22 |
peter |
153 |
assert(input.columns()==result.columns()); |
4189 |
18 Jul 22 |
peter |
154 |
|
4189 |
18 Jul 22 |
peter |
155 |
vector<vector<size_t> > index; |
4189 |
18 Jul 22 |
peter |
156 |
create_index(input, index); |
4189 |
18 Jul 22 |
peter |
157 |
if (result.rows() == distribution_.size()) |
4189 |
18 Jul 22 |
peter |
158 |
normalize(result, index); |
4189 |
18 Jul 22 |
peter |
159 |
else |
4189 |
18 Jul 22 |
peter |
160 |
normalize(result, index, interpolator()); |
4189 |
18 Jul 22 |
peter |
161 |
} |
4189 |
18 Jul 22 |
peter |
162 |
|
4189 |
18 Jul 22 |
peter |
163 |
|
4185 |
04 Jul 22 |
peter |
164 |
void |
4185 |
04 Jul 22 |
peter |
165 |
QuantileNormalizer2::normalize(utility::MatrixMutable& result, |
4185 |
04 Jul 22 |
peter |
166 |
const vector<vector<size_t>>& index) const |
4185 |
04 Jul 22 |
peter |
167 |
{ |
4189 |
18 Jul 22 |
peter |
168 |
assert(result.rows() == distribution_.size()); |
4185 |
04 Jul 22 |
peter |
169 |
assert(result.columns() == index.size()); |
4185 |
04 Jul 22 |
peter |
170 |
for (size_t column=0; column<result.columns(); ++column) { |
4185 |
04 Jul 22 |
peter |
171 |
auto r = result.column_view(column); |
4185 |
04 Jul 22 |
peter |
172 |
normalize(r, index[column]); |
4185 |
04 Jul 22 |
peter |
173 |
} |
4185 |
04 Jul 22 |
peter |
174 |
} |
4185 |
04 Jul 22 |
peter |
175 |
|
4185 |
04 Jul 22 |
peter |
176 |
|
4189 |
18 Jul 22 |
peter |
177 |
void |
4189 |
18 Jul 22 |
peter |
178 |
QuantileNormalizer2::normalize(utility::MatrixMutable& result, |
4189 |
18 Jul 22 |
peter |
179 |
const vector<vector<size_t>>& index, |
4189 |
18 Jul 22 |
peter |
180 |
LinearInterpolation& interpolator) const |
4189 |
18 Jul 22 |
peter |
181 |
{ |
4189 |
18 Jul 22 |
peter |
182 |
assert(result.columns() == index.size()); |
4189 |
18 Jul 22 |
peter |
183 |
for (size_t column=0; column<result.columns(); ++column) { |
4189 |
18 Jul 22 |
peter |
184 |
auto r = result.column_view(column); |
4189 |
18 Jul 22 |
peter |
185 |
normalize(r, index[column], interpolator); |
4189 |
18 Jul 22 |
peter |
186 |
} |
4189 |
18 Jul 22 |
peter |
187 |
} |
4189 |
18 Jul 22 |
peter |
188 |
|
4189 |
18 Jul 22 |
peter |
189 |
|
4185 |
04 Jul 22 |
peter |
190 |
void QuantileNormalizer2::train(const utility::MatrixBase& data) |
4185 |
04 Jul 22 |
peter |
191 |
{ |
4185 |
04 Jul 22 |
peter |
192 |
vector<vector<size_t> > index; |
4185 |
04 Jul 22 |
peter |
193 |
create_index(data, index); |
4185 |
04 Jul 22 |
peter |
194 |
train(data, index); |
4185 |
04 Jul 22 |
peter |
195 |
} |
4185 |
04 Jul 22 |
peter |
196 |
|
4185 |
04 Jul 22 |
peter |
197 |
|
4185 |
04 Jul 22 |
peter |
198 |
void |
4185 |
04 Jul 22 |
peter |
199 |
QuantileNormalizer2::train(const utility::MatrixBase& X, |
4185 |
04 Jul 22 |
peter |
200 |
const vector<vector<size_t>>& index) |
4185 |
04 Jul 22 |
peter |
201 |
{ |
4189 |
18 Jul 22 |
peter |
202 |
interpolator_.reset(); |
4185 |
04 Jul 22 |
peter |
203 |
distribution_.resize(X.rows()); |
4185 |
04 Jul 22 |
peter |
204 |
for (size_t rank=0; rank<distribution_.size(); ++rank) { |
4185 |
04 Jul 22 |
peter |
205 |
statistics::Averager a; |
4185 |
04 Jul 22 |
peter |
206 |
for (size_t column=0; column<X.columns(); ++column) { |
4185 |
04 Jul 22 |
peter |
207 |
assert(column < index.size()); |
4185 |
04 Jul 22 |
peter |
208 |
assert(rank < index[column].size()); |
4185 |
04 Jul 22 |
peter |
209 |
a.add(X(index[column][rank], column)); |
4185 |
04 Jul 22 |
peter |
210 |
} |
4185 |
04 Jul 22 |
peter |
211 |
distribution_(rank) = a.mean(); |
4185 |
04 Jul 22 |
peter |
212 |
} |
4185 |
04 Jul 22 |
peter |
213 |
} |
4185 |
04 Jul 22 |
peter |
214 |
|
4185 |
04 Jul 22 |
peter |
215 |
|
4189 |
18 Jul 22 |
peter |
216 |
void swap(QuantileNormalizer2& lhs, QuantileNormalizer2& rhs) |
4189 |
18 Jul 22 |
peter |
217 |
{ |
4189 |
18 Jul 22 |
peter |
218 |
std::swap(lhs.distribution_, rhs.distribution_); |
4189 |
18 Jul 22 |
peter |
219 |
std::swap(lhs.interpolator_, rhs.interpolator_); |
4189 |
18 Jul 22 |
peter |
220 |
} |
4189 |
18 Jul 22 |
peter |
221 |
|
4185 |
04 Jul 22 |
peter |
222 |
}}} // end of namespace utility, yat and thep |