4185 |
04 Jul 22 |
peter |
1 |
#ifndef _theplu_yat_normalizer_quantile_normalizer2_ |
4185 |
04 Jul 22 |
peter |
2 |
#define _theplu_yat_normalizer_quantile_normalizer2_ |
4185 |
04 Jul 22 |
peter |
3 |
|
4185 |
04 Jul 22 |
peter |
// $Id$ |
4185 |
04 Jul 22 |
peter |
5 |
|
4185 |
04 Jul 22 |
peter |
6 |
/* |
4185 |
04 Jul 22 |
peter |
Copyright (C) 2022 Peter Johansson |
4185 |
04 Jul 22 |
peter |
8 |
|
4185 |
04 Jul 22 |
peter |
This file is part of the yat library, https://dev.thep.lu.se/yat |
4185 |
04 Jul 22 |
peter |
10 |
|
4185 |
04 Jul 22 |
peter |
The yat library is free software; you can redistribute it and/or |
4185 |
04 Jul 22 |
peter |
modify it under the terms of the GNU General Public License as |
4185 |
04 Jul 22 |
peter |
published by the Free Software Foundation; either version 3 of the |
4185 |
04 Jul 22 |
peter |
License, or (at your option) any later version. |
4185 |
04 Jul 22 |
peter |
15 |
|
4185 |
04 Jul 22 |
peter |
The yat library is distributed in the hope that it will be useful, |
4185 |
04 Jul 22 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
4185 |
04 Jul 22 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
4185 |
04 Jul 22 |
peter |
General Public License for more details. |
4185 |
04 Jul 22 |
peter |
20 |
|
4185 |
04 Jul 22 |
peter |
You should have received a copy of the GNU General Public License |
4185 |
04 Jul 22 |
peter |
along with yat. If not, see <https://www.gnu.org/licenses/>. |
4185 |
04 Jul 22 |
peter |
23 |
*/ |
4185 |
04 Jul 22 |
peter |
24 |
|
4189 |
18 Jul 22 |
peter |
25 |
#include "yat/regression/LinearInterpolation.h" |
4185 |
04 Jul 22 |
peter |
26 |
#include "yat/utility/Vector.h" |
4185 |
04 Jul 22 |
peter |
27 |
|
4185 |
04 Jul 22 |
peter |
28 |
namespace theplu { |
4185 |
04 Jul 22 |
peter |
29 |
namespace yat { |
4185 |
04 Jul 22 |
peter |
30 |
namespace utility { |
4185 |
04 Jul 22 |
peter |
31 |
// Forward declarations: this header only uses these matrix types by |
// reference, so their full definitions are not needed here. |
class MatrixBase; |
4185 |
04 Jul 22 |
peter |
32 |
class MatrixMutable; |
4185 |
04 Jul 22 |
peter |
33 |
} |
4185 |
04 Jul 22 |
peter |
34 |
namespace normalizer { |
4185 |
04 Jul 22 |
peter |
35 |
|
4185 |
04 Jul 22 |
peter |
36 |
/** |
4185 |
04 Jul 22 |
peter |
\brief Perform quantile normalization |
4186 |
04 Jul 22 |
peter |
38 |
|
4186 |
04 Jul 22 |
peter |
After a quantile normalization each column has the same |
4186 |
04 Jul 22 |
peter |
distribution of data (the quantiles are the same). Also, within |
4186 |
04 Jul 22 |
peter |
each column the rank of an element is not changed. The |
4186 |
04 Jul 22 |
peter |
distribution that each column follows is determined by taking the |
4186 |
04 Jul 22 |
peter |
average across columns, i.e., the largest element in each column |
4186 |
04 Jul 22 |
peter |
will be equal to the average of the largest elements. The 2nd |
4186 |
04 Jul 22 |
peter |
largest element in each column will be equal to the average of |
4186 |
04 Jul 22 |
peter |
the 2nd largest element in each column et cetera. |
4186 |
04 Jul 22 |
peter |
47 |
|
4185 |
04 Jul 22 |
peter |
\since New in yat 0.20 |
4185 |
04 Jul 22 |
peter |
49 |
*/ |
4185 |
04 Jul 22 |
peter |
50 |
class QuantileNormalizer2 |
4185 |
04 Jul 22 |
peter |
51 |
{ |
4185 |
04 Jul 22 |
peter |
52 |
public: |
4185 |
04 Jul 22 |
peter |
53 |
/** |
4189 |
18 Jul 22 |
peter |
Default constructor |
4189 |
18 Jul 22 |
peter |
55 |
*/ |
4189 |
18 Jul 22 |
peter |
56 |
QuantileNormalizer2(void); |
4189 |
18 Jul 22 |
peter |
57 |
|
4189 |
18 Jul 22 |
peter |
58 |
/** |
4189 |
18 Jul 22 |
peter |
Copy constructor |
4189 |
18 Jul 22 |
peter |
60 |
*/ |
4189 |
18 Jul 22 |
peter |
61 |
QuantileNormalizer2(const QuantileNormalizer2& other); |
4189 |
18 Jul 22 |
peter |
62 |
|
4189 |
18 Jul 22 |
peter |
63 |
/** |
4189 |
18 Jul 22 |
peter |
Move constructor |
4189 |
18 Jul 22 |
peter |
65 |
*/ |
4189 |
18 Jul 22 |
peter |
66 |
QuantileNormalizer2(QuantileNormalizer2&& other); |
4189 |
18 Jul 22 |
peter |
67 |
|
4189 |
18 Jul 22 |
peter |
68 |
/** |
4189 |
18 Jul 22 |
peter |
Copy assignment |
4189 |
18 Jul 22 |
peter |
70 |
*/ |
4189 |
18 Jul 22 |
peter |
71 |
QuantileNormalizer2& operator=(const QuantileNormalizer2& rhs); |
4189 |
18 Jul 22 |
peter |
72 |
|
4189 |
18 Jul 22 |
peter |
73 |
/** |
4189 |
18 Jul 22 |
peter |
Move assignment |
4189 |
18 Jul 22 |
peter |
75 |
*/ |
4189 |
18 Jul 22 |
peter |
76 |
QuantileNormalizer2& operator=(QuantileNormalizer2&& rhs); |
4189 |
18 Jul 22 |
peter |
77 |
|
4189 |
18 Jul 22 |
peter |
78 |
/** |
4185 |
04 Jul 22 |
peter |
Calculate the typical distribution from \a data. The first |
4185 |
04 Jul 22 |
peter |
element is calculated by taking the average of the smallest |
4185 |
04 Jul 22 |
peter |
value of each column, the 2nd element is calculated by taking |
4185 |
04 Jul 22 |
peter |
the average of the 2nd smallest element of each column, |
4185 |
04 Jul 22 |
peter |
etc. The resulting distribution has the same size as the number |
4185 |
04 Jul 22 |
peter |
of rows in \a data. |
4185 |
04 Jul 22 |
peter |
85 |
*/ |
4185 |
04 Jul 22 |
peter |
86 |
void train(const utility::MatrixBase& data); |
4185 |
04 Jul 22 |
peter |
87 |
|
4185 |
04 Jul 22 |
peter |
88 |
/** |
4185 |
04 Jul 22 |
peter |
\brief perform the quantile normalization. |
4185 |
04 Jul 22 |
peter |
90 |
|
4185 |
04 Jul 22 |
peter |
Equivalent to calling |
4185 |
04 Jul 22 |
peter |
\code |
4185 |
04 Jul 22 |
peter |
train(input); |
4185 |
04 Jul 22 |
peter |
normalize(input, result); |
4185 |
04 Jul 22 |
peter |
\endcode |
4185 |
04 Jul 22 |
peter |
96 |
|
4185 |
04 Jul 22 |
peter |
It is possible to normalize "in place"; it is permissible for |
4185 |
04 Jul 22 |
peter |
\a input and \a result to refer to the same Matrix. If the |
4185 |
04 Jul 22 |
peter |
input and result are overlapping in any other way the behaviour |
4185 |
04 Jul 22 |
peter |
is undefined. |
4185 |
04 Jul 22 |
peter |
101 |
|
4185 |
04 Jul 22 |
peter |
\note dimensions of \a input and \a result must match. |
4185 |
04 Jul 22 |
peter |
103 |
*/ |
4185 |
04 Jul 22 |
peter |
104 |
void operator()(const utility::MatrixBase& input, |
4185 |
04 Jul 22 |
peter |
105 |
utility::MatrixMutable& result); |
4185 |
04 Jul 22 |
peter |
106 |
|
4185 |
04 Jul 22 |
peter |
107 |
/** |
4185 |
04 Jul 22 |
peter |
Equivalent to calling |
4185 |
04 Jul 22 |
peter |
normalize(const utility::VectorBase&, utility::VectorMutable&) const |
4185 |
04 Jul 22 |
peter |
for each column in \a input and \a result. |
4185 |
04 Jul 22 |
peter |
111 |
|
4185 |
04 Jul 22 |
peter |
Requires that a typical distribution has been inferred via |
4185 |
04 Jul 22 |
peter |
train() or operator(). |
4185 |
04 Jul 22 |
peter |
114 |
|
4185 |
04 Jul 22 |
peter |
It is possible to normalize "in place"; it is permissible for |
4185 |
04 Jul 22 |
peter |
\a input and \a result to refer to the same Matrix. If the |
4185 |
04 Jul 22 |
peter |
input and result are overlapping in any other way the behaviour |
4185 |
04 Jul 22 |
peter |
is undefined. |
4185 |
04 Jul 22 |
peter |
119 |
|
4189 |
18 Jul 22 |
peter |
\note dimensions of \a input and \a result must match. |
4185 |
04 Jul 22 |
peter |
121 |
*/ |
4185 |
04 Jul 22 |
peter |
122 |
void normalize(const utility::MatrixBase& input, |
4185 |
04 Jul 22 |
peter |
123 |
utility::MatrixMutable& result) const; |
4185 |
04 Jul 22 |
peter |
124 |
|
4185 |
04 Jul 22 |
peter |
125 |
/** |
4185 |
04 Jul 22 |
peter |
The result vector will be filled with the values from the |
4185 |
04 Jul 22 |
peter |
typical distribution (inferred from train() or operator()) in |
4185 |
04 Jul 22 |
peter |
such a way that if the nth element is the smallest value in the |
4185 |
04 Jul 22 |
peter |
\a input, the nth element is also the smallest element in \a |
4185 |
04 Jul 22 |
peter |
result and so on. |
4185 |
04 Jul 22 |
peter |
131 |
|
4191 |
18 Jul 22 |
peter |
If the size of the template (passed via train function) and the |
4191 |
18 Jul 22 |
peter |
\a input are not the same, values are interpolated. Both for the |
4191 |
18 Jul 22 |
peter |
template and result data, x is equally distributed between 0 |
4191 |
18 Jul 22 |
peter |
and 1 such that the kth value is (k+0.5) / N, where N is the size of |
4191 |
18 Jul 22 |
peter |
the data range. The normalised value is interpolated from the two |
4191 |
18 Jul 22 |
peter |
nearest data points in the template using linear interpolation. |
4191 |
18 Jul 22 |
peter |
138 |
|
4185 |
04 Jul 22 |
peter |
Requires that a typical distribution has been inferred via |
4185 |
04 Jul 22 |
peter |
train() or operator(). |
4185 |
04 Jul 22 |
peter |
141 |
|
4185 |
04 Jul 22 |
peter |
It is possible to normalize "in place"; it is permissible for |
4185 |
04 Jul 22 |
peter |
\a input and \a result to refer to the same vector. If the |
4185 |
04 Jul 22 |
peter |
input and result are overlapping in any other way the behaviour |
4185 |
04 Jul 22 |
peter |
is undefined. |
4185 |
04 Jul 22 |
peter |
146 |
|
4189 |
18 Jul 22 |
peter |
\note dimensions of \a input and \a result must match. |
4185 |
04 Jul 22 |
peter |
148 |
*/ |
4185 |
04 Jul 22 |
peter |
149 |
void normalize(const utility::VectorBase& input, |
4185 |
04 Jul 22 |
peter |
150 |
utility::VectorMutable& result) const; |
4185 |
04 Jul 22 |
peter |
151 |
|
4185 |
04 Jul 22 |
peter |
152 |
private: |
4189 |
18 Jul 22 |
peter |
153 |
// The free function swap() is a friend so it can reach the private members. |
friend void swap(QuantileNormalizer2& lhs, QuantileNormalizer2& rhs); |
4185 |
04 Jul 22 |
peter |
154 |
// Presumably holds the typical distribution inferred by train() -- confirm |
// against the implementation file. |
utility::Vector distribution_; |
4189 |
18 Jul 22 |
peter |
155 |
// Declared mutable, so const member functions may modify it; presumably |
// managed through interpolator() -- confirm against the implementation. |
// NOTE(review): std::unique_ptr, std::vector and size_t are used in this |
// class, but this header shows no direct #include of <memory>, <vector> |
// or <cstddef>; presumably they arrive transitively -- confirm. |
mutable std::unique_ptr<regression::LinearInterpolation> interpolator_; |
4185 |
04 Jul 22 |
peter |
156 |
|
4185 |
04 Jul 22 |
peter |
157 |
// Private helper; fills \a index with one std::vector<size_t> per entry. |
// Presumably one sort index per column of \a X -- TODO confirm. |
void create_index(const utility::MatrixBase& X, |
4185 |
04 Jul 22 |
peter |
158 |
std::vector<std::vector<size_t>>& index) const; |
4189 |
18 Jul 22 |
peter |
159 |
|
4189 |
18 Jul 22 |
peter |
160 |
// Private accessor returning a reference to a LinearInterpolation object; |
// presumably the one owned by interpolator_ -- TODO confirm. |
regression::LinearInterpolation& interpolator(void) const; |
4189 |
18 Jul 22 |
peter |
161 |
|
4185 |
04 Jul 22 |
peter |
162 |
// Private normalize() overloads working on a precomputed \a index, for a |
// single vector and for a whole matrix, each with and without an explicit |
// interpolator argument. Presumably the interpolator variants handle the |
// case where template and input sizes differ -- TODO confirm. |
void normalize(utility::VectorMutable& result, |
4185 |
04 Jul 22 |
peter |
163 |
const std::vector<size_t>& index) const; |
4189 |
18 Jul 22 |
peter |
164 |
void normalize(utility::VectorMutable& result, |
4189 |
18 Jul 22 |
peter |
165 |
const std::vector<size_t>& index, |
4189 |
18 Jul 22 |
peter |
166 |
regression::LinearInterpolation& interpolator) const; |
4189 |
18 Jul 22 |
peter |
167 |
|
4185 |
04 Jul 22 |
peter |
168 |
void normalize(utility::MatrixMutable& result, |
4185 |
04 Jul 22 |
peter |
169 |
const std::vector<std::vector<size_t>>& index) const; |
4189 |
18 Jul 22 |
peter |
170 |
void normalize(utility::MatrixMutable& result, |
4189 |
18 Jul 22 |
peter |
171 |
const std::vector<std::vector<size_t>>& index, |
4189 |
18 Jul 22 |
peter |
172 |
regression::LinearInterpolation& interpolator) const; |
4189 |
18 Jul 22 |
peter |
173 |
|
4185 |
04 Jul 22 |
peter |
174 |
// Private train() overload taking a precomputed \a index in addition to |
// the data matrix \a X. |
void train(const utility::MatrixBase& X, |
4185 |
04 Jul 22 |
peter |
175 |
const std::vector<std::vector<size_t>>& index); |
4185 |
04 Jul 22 |
peter |
176 |
}; |
4185 |
04 Jul 22 |
peter |
177 |
|
4190 |
18 Jul 22 |
peter |
178 |
/** |
4190 |
18 Jul 22 |
peter |
Swap two quantile normalizers. |
4190 |
18 Jul 22 |
peter |
180 |
|
4190 |
18 Jul 22 |
peter |
This function is declared a friend of QuantileNormalizer2. |
|
\relates QuantileNormalizer2 |
4190 |
18 Jul 22 |
peter |
182 |
*/ |
4189 |
18 Jul 22 |
peter |
183 |
void swap(QuantileNormalizer2& lhs, QuantileNormalizer2& rhs); |
4189 |
18 Jul 22 |
peter |
184 |
|
4185 |
04 Jul 22 |
peter |
185 |
}}} // end of namespace normalizer, yat and theplu |
4185 |
04 Jul 22 |
peter |
186 |
#endif |