yat/normalizer/QuantileNormalizer2.cc

Code
Comments
Other
Rev Date Author Line
4185 04 Jul 22 peter 1 // $Id$
4185 04 Jul 22 peter 2
4185 04 Jul 22 peter 3 /*
4185 04 Jul 22 peter 4   Copyright (C) 2022 Peter Johansson
4185 04 Jul 22 peter 5
4185 04 Jul 22 peter 6   This file is part of the yat library, https://dev.thep.lu.se/yat
4185 04 Jul 22 peter 7
4185 04 Jul 22 peter 8   The yat library is free software; you can redistribute it and/or
4185 04 Jul 22 peter 9   modify it under the terms of the GNU General Public License as
4185 04 Jul 22 peter 10   published by the Free Software Foundation; either version 3 of the
4185 04 Jul 22 peter 11   License, or (at your option) any later version.
4185 04 Jul 22 peter 12
4185 04 Jul 22 peter 13   The yat library is distributed in the hope that it will be useful,
4185 04 Jul 22 peter 14   but WITHOUT ANY WARRANTY; without even the implied warranty of
4185 04 Jul 22 peter 15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4185 04 Jul 22 peter 16   General Public License for more details.
4185 04 Jul 22 peter 17
4185 04 Jul 22 peter 18   You should have received a copy of the GNU General Public License
4185 04 Jul 22 peter 19   along with yat. If not, see <https://www.gnu.org/licenses/>.
4185 04 Jul 22 peter 20 */
4185 04 Jul 22 peter 21
4185 04 Jul 22 peter 22 #include <config.h>
4185 04 Jul 22 peter 23
4185 04 Jul 22 peter 24 #include "QuantileNormalizer2.h"
4185 04 Jul 22 peter 25
4185 04 Jul 22 peter 26 #include "yat/statistics/Averager.h"
4185 04 Jul 22 peter 27 #include "yat/utility/MatrixMutable.h"
4185 04 Jul 22 peter 28
4185 04 Jul 22 peter 29 #include <vector>
4185 04 Jul 22 peter 30 #include <cassert>
4185 04 Jul 22 peter 31
4188 18 Jul 22 peter 32 #include <iostream>
4185 04 Jul 22 peter 33 using std::vector;
4185 04 Jul 22 peter 34
4185 04 Jul 22 peter 35 namespace theplu {
4185 04 Jul 22 peter 36 namespace yat {
4185 04 Jul 22 peter 37 namespace normalizer {
4185 04 Jul 22 peter 38
4189 18 Jul 22 peter 39   using regression::LinearInterpolation;
4189 18 Jul 22 peter 40
4189 18 Jul 22 peter 41   QuantileNormalizer2::QuantileNormalizer2(void)
4189 18 Jul 22 peter 42   {}
4189 18 Jul 22 peter 43
4189 18 Jul 22 peter 44
4189 18 Jul 22 peter 45   // interpolator_ is built lazily when needed
4189 18 Jul 22 peter 46   QuantileNormalizer2::QuantileNormalizer2(const QuantileNormalizer2& other)
4189 18 Jul 22 peter 47     : distribution_(other.distribution_)
4189 18 Jul 22 peter 48   {}
4189 18 Jul 22 peter 49
4189 18 Jul 22 peter 50
4189 18 Jul 22 peter 51   QuantileNormalizer2::QuantileNormalizer2(QuantileNormalizer2&& other)
4189 18 Jul 22 peter 52     : distribution_(std::move(other.distribution_)),
4189 18 Jul 22 peter 53       interpolator_(std::move(other.interpolator_))
4189 18 Jul 22 peter 54   {}
4189 18 Jul 22 peter 55
4189 18 Jul 22 peter 56
4189 18 Jul 22 peter 57   QuantileNormalizer2&
4189 18 Jul 22 peter 58   QuantileNormalizer2::operator=(const QuantileNormalizer2& rhs)
4189 18 Jul 22 peter 59   {
4189 18 Jul 22 peter 60     QuantileNormalizer2 tmp(rhs);
4189 18 Jul 22 peter 61     swap(tmp, *this);
4189 18 Jul 22 peter 62     return *this;
4189 18 Jul 22 peter 63   }
4189 18 Jul 22 peter 64
4189 18 Jul 22 peter 65
4189 18 Jul 22 peter 66   QuantileNormalizer2&
4189 18 Jul 22 peter 67   QuantileNormalizer2::operator=(QuantileNormalizer2&& rhs)
4189 18 Jul 22 peter 68   {
4189 18 Jul 22 peter 69     QuantileNormalizer2 tmp(std::move(rhs));
4189 18 Jul 22 peter 70     swap(tmp, *this);
4189 18 Jul 22 peter 71     return *this;
4189 18 Jul 22 peter 72   }
4189 18 Jul 22 peter 73
4189 18 Jul 22 peter 74
4185 04 Jul 22 peter 75   void QuantileNormalizer2::operator()(const utility::MatrixBase& input,
4185 04 Jul 22 peter 76                                        utility::MatrixMutable& result)
4185 04 Jul 22 peter 77   {
4185 04 Jul 22 peter 78     assert(input.rows()==result.rows());
4185 04 Jul 22 peter 79     assert(input.columns()==result.columns());
4185 04 Jul 22 peter 80
4185 04 Jul 22 peter 81     vector<vector<size_t> > index;
4185 04 Jul 22 peter 82     create_index(input, index);
4185 04 Jul 22 peter 83     train(input, index);
4185 04 Jul 22 peter 84     normalize(result, index);
4185 04 Jul 22 peter 85   }
4185 04 Jul 22 peter 86
4185 04 Jul 22 peter 87
4185 04 Jul 22 peter 88   void QuantileNormalizer2::create_index(const utility::MatrixBase& X,
4185 04 Jul 22 peter 89                                          vector<vector<size_t>>& index) const
4185 04 Jul 22 peter 90   {
4185 04 Jul 22 peter 91     index.resize(X.columns());
4185 04 Jul 22 peter 92     for (size_t column=0; column<X.columns(); ++column) {
4185 04 Jul 22 peter 93       assert(column<index.size());
4185 04 Jul 22 peter 94       utility::sort_index(index[column], X.column_const_view(column));
4185 04 Jul 22 peter 95     }
4185 04 Jul 22 peter 96   }
4185 04 Jul 22 peter 97
4185 04 Jul 22 peter 98
4189 18 Jul 22 peter 99   LinearInterpolation& QuantileNormalizer2::interpolator(void) const
4185 04 Jul 22 peter 100   {
4189 18 Jul 22 peter 101     if (!interpolator_) {
4189 18 Jul 22 peter 102       utility::Vector x(distribution_.size());
4189 18 Jul 22 peter 103       for (size_t i=0; i<x.size(); ++i)
4189 18 Jul 22 peter 104         x(i) = (i+0.5)/x.size();
4189 18 Jul 22 peter 105       interpolator_.reset(new LinearInterpolation(x, distribution_));
4189 18 Jul 22 peter 106     }
4189 18 Jul 22 peter 107     assert(interpolator_);
4189 18 Jul 22 peter 108     return *interpolator_;
4185 04 Jul 22 peter 109   }
4185 04 Jul 22 peter 110
4185 04 Jul 22 peter 111
4185 04 Jul 22 peter 112   void QuantileNormalizer2::normalize(const utility::VectorBase& input,
4185 04 Jul 22 peter 113                                       utility::VectorMutable& result) const
4185 04 Jul 22 peter 114   {
4185 04 Jul 22 peter 115     assert(input.size()==result.size());
4185 04 Jul 22 peter 116
4185 04 Jul 22 peter 117     vector<size_t> index;
4185 04 Jul 22 peter 118     utility::sort_index(index, input);
4189 18 Jul 22 peter 119     if (distribution_.size() == input.size())
4189 18 Jul 22 peter 120       normalize(result, index);
4189 18 Jul 22 peter 121     else
4189 18 Jul 22 peter 122       normalize(result, index, interpolator());
4185 04 Jul 22 peter 123   }
4185 04 Jul 22 peter 124
4185 04 Jul 22 peter 125
4185 04 Jul 22 peter 126   void
4185 04 Jul 22 peter 127   QuantileNormalizer2::normalize(utility::VectorMutable& result,
4185 04 Jul 22 peter 128                                  const vector<size_t>& index) const
4185 04 Jul 22 peter 129   {
4185 04 Jul 22 peter 130     assert(result.size() == index.size());
4189 18 Jul 22 peter 131     assert(result.size() == distribution_.size());
4189 18 Jul 22 peter 132     for (size_t rank=0; rank<result.size(); ++rank)
4189 18 Jul 22 peter 133       result(index[rank]) = distribution_(rank);
4189 18 Jul 22 peter 134   }
4188 18 Jul 22 peter 135
4189 18 Jul 22 peter 136
4189 18 Jul 22 peter 137   void
4189 18 Jul 22 peter 138   QuantileNormalizer2::normalize(utility::VectorMutable& result,
4189 18 Jul 22 peter 139                                  const vector<size_t>& index,
4189 18 Jul 22 peter 140                                  LinearInterpolation& interpolator) const
4189 18 Jul 22 peter 141   {
4189 18 Jul 22 peter 142     for (size_t rank=0; rank<result.size(); ++rank) {
4189 18 Jul 22 peter 143       double z = (rank+0.5) / result.size();
4189 18 Jul 22 peter 144       result(index[rank]) = interpolator.evaluate(z);
4188 18 Jul 22 peter 145     }
4185 04 Jul 22 peter 146   }
4185 04 Jul 22 peter 147
4185 04 Jul 22 peter 148
4189 18 Jul 22 peter 149   void QuantileNormalizer2::normalize(const utility::MatrixBase& input,
4189 18 Jul 22 peter 150                                       utility::MatrixMutable& result) const
4189 18 Jul 22 peter 151   {
4189 18 Jul 22 peter 152     assert(input.rows()==result.rows());
4189 18 Jul 22 peter 153     assert(input.columns()==result.columns());
4189 18 Jul 22 peter 154
4189 18 Jul 22 peter 155     vector<vector<size_t> > index;
4189 18 Jul 22 peter 156     create_index(input, index);
4189 18 Jul 22 peter 157     if (result.rows() == distribution_.size())
4189 18 Jul 22 peter 158       normalize(result, index);
4189 18 Jul 22 peter 159     else
4189 18 Jul 22 peter 160       normalize(result, index, interpolator());
4189 18 Jul 22 peter 161   }
4189 18 Jul 22 peter 162
4189 18 Jul 22 peter 163
4185 04 Jul 22 peter 164   void
4185 04 Jul 22 peter 165   QuantileNormalizer2::normalize(utility::MatrixMutable& result,
4185 04 Jul 22 peter 166                                  const vector<vector<size_t>>& index) const
4185 04 Jul 22 peter 167   {
4189 18 Jul 22 peter 168     assert(result.rows() == distribution_.size());
4185 04 Jul 22 peter 169     assert(result.columns() == index.size());
4185 04 Jul 22 peter 170     for (size_t column=0; column<result.columns(); ++column) {
4185 04 Jul 22 peter 171       auto r = result.column_view(column);
4185 04 Jul 22 peter 172       normalize(r, index[column]);
4185 04 Jul 22 peter 173     }
4185 04 Jul 22 peter 174   }
4185 04 Jul 22 peter 175
4185 04 Jul 22 peter 176
4189 18 Jul 22 peter 177   void
4189 18 Jul 22 peter 178   QuantileNormalizer2::normalize(utility::MatrixMutable& result,
4189 18 Jul 22 peter 179                                  const vector<vector<size_t>>& index,
4189 18 Jul 22 peter 180                                  LinearInterpolation& interpolator) const
4189 18 Jul 22 peter 181   {
4189 18 Jul 22 peter 182     assert(result.columns() == index.size());
4189 18 Jul 22 peter 183     for (size_t column=0; column<result.columns(); ++column) {
4189 18 Jul 22 peter 184       auto r = result.column_view(column);
4189 18 Jul 22 peter 185       normalize(r, index[column], interpolator);
4189 18 Jul 22 peter 186     }
4189 18 Jul 22 peter 187   }
4189 18 Jul 22 peter 188
4189 18 Jul 22 peter 189
4185 04 Jul 22 peter 190   void QuantileNormalizer2::train(const utility::MatrixBase& data)
4185 04 Jul 22 peter 191   {
4185 04 Jul 22 peter 192     vector<vector<size_t> > index;
4185 04 Jul 22 peter 193     create_index(data, index);
4185 04 Jul 22 peter 194     train(data, index);
4185 04 Jul 22 peter 195   }
4185 04 Jul 22 peter 196
4185 04 Jul 22 peter 197
4185 04 Jul 22 peter 198   void
4185 04 Jul 22 peter 199   QuantileNormalizer2::train(const utility::MatrixBase& X,
4185 04 Jul 22 peter 200                              const vector<vector<size_t>>& index)
4185 04 Jul 22 peter 201   {
4189 18 Jul 22 peter 202     interpolator_.reset();
4185 04 Jul 22 peter 203     distribution_.resize(X.rows());
4185 04 Jul 22 peter 204     for (size_t rank=0; rank<distribution_.size(); ++rank) {
4185 04 Jul 22 peter 205       statistics::Averager a;
4185 04 Jul 22 peter 206       for (size_t column=0; column<X.columns(); ++column) {
4185 04 Jul 22 peter 207         assert(column < index.size());
4185 04 Jul 22 peter 208         assert(rank < index[column].size());
4185 04 Jul 22 peter 209         a.add(X(index[column][rank], column));
4185 04 Jul 22 peter 210       }
4185 04 Jul 22 peter 211       distribution_(rank) = a.mean();
4185 04 Jul 22 peter 212     }
4185 04 Jul 22 peter 213   }
4185 04 Jul 22 peter 214
4185 04 Jul 22 peter 215
4189 18 Jul 22 peter 216   void swap(QuantileNormalizer2& lhs, QuantileNormalizer2& rhs)
4189 18 Jul 22 peter 217   {
4189 18 Jul 22 peter 218     std::swap(lhs.distribution_, rhs.distribution_);
4189 18 Jul 22 peter 219     std::swap(lhs.interpolator_, rhs.interpolator_);
4189 18 Jul 22 peter 220   }
4189 18 Jul 22 peter 221
4185 04 Jul 22 peter 222 }}} // end of namespace utility, yat and thep