svndigest - svndigest

yat/normalizer/QuantileNormalizer2.h

: Code
: Comments
: Other

Rev	Date	Author	Line
4185	04 Jul 22	peter	1	#ifndef _theplu_yat_normalizer_quantile_normalizer2_
4185	04 Jul 22	peter	2	#define _theplu_yat_normalizer_quantile_normalizer2_
4185	04 Jul 22	peter	3
4185	04 Jul 22	peter	4	// $Id$
4185	04 Jul 22	peter	5
4185	04 Jul 22	peter	6	/*
4185	04 Jul 22	peter	7	Copyright (C) 2022 Peter Johansson
4185	04 Jul 22	peter	8
4185	04 Jul 22	peter	9	This file is part of the yat library, https://dev.thep.lu.se/yat
4185	04 Jul 22	peter	10
4185	04 Jul 22	peter	11	The yat library is free software; you can redistribute it and/or
4185	04 Jul 22	peter	12	modify it under the terms of the GNU General Public License as
4185	04 Jul 22	peter	13	published by the Free Software Foundation; either version 3 of the
4185	04 Jul 22	peter	14	License, or (at your option) any later version.
4185	04 Jul 22	peter	15
4185	04 Jul 22	peter	16	The yat library is distributed in the hope that it will be useful,
4185	04 Jul 22	peter	17	but WITHOUT ANY WARRANTY; without even the implied warranty of
4185	04 Jul 22	peter	18	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4185	04 Jul 22	peter	19	General Public License for more details.
4185	04 Jul 22	peter	20
4185	04 Jul 22	peter	21	You should have received a copy of the GNU General Public License
4185	04 Jul 22	peter	22	along with yat. If not, see <https://www.gnu.org/licenses/>.
4185	04 Jul 22	peter	23	*/
4185	04 Jul 22	peter	24
4189	18 Jul 22	peter	25	#include "yat/regression/LinearInterpolation.h"
4185	04 Jul 22	peter	26	#include "yat/utility/Vector.h"
4185	04 Jul 22	peter	27
4185	04 Jul 22	peter	28	namespace theplu {
4185	04 Jul 22	peter	29	namespace yat {
4185	04 Jul 22	peter	30	namespace utility {
4185	04 Jul 22	peter	31	class MatrixBase;
4185	04 Jul 22	peter	32	class MatrixMutable;
4185	04 Jul 22	peter	33	}
4185	04 Jul 22	peter	34	namespace normalizer {
4185	04 Jul 22	peter	35
4185	04 Jul 22	peter	36	/**
4185	04 Jul 22	peter	37	\brief Perform quantile normalization
4186	04 Jul 22	peter	38
4186	04 Jul 22	peter	39	After a quantile normalization each column has the same
4186	04 Jul 22	peter	40	distribution of data (the quantiles are the same). Also, within
4186	04 Jul 22	peter	41	each column the rank of an element is not changed. The
4186	04 Jul 22	peter	42	distribution that each column follows is determined by taking the
4186	04 Jul 22	peter	43	average across columns, i.e., the largest element in each column
4186	04 Jul 22	peter	44	will be equal to the average of the largest elements. The 2nd
4186	04 Jul 22	peter	45	largest element in each column will be equal to the average of
4186	04 Jul 22	peter	46	the 2nd largest element in each column et cetera.
4186	04 Jul 22	peter	47
4185	04 Jul 22	peter	48	\since New in yat 0.20
4185	04 Jul 22	peter	49	*/
4185	04 Jul 22	peter	50	class QuantileNormalizer2
4185	04 Jul 22	peter	51	{
4185	04 Jul 22	peter	52	public:
4185	04 Jul 22	peter	53	/**
4189	18 Jul 22	peter	54	Default constructor
4189	18 Jul 22	peter	55	*/
4189	18 Jul 22	peter	56	QuantileNormalizer2(void);
4189	18 Jul 22	peter	57
4189	18 Jul 22	peter	58	/**
4189	18 Jul 22	peter	59	Copy constructor
4189	18 Jul 22	peter	60	*/
4189	18 Jul 22	peter	61	QuantileNormalizer2(const QuantileNormalizer2& other);
4189	18 Jul 22	peter	62
4189	18 Jul 22	peter	63	/**
4189	18 Jul 22	peter	64	Move constructor
4189	18 Jul 22	peter	65	*/
4189	18 Jul 22	peter	66	QuantileNormalizer2(QuantileNormalizer2&& other);
4189	18 Jul 22	peter	67
4189	18 Jul 22	peter	68	/**
4189	18 Jul 22	peter	69	Copy assignment
4189	18 Jul 22	peter	70	*/
4189	18 Jul 22	peter	71	QuantileNormalizer2& operator=(const QuantileNormalizer2& rhs);
4189	18 Jul 22	peter	72
4189	18 Jul 22	peter	73	/**
4189	18 Jul 22	peter	74	Move assignment
4189	18 Jul 22	peter	75	*/
4189	18 Jul 22	peter	76	QuantileNormalizer2& operator=(QuantileNormalizer2&& rhs);
4189	18 Jul 22	peter	77
4189	18 Jul 22	peter	78	/**
4185	04 Jul 22	peter	79	Calculate the typical distribution from \a data. The first
4185	04 Jul 22	peter	80	element is calculated by taking the average of the smallest
4185	04 Jul 22	peter	81	value of each column, the 2nd element is calculated by taking
4185	04 Jul 22	peter	82	the average of the 2nd smallest element of each column, et
4185	04 Jul 22	peter	83	cetera. The resulting distribution has the same size as the number
4185	04 Jul 22	peter	84	of rows in \a data.
4185	04 Jul 22	peter	85	*/
4185	04 Jul 22	peter	86	void train(const utility::MatrixBase& data);
4185	04 Jul 22	peter	87
4185	04 Jul 22	peter	88	/**
4185	04 Jul 22	peter	89	\brief perform the quantile normalization.
4185	04 Jul 22	peter	90
4185	04 Jul 22	peter	91	Equivalent to calling
4185	04 Jul 22	peter	92	\code
4185	04 Jul 22	peter	93	train(input);
4185	04 Jul 22	peter	94	normalize(input, result);
4185	04 Jul 22	peter	95	\endcode
4185	04 Jul 22	peter	96
4185	04 Jul 22	peter	97	It is possible to normalize "in place"; it is permissible for
4185	04 Jul 22	peter	98	\a input and \a result to refer to the same Matrix. If the
4185	04 Jul 22	peter	99	input and result are overlapping in any other way the behaviour
4185	04 Jul 22	peter	100	is undefined.
4185	04 Jul 22	peter	101
4185	04 Jul 22	peter	102	\note dimensions of \a input and \a result must match.
4185	04 Jul 22	peter	103	*/
4185	04 Jul 22	peter	104	void operator()(const utility::MatrixBase& input,
4185	04 Jul 22	peter	105	utility::MatrixMutable& result);
4185	04 Jul 22	peter	106
4185	04 Jul 22	peter	107	/**
4185	04 Jul 22	peter	108	Equivalent with calling
4185	04 Jul 22	peter	109	normalize(const utility::VectorBase&,utility::VectorMutable&) const;
4185	04 Jul 22	peter	110	for each column in \a input and \a result.
4185	04 Jul 22	peter	111
4185	04 Jul 22	peter	112	Requires that a typical distribution has been inferred via
4185	04 Jul 22	peter	113	train() or operator().
4185	04 Jul 22	peter	114
4185	04 Jul 22	peter	115	It is possible to normalize "in place"; it is permissible for
4185	04 Jul 22	peter	116	\a input and \a result to refer to the same Matrix. If the
4185	04 Jul 22	peter	117	input and result are overlapping in any other way the behaviour
4185	04 Jul 22	peter	118	is undefined.
4185	04 Jul 22	peter	119
4189	18 Jul 22	peter	120	\note dimensions of \a input and \a result must match.
4185	04 Jul 22	peter	121	*/
4185	04 Jul 22	peter	122	void normalize(const utility::MatrixBase& input,
4185	04 Jul 22	peter	123	utility::MatrixMutable& result) const;
4185	04 Jul 22	peter	124
4185	04 Jul 22	peter	125	/**
4185	04 Jul 22	peter	126	The result vector will be filled with the values from the
4185	04 Jul 22	peter	127	typical distribution (inferred from train() or operator()) in
4185	04 Jul 22	peter	128	such a way that if the nth element is the smallest value in the
4185	04 Jul 22	peter	129	\a input, the nth element is also the smallest element in \a
4185	04 Jul 22	peter	130	result and so on.
4185	04 Jul 22	peter	131
4191	18 Jul 22	peter	132	If the size of the template (passed via train function) and the
4191	18 Jul 22	peter	133	\a input are not the same, values are interpolated. Both for the
4191	18 Jul 22	peter	134	template and result data, x is equally distributed between 0
4191	18 Jul 22	peter	135	and 1 such that the kth value is (k+0.5) / N, where N is the size of
4191	18 Jul 22	peter	136	the data range. The normalised value is interpolated from the two
4191	18 Jul 22	peter	137	nearest data points in the template using linear interpolation.
4191	18 Jul 22	peter	138
4185	04 Jul 22	peter	139	Requires that a typical distribution has been inferred via
4185	04 Jul 22	peter	140	train() or operator().
4185	04 Jul 22	peter	141
4185	04 Jul 22	peter	142	It is possible to normalize "in place"; it is permissible for
4185	04 Jul 22	peter	143	\a input and \a result to refer to the same Vector. If the
4185	04 Jul 22	peter	144	input and result are overlapping in any other way the behaviour
4185	04 Jul 22	peter	145	is undefined.
4185	04 Jul 22	peter	146
4189	18 Jul 22	peter	147	\note dimensions of \a input and \a result must match.
4185	04 Jul 22	peter	148	*/
4185	04 Jul 22	peter	149	void normalize(const utility::VectorBase& input,
4185	04 Jul 22	peter	150	utility::VectorMutable& result) const;
4185	04 Jul 22	peter	151
4185	04 Jul 22	peter	152	private:
4189	18 Jul 22	peter	153	friend void swap(QuantileNormalizer2& lhs, QuantileNormalizer2& rhs);
4185	04 Jul 22	peter	154	utility::Vector distribution_;
4189	18 Jul 22	peter	155	mutable std::unique_ptr<regression::LinearInterpolation> interpolator_;
4185	04 Jul 22	peter	156
4185	04 Jul 22	peter	157	void create_index(const utility::MatrixBase& X,
4185	04 Jul 22	peter	158	std::vector<std::vector<size_t>>& index) const;
4189	18 Jul 22	peter	159
4189	18 Jul 22	peter	160	regression::LinearInterpolation& interpolator(void) const;
4189	18 Jul 22	peter	161
4185	04 Jul 22	peter	162	void normalize(utility::VectorMutable& result,
4185	04 Jul 22	peter	163	const std::vector<size_t>& index) const;
4189	18 Jul 22	peter	164	void normalize(utility::VectorMutable& result,
4189	18 Jul 22	peter	165	const std::vector<size_t>& index,
4189	18 Jul 22	peter	166	regression::LinearInterpolation& interpolator) const;
4189	18 Jul 22	peter	167
4185	04 Jul 22	peter	168	void normalize(utility::MatrixMutable& result,
4185	04 Jul 22	peter	169	const std::vector<std::vector<size_t>>& index) const;
4189	18 Jul 22	peter	170	void normalize(utility::MatrixMutable& result,
4189	18 Jul 22	peter	171	const std::vector<std::vector<size_t>>& index,
4189	18 Jul 22	peter	172	regression::LinearInterpolation& interpolator) const;
4189	18 Jul 22	peter	173
4185	04 Jul 22	peter	174	void train(const utility::MatrixBase& X,
4185	04 Jul 22	peter	175	const std::vector<std::vector<size_t>>& index);
4185	04 Jul 22	peter	176	};
4185	04 Jul 22	peter	177
4190	18 Jul 22	peter	178	/**
4190	18 Jul 22	peter	179	Swap two quantile normalizers.
4190	18 Jul 22	peter	180
4190	18 Jul 22	peter	181	\relates QuantileNormalizer2
4190	18 Jul 22	peter	182	*/
4189	18 Jul 22	peter	183	void swap(QuantileNormalizer2& lhs, QuantileNormalizer2& rhs);
4189	18 Jul 22	peter	184
4185	04 Jul 22	peter	185	}}} // end of namespace normalizer, yat and theplu
4185	04 Jul 22	peter	186	#endif