4125 |
14 Jan 22 |
peter |
1 |
#ifndef _theplu_yat_utility_pca_ |
4125 |
14 Jan 22 |
peter |
2 |
#define _theplu_yat_utility_pca_ |
42 |
26 Feb 04 |
jari |
3 |
|
616 |
31 Aug 06 |
jari |
// $Id$ |
13 |
19 Jun 03 |
daniel |
5 |
|
675 |
10 Oct 06 |
jari |
6 |
/* |
831 |
27 Mar 07 |
peter |
Copyright (C) 2003 Daniel Dalevi |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2004 Jari Häkkinen |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2005 Jari Häkkinen, Peter Johansson |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2006 Jari Häkkinen |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson |
4369 |
13 Sep 23 |
peter |
Copyright (C) 2010, 2018, 2020, 2022, 2023 Peter Johansson |
13 |
19 Jun 03 |
daniel |
13 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
616 |
31 Aug 06 |
jari |
15 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
675 |
10 Oct 06 |
jari |
20 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
675 |
10 Oct 06 |
jari |
General Public License for more details. |
675 |
10 Oct 06 |
jari |
25 |
|
675 |
10 Oct 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
675 |
10 Oct 06 |
jari |
28 |
*/ |
675 |
10 Oct 06 |
jari |
29 |
|
3736 |
22 May 18 |
peter |
30 |
#include "config_public.h" |
3736 |
22 May 18 |
peter |
31 |
|
1121 |
22 Feb 08 |
peter |
32 |
#include "Matrix.h" |
1120 |
21 Feb 08 |
peter |
33 |
#include "Vector.h" |
675 |
10 Oct 06 |
jari |
34 |
|
42 |
26 Feb 04 |
jari |
35 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
36 |
namespace yat { |
301 |
30 Apr 05 |
peter |
37 |
namespace utility { |
42 |
26 Feb 04 |
jari |
38 |
|
13 |
19 Jun 03 |
daniel |
39 |
/** |
767 |
22 Feb 07 |
peter |
@brief Principal Component Analysis |
767 |
22 Feb 07 |
peter |
41 |
|
4369 |
13 Sep 23 |
peter |
Class performing PCA using SVD. This class assumes that each |
4369 |
13 Sep 23 |
peter |
column corresponds to a data point. From yat 0.22 there is no |
4369 |
13 Sep 23 |
peter |
restriction on the input dimension, M, in relation to number of |
4369 |
13 Sep 23 |
peter |
data points, N. If N<M, the PCA will have N principal components |
4369 |
13 Sep 23 |
peter |
(provided through eigenvectors()) with the last one |
4369 |
13 Sep 23 |
peter |
associated with a zero eigenvalue due to the centralization of |
4369 |
13 Sep 23 |
peter |
data. If N>M, the PCA will have M principal components, i.e., |
4369 |
13 Sep 23 |
peter |
same dimensionality as the input data. |
13 |
19 Jun 03 |
daniel |
50 |
*/ |
13 |
19 Jun 03 |
daniel |
51 |
class PCA |
13 |
19 Jun 03 |
daniel |
52 |
{ |
13 |
19 Jun 03 |
daniel |
53 |
public: |
13 |
19 Jun 03 |
daniel |
54 |
/** |
13 |
19 Jun 03 |
daniel |
Constructor taking the data-matrix as input. No row-centering |
4369 |
13 Sep 23 |
peter |
should have been performed and no products. Each column in \c X |
4369 |
13 Sep 23 |
peter |
corresponds to a data point. |
NOTE(review): \a centralize and \a scale are not described here; |
presumably they toggle row centering and row scaling of \c X |
(cf. row_center() and row_scale()) - confirm against the |
implementation. |
13 |
19 Jun 03 |
daniel |
58 |
*/ |
4373 |
19 Sep 23 |
peter |
59 |
explicit PCA(const MatrixBase& X, bool centralize=true, bool scale=false); |
3736 |
22 May 18 |
peter |
60 |
|
13 |
19 Jun 03 |
daniel |
61 |
/** |
3736 |
22 May 18 |
peter |
Same as PCA(const MatrixBase&, bool, bool) but moves \a X rather than copying it. |
3736 |
22 May 18 |
peter |
63 |
|
3938 |
16 Jul 20 |
peter |
\since new in yat 0.16 |
3736 |
22 May 18 |
peter |
65 |
*/ |
4373 |
19 Sep 23 |
peter |
66 |
explicit PCA(MatrixMutable&& X, bool centralize=true, bool scale=false); |
3736 |
22 May 18 |
peter |
67 |
|
3736 |
22 May 18 |
peter |
68 |
/** |
2323 |
19 Sep 10 |
peter |
\brief Returns eigenvalues. |
420 |
02 Dec 05 |
jari |
70 |
|
829 |
24 Mar 07 |
jari |
\return A const reference to the internal vector containing all |
829 |
24 Mar 07 |
jari |
eigenvalues. |
13 |
19 Jun 03 |
daniel |
73 |
*/ |
2323 |
19 Sep 10 |
peter |
74 |
const Vector& eigenvalues(void) const; |
13 |
19 Jun 03 |
daniel |
75 |
|
13 |
19 Jun 03 |
daniel |
76 |
/** |
2323 |
19 Sep 10 |
peter |
\brief Get all eigenvectors in a Matrix. |
13 |
19 Jun 03 |
daniel |
78 |
|
4363 |
08 Sep 23 |
peter |
First row of the returned matrix contains the 1st |
4363 |
08 Sep 23 |
peter |
eigen-vector. Second row contains the 2nd... etc. |
4363 |
08 Sep 23 |
peter |
81 |
|
829 |
24 Mar 07 |
jari |
\return A const reference to the internal matrix containing all |
829 |
24 Mar 07 |
jari |
eigenvectors. |
13 |
19 Jun 03 |
daniel |
84 |
*/ |
2323 |
19 Sep 10 |
peter |
85 |
const Matrix& eigenvectors(void) const; |
13 |
19 Jun 03 |
daniel |
86 |
|
4371 |
14 Sep 23 |
peter |
87 |
/** |
4371 |
14 Sep 23 |
peter |
\return the mean data point. |
4371 |
14 Sep 23 |
peter |
89 |
|
4371 |
14 Sep 23 |
peter |
Size of returned vector is the same as number of rows in data |
4371 |
14 Sep 23 |
peter |
matrix. |
4371 |
14 Sep 23 |
peter |
92 |
|
4371 |
14 Sep 23 |
peter |
\since New in yat 0.22 |
4371 |
14 Sep 23 |
peter |
94 |
*/ |
4371 |
14 Sep 23 |
peter |
95 |
const Vector& mean(void) const; |
4371 |
14 Sep 23 |
peter |
96 |
|
420 |
02 Dec 05 |
jari |
97 |
/** |
420 |
02 Dec 05 |
jari |
This function will project data onto the new coordinate-system |
4351 |
18 Aug 23 |
peter |
where the axes are the calculated eigenvectors. |
420 |
02 Dec 05 |
jari |
Output is presented as coordinates in the N-dimensional space |
420 |
02 Dec 05 |
jari |
spanned by the eigenvectors. |
4369 |
13 Sep 23 |
peter |
102 |
|
4369 |
13 Sep 23 |
peter |
Each column in \c X corresponds to a data point. Likewise, each |
4369 |
13 Sep 23 |
peter |
column in the output Matrix corresponds to a data point and |
4369 |
13 Sep 23 |
peter |
each row corresponds to the corresponding principal component. |
13 |
19 Jun 03 |
daniel |
106 |
*/ |
4369 |
13 Sep 23 |
peter |
107 |
Matrix projection(const MatrixBase& X) const; |
420 |
02 Dec 05 |
jari |
108 |
|
4370 |
13 Sep 23 |
peter |
109 |
/** |
4370 |
13 Sep 23 |
peter |
Same as projection(const MatrixBase&) but for a single data |
4370 |
13 Sep 23 |
peter |
point. Function centralises data by subtracting the mean value |
4370 |
13 Sep 23 |
peter |
and then projects it onto the coordinate system defined by the |
4370 |
13 Sep 23 |
peter |
principal components. |
829 |
24 Mar 07 |
jari |
114 |
|
4370 |
13 Sep 23 |
peter |
\since New in yat 0.22 |
4370 |
13 Sep 23 |
peter |
116 |
*/ |
4370 |
13 Sep 23 |
peter |
117 |
Vector projection(const VectorBase& x) const; |
4370 |
13 Sep 23 |
peter |
118 |
|
4370 |
13 Sep 23 |
peter |
119 |
private: |
4370 |
13 Sep 23 |
peter |
120 |
|
13 |
19 Jun 03 |
daniel |
121 |
/** |
829 |
24 Mar 07 |
jari |
Will perform PCA according to the following scheme: \n |
829 |
24 Mar 07 |
jari |
1: Rowcenter A \n |
829 |
24 Mar 07 |
jari |
2: SVD(A) --> USV' \n |
4200 |
19 Aug 22 |
peter |
3: Calculate eigenvalues according to \n |
829 |
24 Mar 07 |
jari |
\f$ \lambda_{ii} = s_{ii}/N_{rows} \f$ \n |
NOTE(review): for an SVD-based PCA the eigenvalues are usually |
derived from the squared singular values; confirm the formula |
above against the implementation. |
829 |
24 Mar 07 |
jari |
127 |
*/ |
829 |
24 Mar 07 |
jari |
128 |
void process(void); |
829 |
24 Mar 07 |
jari |
129 |
|
4368 |
12 Sep 23 |
peter |
/// Calculate the mean of each row |
4368 |
12 Sep 23 |
peter |
131 |
void calculate_means(const Matrix& X); |
// NOTE(review): undocumented; presumably fills scale_factors_ from X - confirm |
4373 |
19 Sep 23 |
peter |
132 |
void calculate_scale_factors(const Matrix& X); |
4368 |
12 Sep 23 |
peter |
133 |
|
4373 |
19 Sep 23 |
peter |
// normalize a single data point (one column); row_normalize is documented as equivalent to calling this for each column. NOTE(review): this comment previously read "inverse of normalize", which looks stale - confirm |
4373 |
19 Sep 23 |
peter |
135 |
void normalize(VectorMutable& x) const; |
4373 |
19 Sep 23 |
peter |
136 |
|
4368 |
12 Sep 23 |
peter |
137 |
/** |
4368 |
12 Sep 23 |
peter |
Function that subtracts the means vector from each column in |
4368 |
12 Sep 23 |
peter |
X. If the means were calculated from X, it means that each row |
4368 |
12 Sep 23 |
peter |
has a mean = 0. |
13 |
19 Jun 03 |
daniel |
141 |
*/ |
4368 |
12 Sep 23 |
peter |
142 |
void row_center(Matrix& X) const; |
4373 |
19 Sep 23 |
peter |
// normalize each row; equivalent to calling normalize for each |
4373 |
19 Sep 23 |
peter |
// column |
4373 |
19 Sep 23 |
peter |
145 |
void row_normalize(Matrix& X) const; |
// NOTE(review): undocumented; presumably applies scale_factors_ to the rows of X - confirm |
4373 |
19 Sep 23 |
peter |
146 |
void row_scale(Matrix& X) const; |
13 |
19 Jun 03 |
daniel |
147 |
|
// Internal state. NOTE(review): presumably eigenvalues_, eigenvectors_ and |
// meanvalues_ back the eigenvalues(), eigenvectors() and mean() accessors - |
// confirm against the implementation. |
4372 |
14 Sep 23 |
peter |
148 |
Matrix A_; |
4372 |
14 Sep 23 |
peter |
149 |
Vector eigenvalues_; |
4372 |
14 Sep 23 |
peter |
150 |
Matrix eigenvectors_; |
4372 |
14 Sep 23 |
peter |
151 |
Vector meanvalues_; |
4373 |
19 Sep 23 |
peter |
152 |
Vector scale_factors_; |
829 |
24 Mar 07 |
jari |
153 |
}; |
13 |
19 Jun 03 |
daniel |
154 |
|
687 |
16 Oct 06 |
jari |
155 |
}}} // of namespace utility, yat, and theplu |
13 |
19 Jun 03 |
daniel |
156 |
|
13 |
19 Jun 03 |
daniel |
157 |
#endif |