42 |
26 Feb 04 |
jari |
// $Id$ |
42 |
26 Feb 04 |
jari |
2 |
|
675 |
10 Oct 06 |
jari |
3 |
/* |
831 |
27 Mar 07 |
peter |
Copyright (C) 2003 Daniel Dalevi |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2004 Jari Häkkinen |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2005 Jari Häkkinen, Peter Johansson |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2006 Jari Häkkinen |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2012, 2018, 2019, 2022, 2023 Peter Johansson |
675 |
10 Oct 06 |
jari |
10 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
675 |
10 Oct 06 |
jari |
12 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
675 |
10 Oct 06 |
jari |
17 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
675 |
10 Oct 06 |
jari |
General Public License for more details. |
675 |
10 Oct 06 |
jari |
22 |
|
675 |
10 Oct 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
675 |
10 Oct 06 |
jari |
25 |
*/ |
675 |
10 Oct 06 |
jari |
26 |
|
2881 |
18 Nov 12 |
peter |
27 |
#include <config.h> |
2881 |
18 Nov 12 |
peter |
28 |
|
4125 |
14 Jan 22 |
peter |
29 |
#include "MatrixBase.h" |
4125 |
14 Jan 22 |
peter |
30 |
#include "MatrixMutable.h" |
680 |
11 Oct 06 |
jari |
31 |
#include "PCA.h" |
680 |
11 Oct 06 |
jari |
32 |
#include "SVD.h" |
735 |
06 Jan 07 |
peter |
33 |
#include "utility.h" |
2323 |
19 Sep 10 |
peter |
34 |
#include "Vector.h" |
675 |
10 Oct 06 |
jari |
35 |
|
42 |
26 Feb 04 |
jari |
36 |
#include <iostream> |
189 |
21 Oct 04 |
jari |
37 |
#include <cmath> |
42 |
26 Feb 04 |
jari |
38 |
#include <memory> |
3736 |
22 May 18 |
peter |
39 |
#include <utility> |
42 |
26 Feb 04 |
jari |
40 |
|
42 |
26 Feb 04 |
jari |
41 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
42 |
namespace yat { |
616 |
31 Aug 06 |
jari |
43 |
namespace utility { |
42 |
26 Feb 04 |
jari |
44 |
|
13 |
19 Jun 03 |
daniel |
45 |
|
4373 |
19 Sep 23 |
peter |
46 |
PCA::PCA(const MatrixBase& A, bool center, bool scale) |
4373 |
19 Sep 23 |
peter |
47 |
: A_(A), meanvalues_(center ? A_.rows() : 0), |
4373 |
19 Sep 23 |
peter |
48 |
scale_factors_(scale ? A_.rows() : 0) |
703 |
18 Dec 06 |
jari |
49 |
{ |
829 |
24 Mar 07 |
jari |
50 |
process(); |
703 |
18 Dec 06 |
jari |
51 |
} |
703 |
18 Dec 06 |
jari |
52 |
|
703 |
18 Dec 06 |
jari |
53 |
|
4373 |
19 Sep 23 |
peter |
54 |
PCA::PCA(MatrixMutable&& A, bool center, bool scale) |
4373 |
19 Sep 23 |
peter |
55 |
: A_(std::move(A)), meanvalues_(center ? A_.rows() : 0), |
4373 |
19 Sep 23 |
peter |
56 |
scale_factors_(scale ? A_.rows() : 0) |
3736 |
22 May 18 |
peter |
57 |
{ |
3736 |
22 May 18 |
peter |
58 |
process(); |
3736 |
22 May 18 |
peter |
59 |
} |
3736 |
22 May 18 |
peter |
60 |
|
3736 |
22 May 18 |
peter |
61 |
|
4368 |
12 Sep 23 |
peter |
62 |
void PCA::calculate_means(const Matrix& A_center) |
4368 |
12 Sep 23 |
peter |
63 |
{ |
4368 |
12 Sep 23 |
peter |
64 |
for (size_t i=0; i<meanvalues_.size(); ++i) |
4368 |
12 Sep 23 |
peter |
65 |
meanvalues_(i) = sum(A_.row_const_view(i)) / A_.columns(); |
4368 |
12 Sep 23 |
peter |
66 |
} |
4368 |
12 Sep 23 |
peter |
67 |
|
4368 |
12 Sep 23 |
peter |
68 |
|
4373 |
19 Sep 23 |
peter |
69 |
void PCA::calculate_scale_factors(const Matrix& X) |
4373 |
19 Sep 23 |
peter |
70 |
{ |
4373 |
19 Sep 23 |
peter |
71 |
for (size_t i=0; i<scale_factors_.size(); ++i) { |
4373 |
19 Sep 23 |
peter |
72 |
assert(i < X.rows()); |
4373 |
19 Sep 23 |
peter |
73 |
VectorConstView x = X.row_const_view(i); |
4373 |
19 Sep 23 |
peter |
74 |
scale_factors_(i) = std::sqrt((x * x)/x.size()); |
4373 |
19 Sep 23 |
peter |
75 |
} |
4373 |
19 Sep 23 |
peter |
76 |
} |
4373 |
19 Sep 23 |
peter |
77 |
|
4373 |
19 Sep 23 |
peter |
78 |
|
2323 |
19 Sep 10 |
peter |
79 |
const Vector& PCA::eigenvalues(void) const |
715 |
22 Dec 06 |
jari |
80 |
{ |
829 |
24 Mar 07 |
jari |
81 |
return eigenvalues_; |
715 |
22 Dec 06 |
jari |
82 |
} |
715 |
22 Dec 06 |
jari |
83 |
|
715 |
22 Dec 06 |
jari |
84 |
|
2323 |
19 Sep 10 |
peter |
85 |
const Matrix& PCA::eigenvectors(void) const |
715 |
22 Dec 06 |
jari |
86 |
{ |
829 |
24 Mar 07 |
jari |
87 |
return eigenvectors_; |
715 |
22 Dec 06 |
jari |
88 |
} |
715 |
22 Dec 06 |
jari |
89 |
|
715 |
22 Dec 06 |
jari |
90 |
|
4371 |
14 Sep 23 |
peter |
91 |
const Vector& PCA::mean(void) const |
4371 |
14 Sep 23 |
peter |
92 |
{ |
4371 |
14 Sep 23 |
peter |
93 |
return meanvalues_; |
4371 |
14 Sep 23 |
peter |
94 |
} |
4371 |
14 Sep 23 |
peter |
95 |
|
4371 |
14 Sep 23 |
peter |
96 |
|
4373 |
19 Sep 23 |
peter |
97 |
void PCA::normalize(VectorMutable& x) const |
4373 |
19 Sep 23 |
peter |
98 |
{ |
4373 |
19 Sep 23 |
peter |
99 |
if (meanvalues_.size()) |
4373 |
19 Sep 23 |
peter |
100 |
x -= meanvalues_; |
4373 |
19 Sep 23 |
peter |
101 |
if (scale_factors_.size()) |
4373 |
19 Sep 23 |
peter |
102 |
x.div(scale_factors_); |
4373 |
19 Sep 23 |
peter |
103 |
} |
4373 |
19 Sep 23 |
peter |
104 |
|
4373 |
19 Sep 23 |
peter |
105 |
|
616 |
31 Aug 06 |
jari |
106 |
void PCA::process(void) |
616 |
31 Aug 06 |
jari |
107 |
{ |
4369 |
13 Sep 23 |
peter |
108 |
size_t n = A_.columns(); |
4373 |
19 Sep 23 |
peter |
109 |
Matrix A_normalized(A_); |
4368 |
12 Sep 23 |
peter |
110 |
|
4373 |
19 Sep 23 |
peter |
111 |
if (meanvalues_.size()) { |
4373 |
19 Sep 23 |
peter |
112 |
calculate_means(A_normalized); |
4373 |
19 Sep 23 |
peter |
// Row-center the data matrix |
4373 |
19 Sep 23 |
peter |
114 |
row_center(A_normalized); |
4373 |
19 Sep 23 |
peter |
115 |
} |
4373 |
19 Sep 23 |
peter |
116 |
if (scale_factors_.size()) { |
4373 |
19 Sep 23 |
peter |
117 |
calculate_scale_factors(A_normalized); |
4373 |
19 Sep 23 |
peter |
118 |
row_scale(A_normalized); |
4373 |
19 Sep 23 |
peter |
119 |
} |
13 |
19 Jun 03 |
daniel |
120 |
|
4373 |
19 Sep 23 |
peter |
121 |
if (A_normalized.rows() < A_normalized.columns()) { |
4373 |
19 Sep 23 |
peter |
122 |
A_normalized.transpose(); |
4385 |
13 Oct 23 |
peter |
123 |
SVD svd(std::move(A_normalized)); |
4363 |
08 Sep 23 |
peter |
124 |
svd.decompose(); |
4369 |
13 Sep 23 |
peter |
125 |
|
4369 |
13 Sep 23 |
peter |
// A' = U S V' |
4369 |
13 Sep 23 |
peter |
// n*Cov = AA' = (USV')'USV' = VSU'USV' = VSSV' = VDV' |
4369 |
13 Sep 23 |
peter |
128 |
eigenvectors_ = transpose(svd.V()); |
4369 |
13 Sep 23 |
peter |
129 |
eigenvalues_ = svd.s(); |
4369 |
13 Sep 23 |
peter |
130 |
eigenvalues_.mul(eigenvalues_); |
4369 |
13 Sep 23 |
peter |
131 |
eigenvalues_ *= 1.0/n; |
4363 |
08 Sep 23 |
peter |
132 |
return; |
4363 |
08 Sep 23 |
peter |
133 |
} |
4363 |
08 Sep 23 |
peter |
134 |
|
616 |
31 Aug 06 |
jari |
// Single value decompose the data matrix |
4385 |
13 Oct 23 |
peter |
136 |
SVD svd(std::move(A_normalized)); |
4296 |
04 Feb 23 |
peter |
137 |
svd.decompose(); |
13 |
19 Jun 03 |
daniel |
138 |
|
4369 |
13 Sep 23 |
peter |
// Each column corresponds to a datapoint, which means each each |
4369 |
13 Sep 23 |
peter |
// row corresponds to a dimension/variable. |
4369 |
13 Sep 23 |
peter |
141 |
// |
4369 |
13 Sep 23 |
peter |
// A = U S V' |
4369 |
13 Sep 23 |
peter |
// n*Cov = AA' = U S V' (USV')' = USV'VSU' = USSU' = UDU' |
4369 |
13 Sep 23 |
peter |
// where n is number of data points, i.e., number of rows in A. |
4369 |
13 Sep 23 |
peter |
145 |
|
616 |
31 Aug 06 |
jari |
// Read the eigenvectors and eigenvalues |
4296 |
04 Feb 23 |
peter |
147 |
eigenvectors_ = transpose(svd.U()); |
4296 |
04 Feb 23 |
peter |
148 |
eigenvalues_ = svd.s(); |
4296 |
04 Feb 23 |
peter |
149 |
eigenvalues_.mul(eigenvalues_); |
4369 |
13 Sep 23 |
peter |
150 |
eigenvalues_ *= 1.0/n; |
616 |
31 Aug 06 |
jari |
151 |
} |
13 |
19 Jun 03 |
daniel |
152 |
|
13 |
19 Jun 03 |
daniel |
153 |
|
4125 |
14 Jan 22 |
peter |
154 |
Matrix PCA::projection(const MatrixBase& samples ) const |
616 |
31 Aug 06 |
jari |
155 |
{ |
4297 |
04 Feb 23 |
peter |
156 |
assert(samples.rows() == meanvalues_.size()); |
4373 |
19 Sep 23 |
peter |
157 |
Matrix normalized(samples); |
4373 |
19 Sep 23 |
peter |
158 |
row_normalize(normalized); |
4373 |
19 Sep 23 |
peter |
159 |
return eigenvectors_ * normalized; |
616 |
31 Aug 06 |
jari |
160 |
} |
13 |
19 Jun 03 |
daniel |
161 |
|
13 |
19 Jun 03 |
daniel |
162 |
|
4370 |
13 Sep 23 |
peter |
163 |
Vector PCA::projection(const VectorBase& x) const |
4370 |
13 Sep 23 |
peter |
164 |
{ |
4370 |
13 Sep 23 |
peter |
165 |
assert(x.size() == meanvalues_.size()); |
4370 |
13 Sep 23 |
peter |
166 |
assert(x.size() == eigenvectors_.columns()); |
4373 |
19 Sep 23 |
peter |
167 |
Vector z(x); |
4373 |
19 Sep 23 |
peter |
168 |
normalize(z); |
4373 |
19 Sep 23 |
peter |
169 |
return eigenvectors_ * z; |
4370 |
13 Sep 23 |
peter |
170 |
} |
4370 |
13 Sep 23 |
peter |
171 |
|
4370 |
13 Sep 23 |
peter |
172 |
|
829 |
24 Mar 07 |
jari |
// This function will row-center the matrix A_, |
829 |
24 Mar 07 |
jari |
// that is, A_ = A_ - M, where M is a matrix |
829 |
24 Mar 07 |
jari |
// with the meanvalues of each row |
4373 |
19 Sep 23 |
peter |
176 |
void PCA::row_center(Matrix& X) const |
616 |
31 Aug 06 |
jari |
177 |
{ |
4373 |
19 Sep 23 |
peter |
178 |
for (size_t i=0; i<meanvalues_.size(); ++i) |
4373 |
19 Sep 23 |
peter |
179 |
X.row_view(i) -= meanvalues_(i); |
616 |
31 Aug 06 |
jari |
180 |
} |
15 |
25 Jun 03 |
daniel |
181 |
|
4373 |
19 Sep 23 |
peter |
182 |
|
4373 |
19 Sep 23 |
peter |
183 |
void PCA::row_normalize(Matrix& X) const |
4373 |
19 Sep 23 |
peter |
184 |
{ |
4373 |
19 Sep 23 |
peter |
185 |
if (meanvalues_.size()) |
4373 |
19 Sep 23 |
peter |
186 |
row_center(X); |
4373 |
19 Sep 23 |
peter |
187 |
if (scale_factors_.size()) |
4373 |
19 Sep 23 |
peter |
188 |
row_scale(X); |
4373 |
19 Sep 23 |
peter |
189 |
} |
4373 |
19 Sep 23 |
peter |
190 |
|
4373 |
19 Sep 23 |
peter |
191 |
|
4373 |
19 Sep 23 |
peter |
192 |
void PCA::row_scale(Matrix& X) const |
4373 |
19 Sep 23 |
peter |
193 |
{ |
4373 |
19 Sep 23 |
peter |
194 |
for (size_t i=0; i<scale_factors_.size(); ++i) |
4373 |
19 Sep 23 |
peter |
195 |
X.row_view(i) *= 1.0 / scale_factors_(i); |
4373 |
19 Sep 23 |
peter |
196 |
} |
4373 |
19 Sep 23 |
peter |
197 |
|
687 |
16 Oct 06 |
jari |
198 |
}}} // of namespace utility, yat, and theplu |