680 |
11 Oct 06 |
jari |
1 |
#ifndef _theplu_yat_utility_nni_ |
4125 |
14 Jan 22 |
peter |
2 |
#define _theplu_yat_utility_nni_ |
616 |
31 Aug 06 |
jari |
3 |
|
145 |
05 Sep 04 |
jari |
// $Id$ |
145 |
05 Sep 04 |
jari |
5 |
|
570 |
05 Apr 06 |
jari |
6 |
/* |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2004 Jari Häkkinen |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2009 Jari Häkkinen |
4207 |
26 Aug 22 |
peter |
Copyright (C) 2022 Peter Johansson |
570 |
05 Apr 06 |
jari |
13 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
570 |
05 Apr 06 |
jari |
15 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
570 |
05 Apr 06 |
jari |
20 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
570 |
05 Apr 06 |
jari |
General Public License for more details. |
570 |
05 Apr 06 |
jari |
25 |
|
570 |
05 Apr 06 |
jari |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
570 |
05 Apr 06 |
jari |
28 |
*/ |
570 |
05 Apr 06 |
jari |
29 |
|
1121 |
22 Feb 08 |
peter |
30 |
#include "Matrix.h" |
675 |
10 Oct 06 |
jari |
31 |
|
145 |
05 Sep 04 |
jari |
32 |
#include <utility> |
145 |
05 Sep 04 |
jari |
33 |
#include <vector> |
145 |
05 Sep 04 |
jari |
34 |
|
145 |
05 Sep 04 |
jari |
35 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
36 |
namespace yat { |
301 |
30 Apr 05 |
peter |
37 |
namespace utility { |
145 |
05 Sep 04 |
jari |
38 |
|
145 |
05 Sep 04 |
jari |
39 |
/// |
767 |
22 Feb 07 |
peter |
/// @brief Interface class for nearest |
177 |
01 Oct 04 |
jari |
/// neighbour imputation (NNI) algorithms. |
145 |
05 Sep 04 |
jari |
42 |
/// |
177 |
01 Oct 04 |
jari |
/// The NNI algorithms implemented here are discussed in documents |
177 |
01 Oct 04 |
jari |
/// created in the WeNNI project. This document will be released for |
177 |
01 Oct 04 |
jari |
/// public access, and the necessary information for retrieving that |
177 |
01 Oct 04 |
jari |
/// document will be provided here. |
177 |
01 Oct 04 |
jari |
47 |
/// |
177 |
01 Oct 04 |
jari |
/// Short introduction to NNI is that one may want to improve |
177 |
01 Oct 04 |
jari |
/// (correct) uncertain data. Here, the data to be imputed is stored in a |
177 |
01 Oct 04 |
jari |
/// matrix where rows similar to each other are used to adjust |
177 |
01 Oct 04 |
jari |
/// uncertain data. The data matrix is accompanied by a weight |
177 |
01 Oct 04 |
jari |
/// (uncertainty) matrix defining what data is to be considered as |
177 |
01 Oct 04 |
jari |
/// 'certain' and what data is uncertain. The weight matrix can be |
177 |
01 Oct 04 |
jari |
/// binary with 1's indicating that the data does not need |
177 |
01 Oct 04 |
jari |
/// corrections, whereas a 0 means that the data should be replaced |
177 |
01 Oct 04 |
jari |
/// by an imputed value. Naturally, the weight matrix can also be |
178 |
01 Oct 04 |
jari |
/// continuous where values between 0 and 1 define how certain a |
177 |
01 Oct 04 |
jari |
/// data element is. |
177 |
01 Oct 04 |
jari |
59 |
/// |
177 |
01 Oct 04 |
jari |
/// The imputation depends on how similarity of rows of data is |
177 |
01 Oct 04 |
jari |
/// defined and on the number of closest neighbours (here; rows) to |
177 |
01 Oct 04 |
jari |
/// use in the imputation, which can be set. |
177 |
01 Oct 04 |
jari |
63 |
/// |
177 |
01 Oct 04 |
jari |
/// Implementation issues |
177 |
01 Oct 04 |
jari |
65 |
/// |
178 |
01 Oct 04 |
jari |
/// The current implementation treats rows where all data are tagged |
178 |
01 Oct 04 |
jari |
/// as completely uncertain, i.e. all weights are zero, by |
177 |
01 Oct 04 |
jari |
/// ignoring these lines in nearest neighbourhood |
177 |
01 Oct 04 |
jari |
/// calculations. Importantly, this type of data is not changed |
177 |
01 Oct 04 |
jari |
/// (imputed) either since there is no close neighbourhood defined |
177 |
01 Oct 04 |
jari |
/// for this data. |
177 |
01 Oct 04 |
jari |
72 |
/// |
177 |
01 Oct 04 |
jari |
/// Rows that are completely identical in an imputation algorithm |
177 |
01 Oct 04 |
jari |
/// sense will give problems since the distance between them will usually |
177 |
01 Oct 04 |
jari |
/// become zero. This is solved by setting zero distance to a small |
177 |
01 Oct 04 |
jari |
/// number. Identical rows in this context are basically a |
177 |
01 Oct 04 |
jari |
/// comparison between elements with non-zero uncertainty weights |
178 |
01 Oct 04 |
jari |
/// only, and all these elements are equal. Zero weight elements are |
177 |
01 Oct 04 |
jari |
/// not used in the comparison since these are considered as |
177 |
01 Oct 04 |
jari |
/// nonsense values. |
177 |
01 Oct 04 |
jari |
81 |
/// |
145 |
05 Sep 04 |
jari |
82 |
class NNI |
145 |
05 Sep 04 |
jari |
83 |
{ |
145 |
05 Sep 04 |
jari |
84 |
public: |
178 |
01 Oct 04 |
jari |
85 |
|
178 |
01 Oct 04 |
jari |
86 |
/// |
177 |
01 Oct 04 |
jari |
/// Base constructor for the nearest neighbour imputation |
177 |
01 Oct 04 |
jari |
/// algorithms. |
178 |
01 Oct 04 |
jari |
89 |
/// |
4125 |
14 Jan 22 |
peter |
90 |
NNI(const utility::MatrixBase& matrix,const utility::MatrixBase& weight, |
1271 |
09 Apr 08 |
peter |
91 |
const unsigned int neighbours); |
145 |
05 Sep 04 |
jari |
92 |
|
146 |
08 Sep 04 |
jari |
93 |
virtual ~NNI(void) {}; |
145 |
05 Sep 04 |
jari |
94 |
|
1726 |
15 Jan 09 |
jari |
95 |
/** |
1726 |
15 Jan 09 |
jari |
\brief Function doing the imputation. |
1726 |
15 Jan 09 |
jari |
97 |
|
1726 |
15 Jan 09 |
jari |
The return value can be used as an indication of how well the |
1726 |
15 Jan 09 |
jari |
imputation worked. The return value should be zero if proper |
1726 |
15 Jan 09 |
jari |
pre-processing of data is done. An example of bad data is a |
1726 |
15 Jan 09 |
jari |
matrix with a column of zero weights, another is a |
1726 |
15 Jan 09 |
jari |
corresponding situation with a row with all weights zero. |
1726 |
15 Jan 09 |
jari |
103 |
|
1726 |
15 Jan 09 |
jari |
\return The number of rows that have at least one value not |
1726 |
15 Jan 09 |
jari |
imputed. |
1726 |
15 Jan 09 |
jari |
106 |
*/ |
1271 |
09 Apr 08 |
peter |
107 |
virtual unsigned int estimate(void)=0; |
228 |
01 Feb 05 |
peter |
108 |
|
228 |
01 Feb 05 |
peter |
109 |
/// |
177 |
01 Oct 04 |
jari |
/// @return A const reference to the modified data. |
178 |
01 Oct 04 |
jari |
111 |
/// |
1121 |
22 Feb 08 |
peter |
112 |
const utility::Matrix& imputed_data(void) const; |
172 |
28 Sep 04 |
jari |
113 |
|
228 |
01 Feb 05 |
peter |
114 |
/// |
228 |
01 Feb 05 |
peter |
/// @return indices of rows in data matrix not imputed |
228 |
01 Feb 05 |
peter |
116 |
/// |
718 |
26 Dec 06 |
jari |
117 |
const std::vector<size_t>& not_imputed(void) const; |
228 |
01 Feb 05 |
peter |
118 |
|
145 |
05 Sep 04 |
jari |
119 |
protected: |
649 |
15 Sep 06 |
jari |
120 |
/** |
649 |
15 Sep 06 |
jari |
\f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2 |
649 |
15 Sep 06 |
jari |
}{\sum_{k=l}^C w_{ik} w_{jk} } \f$ where C is the number of columns |
649 |
15 Sep 06 |
jari |
123 |
*/ |
4200 |
19 Aug 22 |
peter |
124 |
std::vector<std::pair<size_t,double> > |
1271 |
09 Apr 08 |
peter |
125 |
calculate_distances(const size_t) const; |
648 |
14 Sep 06 |
peter |
/// Contributing nearest neighbours are added up to the user set |
648 |
14 Sep 06 |
peter |
/// number, and neighbours are disqualified if their element |
648 |
14 Sep 06 |
peter |
/// (column) weight is zero |
4200 |
19 Aug 22 |
peter |
129 |
std::vector<size_t> |
1271 |
09 Apr 08 |
peter |
130 |
nearest_neighbours(const size_t, |
1271 |
09 Apr 08 |
peter |
131 |
const std::vector<std::pair<size_t,double> >&) const; |
648 |
14 Sep 06 |
peter |
132 |
/// |
648 |
14 Sep 06 |
peter |
/// original data matrix |
648 |
14 Sep 06 |
peter |
134 |
/// |
4125 |
14 Jan 22 |
peter |
135 |
const utility::MatrixBase& data_; |
145 |
05 Sep 04 |
jari |
136 |
|
648 |
14 Sep 06 |
peter |
137 |
/// |
648 |
14 Sep 06 |
peter |
/// data after imputation |
648 |
14 Sep 06 |
peter |
139 |
/// |
1121 |
22 Feb 08 |
peter |
140 |
utility::Matrix imputed_data_; |
648 |
14 Sep 06 |
peter |
141 |
|
648 |
14 Sep 06 |
peter |
142 |
/// |
648 |
14 Sep 06 |
peter |
/// number of neighbor to use |
648 |
14 Sep 06 |
peter |
144 |
/// |
1271 |
09 Apr 08 |
peter |
145 |
unsigned int neighbours_; |
648 |
14 Sep 06 |
peter |
146 |
|
648 |
14 Sep 06 |
peter |
147 |
/// |
648 |
14 Sep 06 |
peter |
/// which rows are not imputed due to lack of data |
648 |
14 Sep 06 |
peter |
149 |
/// |
376 |
07 Aug 05 |
jari |
150 |
std::vector<size_t> not_imputed_; |
648 |
14 Sep 06 |
peter |
151 |
|
648 |
14 Sep 06 |
peter |
152 |
/// |
648 |
14 Sep 06 |
peter |
/// weight matrix |
648 |
14 Sep 06 |
peter |
154 |
/// |
4125 |
14 Jan 22 |
peter |
155 |
const utility::MatrixBase& weight_; |
145 |
05 Sep 04 |
jari |
156 |
}; |
145 |
05 Sep 04 |
jari |
157 |
|
687 |
16 Oct 06 |
jari |
158 |
}}} // of namespace utility, yat, and theplu |
145 |
05 Sep 04 |
jari |
159 |
|
145 |
05 Sep 04 |
jari |
160 |
#endif |