69 |
11 Feb 06 |
jari |
// $Id$ |
69 |
11 Feb 06 |
jari |
2 |
|
95 |
05 Apr 06 |
jari |
3 |
/* |
95 |
05 Apr 06 |
jari |
Copyright (C) 2004 Jari Häkkinen |
95 |
05 Apr 06 |
jari |
Copyright (C) 2005 Jari Häkkinen, Peter Johansson |
95 |
05 Apr 06 |
jari |
Copyright (C) 2006 Jari Häkkinen |
95 |
05 Apr 06 |
jari |
7 |
|
95 |
05 Apr 06 |
jari |
This file is part of the thep c++ tools library, |
95 |
05 Apr 06 |
jari |
http://lev.thep.lu.se/trac/c++_tools |
95 |
05 Apr 06 |
jari |
10 |
|
95 |
05 Apr 06 |
jari |
The c++ tools library is free software; you can redistribute it |
95 |
05 Apr 06 |
jari |
and/or modify it under the terms of the GNU General Public License |
824 |
26 Nov 08 |
jari |
as published by the Free Software Foundation; either version 3 of |
95 |
05 Apr 06 |
jari |
the License, or (at your option) any later version. |
95 |
05 Apr 06 |
jari |
15 |
|
95 |
05 Apr 06 |
jari |
The c++ tools library is distributed in the hope that it will be |
95 |
05 Apr 06 |
jari |
useful, but WITHOUT ANY WARRANTY; without even the implied warranty |
95 |
05 Apr 06 |
jari |
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
95 |
05 Apr 06 |
jari |
General Public License for more details. |
95 |
05 Apr 06 |
jari |
20 |
|
95 |
05 Apr 06 |
jari |
You should have received a copy of the GNU General Public License |
824 |
26 Nov 08 |
jari |
along with WeNNI. If not, see <http://www.gnu.org/licenses/>. |
95 |
05 Apr 06 |
jari |
23 |
*/ |
95 |
05 Apr 06 |
jari |
24 |
|
69 |
11 Feb 06 |
jari |
25 |
#ifndef _theplu_utility_nni_ |
69 |
11 Feb 06 |
jari |
26 |
#define _theplu_utility_nni_ |
69 |
11 Feb 06 |
jari |
27 |
|
69 |
11 Feb 06 |
jari |
28 |
#include <iostream> |
69 |
11 Feb 06 |
jari |
29 |
#include <utility> |
69 |
11 Feb 06 |
jari |
30 |
#include <vector> |
69 |
11 Feb 06 |
jari |
31 |
|
69 |
11 Feb 06 |
jari |
32 |
#include <c++_tools/gslapi/matrix.h> |
69 |
11 Feb 06 |
jari |
33 |
|
69 |
11 Feb 06 |
jari |
34 |
namespace theplu { |
69 |
11 Feb 06 |
jari |
35 |
namespace utility { |
69 |
11 Feb 06 |
jari |
36 |
|
69 |
11 Feb 06 |
jari |
37 |
/// |
69 |
11 Feb 06 |
jari |
/// NNI is an abstract base class defining the interface for nearest |
69 |
11 Feb 06 |
jari |
/// neighbour imputation (NNI) algorithms. |
69 |
11 Feb 06 |
jari |
40 |
/// |
69 |
11 Feb 06 |
jari |
/// The NNI algorithms implemented here are discussed in a document |
69 |
11 Feb 06 |
jari |
/// created in the WeNNI project. This document will be released for |
69 |
11 Feb 06 |
jari |
/// public access, and the necessary information for retrieving that |
69 |
11 Feb 06 |
jari |
/// document will be provided here. |
69 |
11 Feb 06 |
jari |
45 |
/// |
69 |
11 Feb 06 |
jari |
/// In short, NNI is used when one wants to improve |
69 |
11 Feb 06 |
jari |
/// (correct) uncertain data. Here, the data to be imputed is stored in a |
69 |
11 Feb 06 |
jari |
/// matrix where rows similar to each other are used to adjust |
69 |
11 Feb 06 |
jari |
/// uncertain data. The data matrix is accompanied by a weight |
69 |
11 Feb 06 |
jari |
/// (uncertainty) matrix defining what data is to be considered as |
69 |
11 Feb 06 |
jari |
/// 'certain' and what data is uncertain. The weight matrix can be |
69 |
11 Feb 06 |
jari |
/// binary with 1's indicating that the data does not need |
69 |
11 Feb 06 |
jari |
/// corrections, whereas a 0 means that the data should be replaced |
69 |
11 Feb 06 |
jari |
/// by an imputed value. Naturally, the weight matrix can also be |
69 |
11 Feb 06 |
jari |
/// continuous where values between 0 and 1 define how certain a |
69 |
11 Feb 06 |
jari |
/// data element is. |
69 |
11 Feb 06 |
jari |
57 |
/// |
69 |
11 Feb 06 |
jari |
/// The imputation depends on how similarity of rows of data is |
69 |
11 Feb 06 |
jari |
/// defined and on the number of closest neighbours (here; rows) to |
69 |
11 Feb 06 |
jari |
/// use in the imputation. |
69 |
11 Feb 06 |
jari |
61 |
/// |
69 |
11 Feb 06 |
jari |
/// Implementation issues |
69 |
11 Feb 06 |
jari |
63 |
/// |
69 |
11 Feb 06 |
jari |
/// The current implementation treats rows where all data are tagged |
69 |
11 Feb 06 |
jari |
/// as completely uncertain, i.e. all weights are zero, by |
69 |
11 Feb 06 |
jari |
/// ignoring these lines in nearest neighbourhood |
69 |
11 Feb 06 |
jari |
/// calculations. Importantly, this type of data is not changed |
69 |
11 Feb 06 |
jari |
/// (imputed) either since there is no close neighbourhood defined |
69 |
11 Feb 06 |
jari |
/// for this data. |
69 |
11 Feb 06 |
jari |
70 |
/// |
69 |
11 Feb 06 |
jari |
/// Rows that are completely identical in an imputation algorithm |
69 |
11 Feb 06 |
jari |
/// sense will give problems since the distance between them will usually |
69 |
11 Feb 06 |
jari |
/// become zero. This is solved by setting zero distance to a small |
69 |
11 Feb 06 |
jari |
/// number. Identical rows in this context are basically a |
69 |
11 Feb 06 |
jari |
/// comparison between elements with non-zero uncertainty weights |
69 |
11 Feb 06 |
jari |
/// only, and all these elements are equal. Zero weight elements are |
69 |
11 Feb 06 |
jari |
/// not used in the comparison since these are considered as |
69 |
11 Feb 06 |
jari |
/// nonsense values. |
69 |
11 Feb 06 |
jari |
79 |
/// |
69 |
11 Feb 06 |
jari |
80 |
class NNI |
69 |
11 Feb 06 |
jari |
81 |
{ |
69 |
11 Feb 06 |
jari |
82 |
public: |
69 |
11 Feb 06 |
jari |
83 |
|
69 |
11 Feb 06 |
jari |
84 |
/// |
69 |
11 Feb 06 |
jari |
/// Base constructor for the nearest neighbour imputation |
69 |
11 Feb 06 |
jari |
/// algorithms. |
69 |
11 Feb 06 |
jari |
87 |
/// |
69 |
11 Feb 06 |
jari |
88 |
NNI(const gslapi::matrix& matrix,const gslapi::matrix& weight, |
819 |
24 Nov 08 |
jari |
89 |
const unsigned int neighbours); |
69 |
11 Feb 06 |
jari |
90 |
|
69 |
11 Feb 06 |
jari |
91 |
virtual ~NNI(void) {}; |
69 |
11 Feb 06 |
jari |
92 |
|
69 |
11 Feb 06 |
jari |
93 |
/// |
69 |
11 Feb 06 |
jari |
/// Function doing the imputation. |
69 |
11 Feb 06 |
jari |
95 |
/// |
69 |
11 Feb 06 |
jari |
/// @return number of rows not imputed |
69 |
11 Feb 06 |
jari |
97 |
/// |
819 |
24 Nov 08 |
jari |
98 |
virtual unsigned int estimate(void)=0; |
69 |
11 Feb 06 |
jari |
99 |
|
69 |
11 Feb 06 |
jari |
100 |
/// |
69 |
11 Feb 06 |
jari |
/// @return A const reference to the modified data. |
69 |
11 Feb 06 |
jari |
102 |
/// |
69 |
11 Feb 06 |
jari |
103 |
const gslapi::matrix& imputed_data(void) const { return imputed_data_; } |
69 |
11 Feb 06 |
jari |
104 |
|
69 |
11 Feb 06 |
jari |
105 |
/// |
69 |
11 Feb 06 |
jari |
/// @return indices of rows in data matrix not imputed |
69 |
11 Feb 06 |
jari |
107 |
/// |
69 |
11 Feb 06 |
jari |
108 |
inline std::vector<size_t> not_imputed(void) const { return not_imputed_; } |
69 |
11 Feb 06 |
jari |
109 |
|
69 |
11 Feb 06 |
jari |
110 |
protected: |
819 |
24 Nov 08 |
jari |
111 |
std::vector<std::pair<unsigned int,double> > calculate_distances(const unsigned int) const; |
819 |
24 Nov 08 |
jari |
112 |
std::vector<unsigned int> nearest_neighbours(const unsigned int, |
819 |
24 Nov 08 |
jari |
113 |
const std::vector<std::pair<unsigned int,double> >&) const; |
69 |
11 Feb 06 |
jari |
114 |
|
69 |
11 Feb 06 |
jari |
115 |
const gslapi::matrix& data_; |
69 |
11 Feb 06 |
jari |
116 |
gslapi::matrix imputed_data_; |
819 |
24 Nov 08 |
jari |
117 |
unsigned int neighbours_; |
69 |
11 Feb 06 |
jari |
118 |
std::vector<size_t> not_imputed_; |
69 |
11 Feb 06 |
jari |
119 |
const gslapi::matrix& weight_; |
69 |
11 Feb 06 |
jari |
120 |
}; |
69 |
11 Feb 06 |
jari |
121 |
|
69 |
11 Feb 06 |
jari |
122 |
}} // of namespace utility and namespace theplu |
69 |
11 Feb 06 |
jari |
123 |
|
69 |
11 Feb 06 |
jari |
124 |
#endif |