4200 |
19 Aug 22 |
peter |
1 |
#ifndef _theplu_yat_classifier_subset_generator_ |
4200 |
19 Aug 22 |
peter |
2 |
#define _theplu_yat_classifier_subset_generator_ |
613 |
30 Aug 06 |
markus |
3 |
|
613 |
30 Aug 06 |
markus |
// $Id$ |
613 |
30 Aug 06 |
markus |
5 |
|
613 |
30 Aug 06 |
markus |
6 |
/* |
2119 |
12 Dec 09 |
peter |
Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2009, 2010 Peter Johansson |
613 |
30 Aug 06 |
markus |
11 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
613 |
30 Aug 06 |
markus |
13 |
|
675 |
10 Oct 06 |
jari |
The yat library is free software; you can redistribute it and/or |
675 |
10 Oct 06 |
jari |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
675 |
10 Oct 06 |
jari |
License, or (at your option) any later version. |
613 |
30 Aug 06 |
markus |
18 |
|
675 |
10 Oct 06 |
jari |
The yat library is distributed in the hope that it will be useful, |
675 |
10 Oct 06 |
jari |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
675 |
10 Oct 06 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
613 |
30 Aug 06 |
markus |
General Public License for more details. |
613 |
30 Aug 06 |
markus |
23 |
|
613 |
30 Aug 06 |
markus |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
613 |
30 Aug 06 |
markus |
26 |
*/ |
613 |
30 Aug 06 |
markus |
27 |
|
1072 |
12 Feb 08 |
peter |
28 |
#include "FeatureSelector.h" |
1072 |
12 Feb 08 |
peter |
29 |
#include "KernelLookup.h" |
1072 |
12 Feb 08 |
peter |
30 |
#include "MatrixLookup.h" |
1072 |
12 Feb 08 |
peter |
31 |
#include "MatrixLookupWeighted.h" |
680 |
11 Oct 06 |
jari |
32 |
#include "Target.h" |
680 |
11 Oct 06 |
jari |
33 |
#include "Sampler.h" |
1134 |
23 Feb 08 |
peter |
34 |
#include "yat/utility/Index.h" |
1086 |
14 Feb 08 |
peter |
35 |
#include "yat/utility/yat_assert.h" |
675 |
10 Oct 06 |
jari |
36 |
|
1072 |
12 Feb 08 |
peter |
37 |
#include <algorithm> |
1072 |
12 Feb 08 |
peter |
38 |
#include <utility> |
1072 |
12 Feb 08 |
peter |
39 |
#include <typeinfo> |
615 |
31 Aug 06 |
peter |
40 |
#include <vector> |
613 |
30 Aug 06 |
markus |
41 |
|
613 |
30 Aug 06 |
markus |
42 |
namespace theplu { |
680 |
11 Oct 06 |
jari |
43 |
namespace yat { |
4200 |
19 Aug 22 |
peter |
44 |
namespace classifier { |
613 |
30 Aug 06 |
markus |
45 |
/// |
4200 |
19 Aug 22 |
peter |
/// @brief Class splitting Data into training and validation set. |
767 |
22 Feb 07 |
peter |
47 |
/// |
1220 |
11 Mar 08 |
peter |
/// A SubsetGenerator splits a Data into several training and |
1220 |
11 Mar 08 |
peter |
/// validation data. A Sampler is used to select samples for a |
1220 |
11 Mar 08 |
peter |
/// training Data set and a validation Data set, respectively. In |
1220 |
11 Mar 08 |
peter |
/// addition a FeatureSelector can be used to select Features. For |
1220 |
11 Mar 08 |
peter |
/// more details see constructors. |
4200 |
19 Aug 22 |
peter |
53 |
/// |
1220 |
11 Mar 08 |
peter |
/// \note Data must be one of MatrixLookup, MatrixLookupWeighted, or |
1220 |
11 Mar 08 |
peter |
/// KernelLookup. |
1220 |
11 Mar 08 |
peter |
56 |
/// |
4200 |
19 Aug 22 |
peter |
57 |
template <typename Data> |
1072 |
12 Feb 08 |
peter |
58 |
class SubsetGenerator |
613 |
30 Aug 06 |
markus |
59 |
{ |
1079 |
13 Feb 08 |
peter |
60 |
public: |
1125 |
22 Feb 08 |
peter |
61 |
/** |
1220 |
11 Mar 08 |
peter |
type of Data that is stored in SubsetGenerator |
1125 |
22 Feb 08 |
peter |
63 |
*/ |
1220 |
11 Mar 08 |
peter |
64 |
typedef Data value_type; |
1072 |
12 Feb 08 |
peter |
65 |
|
613 |
30 Aug 06 |
markus |
66 |
/// |
1220 |
11 Mar 08 |
peter |
/// @brief Create SubDataSets |
4200 |
19 Aug 22 |
peter |
68 |
/// |
1220 |
11 Mar 08 |
peter |
/// Creates N training data sets and N validation data sets, where |
1220 |
11 Mar 08 |
peter |
/// N equals the size of \a sampler. Data must be one of |
1220 |
11 Mar 08 |
peter |
/// MatrixLookup, MatrixLookupWeighted, or KernelLookup. |
615 |
31 Aug 06 |
peter |
72 |
/// |
1220 |
11 Mar 08 |
peter |
/// In case of MatrixLookup or MatrixLookupWeighted, each column |
1220 |
11 Mar 08 |
peter |
/// corresponds to a sample and the \a sampler is used to select |
1220 |
11 Mar 08 |
peter |
/// columns. Sampler::training_index(size_t) is used to select |
1220 |
11 Mar 08 |
peter |
/// columns for the corresponding traing_data, and |
1220 |
11 Mar 08 |
peter |
/// Sampler::validation_index(size_t) is used to select columns |
1220 |
11 Mar 08 |
peter |
/// for the corresponding validation_data. |
1220 |
11 Mar 08 |
peter |
79 |
/// |
1220 |
11 Mar 08 |
peter |
/// In case of a KernelLookup it is a bit different. A symmetric |
1220 |
11 Mar 08 |
peter |
/// training kernel is created using |
1220 |
11 Mar 08 |
peter |
/// Sampler::training_index(size_t) to select rows and |
1220 |
11 Mar 08 |
peter |
/// columns. The validation kernel is typically not symmetric, but |
1220 |
11 Mar 08 |
peter |
/// the columns correspond to a validation sample and each row |
1220 |
11 Mar 08 |
peter |
/// corresponds to a training sample. Consequently |
1220 |
11 Mar 08 |
peter |
/// Sampler::training_index(size_t) is used to select rows, and |
1220 |
11 Mar 08 |
peter |
/// Sampler::validation_index(size_t) is used to select columns. |
1220 |
11 Mar 08 |
peter |
88 |
/// |
1220 |
11 Mar 08 |
peter |
/// @param sampler Sampler that is used to select samples. |
1220 |
11 Mar 08 |
peter |
/// @param data Data to split up in validation and training. |
1220 |
11 Mar 08 |
peter |
91 |
/// |
1220 |
11 Mar 08 |
peter |
92 |
SubsetGenerator(const Sampler& sampler, const Data& data); |
615 |
31 Aug 06 |
peter |
93 |
|
615 |
31 Aug 06 |
peter |
94 |
/// |
1220 |
11 Mar 08 |
peter |
/// @brief Create SubDataSets with feature selection |
4200 |
19 Aug 22 |
peter |
96 |
/// |
1220 |
11 Mar 08 |
peter |
/// Creates N training data sets and N validation data sets, where |
1220 |
11 Mar 08 |
peter |
/// N equals the size of \a sampler. The Sampler defines which |
1220 |
11 Mar 08 |
peter |
/// samples are included in a subset. Likewise a FeatureSelector, |
1220 |
11 Mar 08 |
peter |
/// \a fs, is used to select features. The selection is based on |
1220 |
11 Mar 08 |
peter |
/// not based on the entire dataset but solely on the training |
1220 |
11 Mar 08 |
peter |
/// dataset. Data must be one of MatrixLookup, |
1220 |
11 Mar 08 |
peter |
/// MatrixLookupWeighted, or KernelLookup. |
1220 |
11 Mar 08 |
peter |
104 |
/// |
1220 |
11 Mar 08 |
peter |
/// In case of MatrixLookup or MatrixLookupWeighted, each column |
1220 |
11 Mar 08 |
peter |
/// corresponds to a sample and the \a sampler is used to select |
1220 |
11 Mar 08 |
peter |
/// columns. Sampler::training_index(size_t) is used to select |
1220 |
11 Mar 08 |
peter |
/// columns for the corresponding traing_data, and |
1220 |
11 Mar 08 |
peter |
/// Sampler::validation_index(size_t) is used to select columns |
1220 |
11 Mar 08 |
peter |
/// for the corresponding validation_data. The FeatureSelector is |
1220 |
11 Mar 08 |
peter |
/// used to select features, i.e., to select rows to be included |
1220 |
11 Mar 08 |
peter |
/// in the subsets. |
1220 |
11 Mar 08 |
peter |
113 |
/// |
1220 |
11 Mar 08 |
peter |
/// In case of a KernelLookup it is a bit different. A symmetric |
1220 |
11 Mar 08 |
peter |
/// training kernel is created using |
1220 |
11 Mar 08 |
peter |
/// Sampler::training_index(size_t) to select rows and |
1220 |
11 Mar 08 |
peter |
/// columns. However, the created KernelLookup is not simply the |
1220 |
11 Mar 08 |
peter |
/// subkernel of \a data, but each element is recalculated using |
1220 |
11 Mar 08 |
peter |
/// the features selected by FeatureSelector \a fs. In the |
1220 |
11 Mar 08 |
peter |
/// validation kernel each column corresponds to a validation |
1220 |
11 Mar 08 |
peter |
/// sample and each row corresponds to a training |
1220 |
11 Mar 08 |
peter |
/// sample. Consequently Sampler::training_index(size_t) is used |
1220 |
11 Mar 08 |
peter |
/// to select rows, and Sampler::validation_index(size_t) is used |
1220 |
11 Mar 08 |
peter |
/// to select columns. The same set of features are used to |
1220 |
11 Mar 08 |
peter |
/// caclulate the elements as for the training kernel, i.e., |
1220 |
11 Mar 08 |
peter |
/// feature selection is based on training data. |
1220 |
11 Mar 08 |
peter |
127 |
/// |
648 |
14 Sep 06 |
peter |
/// @param sampler taking care of partioning dataset |
648 |
14 Sep 06 |
peter |
/// @param data data to be split up in validation and training. |
648 |
14 Sep 06 |
peter |
/// @param fs Object selecting features for each subset |
615 |
31 Aug 06 |
peter |
131 |
/// |
4200 |
19 Aug 22 |
peter |
132 |
SubsetGenerator(const Sampler& sampler, const Data& data, |
615 |
31 Aug 06 |
peter |
133 |
FeatureSelector& fs); |
615 |
31 Aug 06 |
peter |
134 |
|
615 |
31 Aug 06 |
peter |
135 |
/// |
613 |
30 Aug 06 |
markus |
/// Destructor |
613 |
30 Aug 06 |
markus |
137 |
/// |
615 |
31 Aug 06 |
peter |
138 |
~SubsetGenerator(); |
4200 |
19 Aug 22 |
peter |
139 |
|
615 |
31 Aug 06 |
peter |
140 |
/// |
615 |
31 Aug 06 |
peter |
/// @return number of subsets |
615 |
31 Aug 06 |
peter |
142 |
/// |
1220 |
11 Mar 08 |
peter |
143 |
size_t size(void) const; |
615 |
31 Aug 06 |
peter |
144 |
|
615 |
31 Aug 06 |
peter |
145 |
/// |
615 |
31 Aug 06 |
peter |
/// @return the target for the total set |
615 |
31 Aug 06 |
peter |
147 |
/// |
720 |
26 Dec 06 |
jari |
148 |
const Target& target(void) const; |
615 |
31 Aug 06 |
peter |
149 |
|
615 |
31 Aug 06 |
peter |
150 |
/// |
1220 |
11 Mar 08 |
peter |
/// See constructors for details on how training data are |
1220 |
11 Mar 08 |
peter |
/// generated. |
615 |
31 Aug 06 |
peter |
153 |
/// |
1220 |
11 Mar 08 |
peter |
/// @return ith training data |
1220 |
11 Mar 08 |
peter |
155 |
/// |
1220 |
11 Mar 08 |
peter |
156 |
const Data& training_data(size_t i) const; |
615 |
31 Aug 06 |
peter |
157 |
|
615 |
31 Aug 06 |
peter |
158 |
/// |
1220 |
11 Mar 08 |
peter |
/// Features that are used to create ith training data and |
1220 |
11 Mar 08 |
peter |
/// validation data. |
615 |
31 Aug 06 |
peter |
161 |
/// |
615 |
31 Aug 06 |
peter |
/// @return training features |
615 |
31 Aug 06 |
peter |
163 |
/// |
1186 |
28 Feb 08 |
peter |
164 |
const utility::Index& training_features(size_t i) const; |
615 |
31 Aug 06 |
peter |
165 |
|
615 |
31 Aug 06 |
peter |
166 |
/// |
1222 |
11 Mar 08 |
peter |
/// @return Index of samples included in ith training data. |
615 |
31 Aug 06 |
peter |
168 |
/// |
1186 |
28 Feb 08 |
peter |
169 |
const utility::Index& training_index(size_t i) const; |
615 |
31 Aug 06 |
peter |
170 |
|
615 |
31 Aug 06 |
peter |
171 |
/// |
1220 |
11 Mar 08 |
peter |
/// @return Targets of ith set of training samples |
615 |
31 Aug 06 |
peter |
173 |
/// |
1220 |
11 Mar 08 |
peter |
174 |
const Target& training_target(size_t i) const; |
615 |
31 Aug 06 |
peter |
175 |
|
615 |
31 Aug 06 |
peter |
176 |
/// |
1220 |
11 Mar 08 |
peter |
/// See constructors for details on how validation data are |
1220 |
11 Mar 08 |
peter |
/// generated. |
615 |
31 Aug 06 |
peter |
179 |
/// |
1220 |
11 Mar 08 |
peter |
/// @return ith validation data |
1220 |
11 Mar 08 |
peter |
181 |
/// |
1220 |
11 Mar 08 |
peter |
182 |
const Data& validation_data(size_t i) const; |
615 |
31 Aug 06 |
peter |
183 |
|
615 |
31 Aug 06 |
peter |
184 |
/// |
1222 |
11 Mar 08 |
peter |
/// @return Index of samples included in ith validation data. |
615 |
31 Aug 06 |
peter |
186 |
/// |
1220 |
11 Mar 08 |
peter |
187 |
const utility::Index& validation_index(size_t i) const; |
615 |
31 Aug 06 |
peter |
188 |
|
615 |
31 Aug 06 |
peter |
189 |
/// |
1220 |
11 Mar 08 |
peter |
/// @return Targets of ith set validation samples |
615 |
31 Aug 06 |
peter |
191 |
/// |
1220 |
11 Mar 08 |
peter |
192 |
const Target& validation_target(size_t i) const; |
615 |
31 Aug 06 |
peter |
193 |
|
613 |
30 Aug 06 |
markus |
194 |
private: |
1086 |
14 Feb 08 |
peter |
195 |
void build(const MatrixLookup&); |
1086 |
14 Feb 08 |
peter |
196 |
void build(const MatrixLookupWeighted&); |
1086 |
14 Feb 08 |
peter |
197 |
void build(const KernelLookup&); |
1086 |
14 Feb 08 |
peter |
198 |
|
615 |
31 Aug 06 |
peter |
199 |
SubsetGenerator(const SubsetGenerator&); |
615 |
31 Aug 06 |
peter |
200 |
const SubsetGenerator& operator=(const SubsetGenerator&) const; |
613 |
30 Aug 06 |
markus |
201 |
|
615 |
31 Aug 06 |
peter |
202 |
FeatureSelector* f_selector_; |
1134 |
23 Feb 08 |
peter |
203 |
std::vector<utility::Index > features_; |
615 |
31 Aug 06 |
peter |
204 |
const Sampler& sampler_; |
1220 |
11 Mar 08 |
peter |
205 |
std::vector<const Data*> training_data_; |
615 |
31 Aug 06 |
peter |
206 |
std::vector<Target> training_target_; |
1220 |
11 Mar 08 |
peter |
207 |
std::vector<const Data*> validation_data_; |
615 |
31 Aug 06 |
peter |
208 |
std::vector<Target> validation_target_; |
613 |
30 Aug 06 |
markus |
209 |
|
613 |
30 Aug 06 |
markus |
210 |
}; |
613 |
30 Aug 06 |
markus |
211 |
|
1072 |
12 Feb 08 |
peter |
212 |
|
1072 |
12 Feb 08 |
peter |
// templates |
1072 |
12 Feb 08 |
peter |
214 |
|
1220 |
11 Mar 08 |
peter |
215 |
template<typename Data> |
4200 |
19 Aug 22 |
peter |
216 |
SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler, |
1220 |
11 Mar 08 |
peter |
217 |
const Data& data) |
1086 |
14 Feb 08 |
peter |
218 |
: f_selector_(NULL), sampler_(sampler) |
4200 |
19 Aug 22 |
peter |
219 |
{ |
1875 |
19 Mar 09 |
peter |
220 |
YAT_ASSERT(target().size()==data.columns()); |
1072 |
12 Feb 08 |
peter |
221 |
|
1072 |
12 Feb 08 |
peter |
222 |
training_data_.reserve(sampler_.size()); |
1072 |
12 Feb 08 |
peter |
223 |
validation_data_.reserve(sampler_.size()); |
1167 |
26 Feb 08 |
peter |
224 |
build(data); |
1875 |
19 Mar 09 |
peter |
225 |
YAT_ASSERT(training_data_.size()==size()); |
1875 |
19 Mar 09 |
peter |
226 |
YAT_ASSERT(training_target_.size()==size()); |
1875 |
19 Mar 09 |
peter |
227 |
YAT_ASSERT(validation_data_.size()==size()); |
1875 |
19 Mar 09 |
peter |
228 |
YAT_ASSERT(validation_target_.size()==size()); |
1072 |
12 Feb 08 |
peter |
229 |
} |
1072 |
12 Feb 08 |
peter |
230 |
|
1072 |
12 Feb 08 |
peter |
231 |
|
1220 |
11 Mar 08 |
peter |
232 |
template<typename Data> |
4200 |
19 Aug 22 |
peter |
233 |
SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler, |
4200 |
19 Aug 22 |
peter |
234 |
const Data& data, |
1167 |
26 Feb 08 |
peter |
235 |
FeatureSelector& fs) |
1086 |
14 Feb 08 |
peter |
236 |
: f_selector_(&fs), sampler_(sampler) |
4200 |
19 Aug 22 |
peter |
237 |
{ |
1875 |
19 Mar 09 |
peter |
238 |
YAT_ASSERT(target().size()==data.columns()); |
1072 |
12 Feb 08 |
peter |
239 |
features_.reserve(size()); |
1072 |
12 Feb 08 |
peter |
240 |
training_data_.reserve(size()); |
1072 |
12 Feb 08 |
peter |
241 |
validation_data_.reserve(size()); |
1086 |
14 Feb 08 |
peter |
242 |
build(data); |
1875 |
19 Mar 09 |
peter |
243 |
YAT_ASSERT(training_data_.size()==size()); |
1875 |
19 Mar 09 |
peter |
244 |
YAT_ASSERT(training_target_.size()==size()); |
1875 |
19 Mar 09 |
peter |
245 |
YAT_ASSERT(validation_data_.size()==size()); |
1875 |
19 Mar 09 |
peter |
246 |
YAT_ASSERT(validation_target_.size()==size()); |
1072 |
12 Feb 08 |
peter |
247 |
} |
1072 |
12 Feb 08 |
peter |
248 |
|
1072 |
12 Feb 08 |
peter |
249 |
|
1220 |
11 Mar 08 |
peter |
250 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
251 |
SubsetGenerator<Data>::~SubsetGenerator() |
1072 |
12 Feb 08 |
peter |
252 |
{ |
1875 |
19 Mar 09 |
peter |
253 |
YAT_ASSERT(training_data_.size()==validation_data_.size()); |
4200 |
19 Aug 22 |
peter |
254 |
for (size_t i=0; i<training_data_.size(); i++) |
1072 |
12 Feb 08 |
peter |
255 |
delete training_data_[i]; |
4200 |
19 Aug 22 |
peter |
256 |
for (size_t i=0; i<validation_data_.size(); i++) |
1072 |
12 Feb 08 |
peter |
257 |
delete validation_data_[i]; |
1072 |
12 Feb 08 |
peter |
258 |
} |
1072 |
12 Feb 08 |
peter |
259 |
|
1072 |
12 Feb 08 |
peter |
260 |
|
1220 |
11 Mar 08 |
peter |
261 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
262 |
void SubsetGenerator<Data>::build(const MatrixLookup& ml) |
1086 |
14 Feb 08 |
peter |
263 |
{ |
1206 |
05 Mar 08 |
peter |
264 |
if (!f_selector_)// no feature selection |
1206 |
05 Mar 08 |
peter |
265 |
features_.push_back(utility::Index(ml.rows())); |
1206 |
05 Mar 08 |
peter |
266 |
|
1086 |
14 Feb 08 |
peter |
267 |
for (size_t k=0; k<size(); k++){ |
1086 |
14 Feb 08 |
peter |
268 |
training_target_.push_back(Target(target(),training_index(k))); |
1086 |
14 Feb 08 |
peter |
269 |
validation_target_.push_back(Target(target(),validation_index(k))); |
1167 |
26 Feb 08 |
peter |
270 |
if (f_selector_){ |
1167 |
26 Feb 08 |
peter |
// training data with no feature selection |
4200 |
19 Aug 22 |
peter |
272 |
const MatrixLookup* train_data_all_feat = |
1168 |
26 Feb 08 |
peter |
273 |
new MatrixLookup(ml, training_index(k), false); |
1167 |
26 Feb 08 |
peter |
// use these data to create feature selection |
1875 |
19 Mar 09 |
peter |
275 |
YAT_ASSERT(train_data_all_feat); |
1167 |
26 Feb 08 |
peter |
276 |
f_selector_->update(*train_data_all_feat, training_target(k)); |
1086 |
14 Feb 08 |
peter |
// get features |
1167 |
26 Feb 08 |
peter |
278 |
features_.push_back(f_selector_->features()); |
1875 |
19 Mar 09 |
peter |
279 |
YAT_ASSERT(train_data_all_feat); |
1167 |
26 Feb 08 |
peter |
280 |
delete train_data_all_feat; |
1167 |
26 Feb 08 |
peter |
281 |
} |
4200 |
19 Aug 22 |
peter |
282 |
|
1086 |
14 Feb 08 |
peter |
// Dynamically allocated. Must be deleted in destructor. |
4200 |
19 Aug 22 |
peter |
284 |
training_data_.push_back(new MatrixLookup(ml,features_.back(), |
1086 |
14 Feb 08 |
peter |
285 |
training_index(k))); |
4200 |
19 Aug 22 |
peter |
286 |
validation_data_.push_back(new MatrixLookup(ml,features_.back(), |
4200 |
19 Aug 22 |
peter |
287 |
validation_index(k))); |
1086 |
14 Feb 08 |
peter |
288 |
} |
1086 |
14 Feb 08 |
peter |
289 |
|
1086 |
14 Feb 08 |
peter |
290 |
} |
1086 |
14 Feb 08 |
peter |
291 |
|
1086 |
14 Feb 08 |
peter |
292 |
|
1220 |
11 Mar 08 |
peter |
293 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
294 |
void SubsetGenerator<Data>::build(const MatrixLookupWeighted& ml) |
1086 |
14 Feb 08 |
peter |
295 |
{ |
1206 |
05 Mar 08 |
peter |
296 |
if (!f_selector_)// no feature selection |
1206 |
05 Mar 08 |
peter |
297 |
features_.push_back(utility::Index(ml.rows())); |
1206 |
05 Mar 08 |
peter |
298 |
|
1273 |
10 Apr 08 |
jari |
299 |
for (unsigned long k=0; k<size(); k++){ |
1086 |
14 Feb 08 |
peter |
300 |
training_target_.push_back(Target(target(),training_index(k))); |
1086 |
14 Feb 08 |
peter |
301 |
validation_target_.push_back(Target(target(),validation_index(k))); |
1167 |
26 Feb 08 |
peter |
302 |
if (f_selector_){ |
1167 |
26 Feb 08 |
peter |
// training data with no feature selection |
4200 |
19 Aug 22 |
peter |
304 |
const MatrixLookupWeighted* train_data_all_feat = |
4200 |
19 Aug 22 |
peter |
305 |
new MatrixLookupWeighted(ml, utility::Index(ml.rows()), |
2226 |
24 Mar 10 |
peter |
306 |
training_index(k)); |
1167 |
26 Feb 08 |
peter |
// use these data to create feature selection |
1167 |
26 Feb 08 |
peter |
308 |
f_selector_->update(*train_data_all_feat, training_target(k)); |
1167 |
26 Feb 08 |
peter |
// get features |
1167 |
26 Feb 08 |
peter |
310 |
features_.push_back(f_selector_->features()); |
1167 |
26 Feb 08 |
peter |
311 |
delete train_data_all_feat; |
1167 |
26 Feb 08 |
peter |
312 |
} |
1167 |
26 Feb 08 |
peter |
313 |
|
1167 |
26 Feb 08 |
peter |
314 |
|
1086 |
14 Feb 08 |
peter |
// Dynamically allocated. Must be deleted in destructor. |
4200 |
19 Aug 22 |
peter |
316 |
training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), |
1086 |
14 Feb 08 |
peter |
317 |
training_index(k))); |
4200 |
19 Aug 22 |
peter |
318 |
validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), |
1086 |
14 Feb 08 |
peter |
319 |
validation_index(k))); |
1086 |
14 Feb 08 |
peter |
320 |
} |
1086 |
14 Feb 08 |
peter |
321 |
} |
1086 |
14 Feb 08 |
peter |
322 |
|
1220 |
11 Mar 08 |
peter |
323 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
324 |
void SubsetGenerator<Data>::build(const KernelLookup& kernel) |
1086 |
14 Feb 08 |
peter |
325 |
{ |
1273 |
10 Apr 08 |
jari |
326 |
for (unsigned long k=0; k<size(); k++){ |
1086 |
14 Feb 08 |
peter |
327 |
training_target_.push_back(Target(target(),training_index(k))); |
1086 |
14 Feb 08 |
peter |
328 |
validation_target_.push_back(Target(target(),validation_index(k))); |
1165 |
26 Feb 08 |
peter |
329 |
|
1167 |
26 Feb 08 |
peter |
330 |
if (f_selector_){ |
1167 |
26 Feb 08 |
peter |
331 |
if (kernel.weighted()){ |
1206 |
05 Mar 08 |
peter |
332 |
MatrixLookupWeighted ml = kernel.data_weighted(); |
4200 |
19 Aug 22 |
peter |
333 |
f_selector_->update(MatrixLookupWeighted(ml, |
2226 |
24 Mar 10 |
peter |
334 |
utility::Index(ml.rows()), |
4200 |
19 Aug 22 |
peter |
335 |
training_index(k)), |
1167 |
26 Feb 08 |
peter |
336 |
training_target(k)); |
1167 |
26 Feb 08 |
peter |
337 |
} |
1167 |
26 Feb 08 |
peter |
338 |
else { |
1206 |
05 Mar 08 |
peter |
339 |
MatrixLookup ml=kernel.data(); |
4200 |
19 Aug 22 |
peter |
340 |
f_selector_->update(MatrixLookup(ml,training_index(k), false), |
1167 |
26 Feb 08 |
peter |
341 |
training_target(k)); |
4200 |
19 Aug 22 |
peter |
342 |
} |
1167 |
26 Feb 08 |
peter |
343 |
features_.push_back(f_selector_->features()); |
1206 |
05 Mar 08 |
peter |
344 |
KernelLookup kl = kernel.selected(features_.back()); |
1167 |
26 Feb 08 |
peter |
// Dynamically allocated. Must be deleted in destructor. |
1206 |
05 Mar 08 |
peter |
346 |
training_data_.push_back(new KernelLookup(kl,training_index(k), |
1167 |
26 Feb 08 |
peter |
347 |
training_index(k))); |
4200 |
19 Aug 22 |
peter |
348 |
validation_data_.push_back(new KernelLookup(kl, training_index(k), |
1167 |
26 Feb 08 |
peter |
349 |
validation_index(k))); |
1086 |
14 Feb 08 |
peter |
350 |
} |
1167 |
26 Feb 08 |
peter |
351 |
else {// no feature selection |
1167 |
26 Feb 08 |
peter |
352 |
training_data_.push_back(new KernelLookup(kernel, training_index(k), |
1167 |
26 Feb 08 |
peter |
353 |
training_index(k))); |
4200 |
19 Aug 22 |
peter |
354 |
validation_data_.push_back(new KernelLookup(kernel, |
4200 |
19 Aug 22 |
peter |
355 |
training_index(k), |
1167 |
26 Feb 08 |
peter |
356 |
validation_index(k))); |
1167 |
26 Feb 08 |
peter |
357 |
} |
4200 |
19 Aug 22 |
peter |
358 |
|
1086 |
14 Feb 08 |
peter |
359 |
} |
1206 |
05 Mar 08 |
peter |
360 |
if (!f_selector_){ |
1206 |
05 Mar 08 |
peter |
361 |
if (kernel.weighted()) |
1206 |
05 Mar 08 |
peter |
362 |
features_.push_back(utility::Index(kernel.data_weighted().rows())); |
1206 |
05 Mar 08 |
peter |
363 |
else |
1206 |
05 Mar 08 |
peter |
364 |
features_.push_back(utility::Index(kernel.data().rows())); |
1206 |
05 Mar 08 |
peter |
365 |
} |
1086 |
14 Feb 08 |
peter |
366 |
} |
1086 |
14 Feb 08 |
peter |
367 |
|
1086 |
14 Feb 08 |
peter |
368 |
|
1220 |
11 Mar 08 |
peter |
369 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
370 |
size_t SubsetGenerator<Data>::size(void) const |
1072 |
12 Feb 08 |
peter |
371 |
{ |
1072 |
12 Feb 08 |
peter |
372 |
return sampler_.size(); |
1072 |
12 Feb 08 |
peter |
373 |
} |
1072 |
12 Feb 08 |
peter |
374 |
|
1072 |
12 Feb 08 |
peter |
375 |
|
1220 |
11 Mar 08 |
peter |
376 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
377 |
const Target& SubsetGenerator<Data>::target(void) const |
1072 |
12 Feb 08 |
peter |
378 |
{ |
1072 |
12 Feb 08 |
peter |
379 |
return sampler_.target(); |
1072 |
12 Feb 08 |
peter |
380 |
} |
1072 |
12 Feb 08 |
peter |
381 |
|
1072 |
12 Feb 08 |
peter |
382 |
|
1220 |
11 Mar 08 |
peter |
383 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
384 |
const Data& |
4200 |
19 Aug 22 |
peter |
385 |
SubsetGenerator<Data>::training_data(size_t i) const |
1072 |
12 Feb 08 |
peter |
386 |
{ |
1072 |
12 Feb 08 |
peter |
387 |
return *(training_data_[i]); |
1072 |
12 Feb 08 |
peter |
388 |
} |
1072 |
12 Feb 08 |
peter |
389 |
|
1072 |
12 Feb 08 |
peter |
390 |
|
1220 |
11 Mar 08 |
peter |
391 |
template<typename Data> |
1134 |
23 Feb 08 |
peter |
392 |
const utility::Index& |
1220 |
11 Mar 08 |
peter |
393 |
SubsetGenerator<Data>::training_features(size_t i) const |
1072 |
12 Feb 08 |
peter |
394 |
{ |
2210 |
05 Mar 10 |
peter |
395 |
YAT_ASSERT(features_.size()); |
1072 |
12 Feb 08 |
peter |
396 |
return f_selector_ ? features_[i] : features_[0]; |
1072 |
12 Feb 08 |
peter |
397 |
} |
1072 |
12 Feb 08 |
peter |
398 |
|
1072 |
12 Feb 08 |
peter |
399 |
|
1220 |
11 Mar 08 |
peter |
400 |
template<typename Data> |
1134 |
23 Feb 08 |
peter |
401 |
const utility::Index& |
1220 |
11 Mar 08 |
peter |
402 |
SubsetGenerator<Data>::training_index(size_t i) const |
1072 |
12 Feb 08 |
peter |
403 |
{ |
1072 |
12 Feb 08 |
peter |
404 |
return sampler_.training_index(i); |
1072 |
12 Feb 08 |
peter |
405 |
} |
1072 |
12 Feb 08 |
peter |
406 |
|
1072 |
12 Feb 08 |
peter |
407 |
|
1220 |
11 Mar 08 |
peter |
408 |
template<typename Data> |
1072 |
12 Feb 08 |
peter |
409 |
const Target& |
1220 |
11 Mar 08 |
peter |
410 |
SubsetGenerator<Data>::training_target(size_t i) const |
1072 |
12 Feb 08 |
peter |
411 |
{ |
1072 |
12 Feb 08 |
peter |
412 |
return training_target_[i]; |
1072 |
12 Feb 08 |
peter |
413 |
} |
1072 |
12 Feb 08 |
peter |
414 |
|
1072 |
12 Feb 08 |
peter |
415 |
|
1220 |
11 Mar 08 |
peter |
416 |
template<typename Data> |
1220 |
11 Mar 08 |
peter |
417 |
const Data& |
1220 |
11 Mar 08 |
peter |
418 |
SubsetGenerator<Data>::validation_data(size_t i) const |
1072 |
12 Feb 08 |
peter |
419 |
{ |
1072 |
12 Feb 08 |
peter |
420 |
return *(validation_data_[i]); |
1072 |
12 Feb 08 |
peter |
421 |
} |
1072 |
12 Feb 08 |
peter |
422 |
|
1072 |
12 Feb 08 |
peter |
423 |
|
1220 |
11 Mar 08 |
peter |
424 |
template<typename Data> |
1134 |
23 Feb 08 |
peter |
425 |
const utility::Index& |
1220 |
11 Mar 08 |
peter |
426 |
SubsetGenerator<Data>::validation_index(size_t i) const |
1072 |
12 Feb 08 |
peter |
427 |
{ |
1072 |
12 Feb 08 |
peter |
428 |
return sampler_.validation_index(i); |
1072 |
12 Feb 08 |
peter |
429 |
} |
1072 |
12 Feb 08 |
peter |
430 |
|
1072 |
12 Feb 08 |
peter |
431 |
|
1220 |
11 Mar 08 |
peter |
432 |
template<typename Data> |
1072 |
12 Feb 08 |
peter |
433 |
const Target& |
1220 |
11 Mar 08 |
peter |
434 |
SubsetGenerator<Data>::validation_target(size_t i) const |
1072 |
12 Feb 08 |
peter |
435 |
{ |
1072 |
12 Feb 08 |
peter |
436 |
return validation_target_[i]; |
1072 |
12 Feb 08 |
peter |
437 |
} |
1072 |
12 Feb 08 |
peter |
438 |
|
680 |
11 Oct 06 |
jari |
439 |
}}} // of namespace classifier, yat, and theplu |
613 |
30 Aug 06 |
markus |
440 |
|
613 |
30 Aug 06 |
markus |
441 |
#endif |