952 |
06 Feb 09 |
jari |
// $Id$ |
952 |
06 Feb 09 |
jari |
2 |
|
952 |
06 Feb 09 |
jari |
3 |
/* |
952 |
06 Feb 09 |
jari |
Copyright (C) 2009 Jari Häkkinen |
952 |
06 Feb 09 |
jari |
5 |
|
952 |
06 Feb 09 |
jari |
This file is part of the Normalizers plug-in package for BASE |
952 |
06 Feb 09 |
jari |
(net.sf.based.normalizers). The package is available at |
952 |
06 Feb 09 |
jari |
http://baseplugins.thep.lu.se/ BASE main site is |
952 |
06 Feb 09 |
jari |
http://base.thep.lu.se/ |
952 |
06 Feb 09 |
jari |
10 |
|
952 |
06 Feb 09 |
jari |
This is free software; you can redistribute it and/or modify it |
952 |
06 Feb 09 |
jari |
under the terms of the GNU General Public License as published by |
952 |
06 Feb 09 |
jari |
the Free Software Foundation; either version 3 of the License, or |
952 |
06 Feb 09 |
jari |
(at your option) any later version. |
952 |
06 Feb 09 |
jari |
15 |
|
952 |
06 Feb 09 |
jari |
The software is distributed in the hope that it will be useful, but |
952 |
06 Feb 09 |
jari |
WITHOUT ANY WARRANTY; without even the implied warranty of |
952 |
06 Feb 09 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
952 |
06 Feb 09 |
jari |
General Public License for more details. |
952 |
06 Feb 09 |
jari |
20 |
|
952 |
06 Feb 09 |
jari |
You should have received a copy of the GNU General Public License |
952 |
06 Feb 09 |
jari |
along with this program. If not, see <http://www.gnu.org/licenses/>. |
952 |
06 Feb 09 |
jari |
23 |
*/ |
952 |
06 Feb 09 |
jari |
24 |
|
1014 |
30 Mar 09 |
jari |
25 |
#include <config.h> // this header file is created by configure |
952 |
06 Feb 09 |
jari |
26 |
|
1014 |
30 Mar 09 |
jari |
27 |
#include <yat/normalizer/ColumnNormalizer.h> |
1014 |
30 Mar 09 |
jari |
28 |
#include <yat/normalizer/qQuantileNormalizer.h> |
952 |
06 Feb 09 |
jari |
29 |
|
1014 |
30 Mar 09 |
jari |
30 |
#include <yat/utility/CommandLine.h> |
1043 |
23 Apr 09 |
jari |
31 |
#include <yat/utility/MatrixWeighted.h> |
1014 |
30 Mar 09 |
jari |
32 |
#include <yat/utility/OptionHelp.h> |
1034 |
07 Apr 09 |
jari |
33 |
#include <yat/utility/OptionInFile.h> |
1034 |
07 Apr 09 |
jari |
34 |
#include <yat/utility/OptionOutFile.h> |
1014 |
30 Mar 09 |
jari |
35 |
#include <yat/utility/OptionSwitch.h> |
1059 |
08 May 09 |
jari |
36 |
#include <yat/utility/stl_utility.h> |
1062 |
13 May 09 |
jari |
37 |
#include <yat/statistics/utility.h> |
1014 |
30 Mar 09 |
jari |
38 |
|
1014 |
30 Mar 09 |
jari |
39 |
#include <cstdlib> |
952 |
06 Feb 09 |
jari |
40 |
#include <fstream> |
1067 |
14 May 09 |
jari |
41 |
#include <functional> |
962 |
16 Feb 09 |
jari |
42 |
#include <iostream> |
1067 |
14 May 09 |
jari |
43 |
#include <limits> |
1020 |
03 Apr 09 |
jari |
44 |
#include <stdexcept> |
952 |
06 Feb 09 |
jari |
45 |
|
1020 |
03 Apr 09 |
jari |
46 |
using namespace theplu::yat::normalizer; |
1020 |
03 Apr 09 |
jari |
47 |
using namespace theplu::yat::utility; |
952 |
06 Feb 09 |
jari |
48 |
|
1050 |
06 May 09 |
jari |
49 |
|
1072 |
15 May 09 |
jari |
50 |
class CleanUpMatrix : std::unary_function<DataWeight, DataWeight> |
1067 |
14 May 09 |
jari |
51 |
{ |
1067 |
14 May 09 |
jari |
52 |
public: |
1067 |
14 May 09 |
jari |
53 |
CleanUpMatrix(void) {} |
1067 |
14 May 09 |
jari |
54 |
|
1067 |
14 May 09 |
jari |
55 |
inline DataWeight operator()(DataWeight x) const |
1067 |
14 May 09 |
jari |
56 |
{ return ( x.data()>0 ? |
1067 |
14 May 09 |
jari |
57 |
x : DataWeight(std::numeric_limits<double>::quiet_NaN(),0.0) ); } |
1067 |
14 May 09 |
jari |
58 |
}; |
1067 |
14 May 09 |
jari |
59 |
|
1067 |
14 May 09 |
jari |
60 |
|
1062 |
13 May 09 |
jari |
61 |
void create_target(std::vector<double>&, const MatrixWeighted&, bool use_median); |
1053 |
06 May 09 |
jari |
62 |
void create_target(std::vector<double>&, const MatrixWeighted&, |
1062 |
13 May 09 |
jari |
63 |
const std::string&, bool use_median); |
1050 |
06 May 09 |
jari |
64 |
|
1043 |
23 Apr 09 |
jari |
65 |
/** |
1043 |
23 Apr 09 |
jari |
writes the data values in the matrix ignoring the weights, i.e., |
1043 |
23 Apr 09 |
jari |
produces the same output as the Matrix output operator does. |
1043 |
23 Apr 09 |
jari |
68 |
*/ |
1043 |
23 Apr 09 |
jari |
69 |
std::ostream& operator<< (std::ostream&, const MatrixWeighted&); |
1020 |
03 Apr 09 |
jari |
70 |
|
1020 |
03 Apr 09 |
jari |
71 |
|
952 |
06 Feb 09 |
jari |
72 |
int main(int argc, char* argv[]) |
1020 |
03 Apr 09 |
jari |
73 |
{ |
1014 |
30 Mar 09 |
jari |
74 |
CommandLine cmd; |
1044 |
23 Apr 09 |
jari |
75 |
OptionInFile assay(cmd, "assay-data", "assay annotations"); |
1034 |
07 Apr 09 |
jari |
76 |
OptionInFile indata(cmd, "in-data", "data to be normalized"); |
1034 |
07 Apr 09 |
jari |
77 |
OptionOutFile outdata(cmd, "out-data", "normalized data"); |
1062 |
13 May 09 |
jari |
78 |
OptionSwitch target_median(cmd, "median", |
1062 |
13 May 09 |
jari |
79 |
"use median instead of mean for targets", false); |
1014 |
30 Mar 09 |
jari |
80 |
OptionHelp help(cmd); |
1014 |
30 Mar 09 |
jari |
81 |
help.synopsis()=(std::string("See ") + |
1014 |
30 Mar 09 |
jari |
82 |
"http://baseplugins.thep.lu.se/net.sf.basedb.normalizers " + |
1185 |
06 Nov 09 |
jari |
83 |
"for\ndetails on this program.\n\n" + |
1185 |
06 Nov 09 |
jari |
84 |
"All assays are used to calculate the target if no assay " + |
1185 |
06 Nov 09 |
jari |
85 |
"annotion file is set.\n" + |
1185 |
06 Nov 09 |
jari |
86 |
"The assay file format is pair information: " + |
1185 |
06 Nov 09 |
jari |
87 |
"assay_no\\tyes/no\n" + |
1185 |
06 Nov 09 |
jari |
88 |
"where assay_no is the assay number starting at 1, yes " + |
1185 |
06 Nov 09 |
jari |
89 |
"indicates that it should\n" + |
1185 |
06 Nov 09 |
jari |
90 |
"be used in target calculation, and no indicates that it " + |
1185 |
06 Nov 09 |
jari |
91 |
"should not be a part\n" + |
1185 |
06 Nov 09 |
jari |
92 |
"of target.\n"); |
1014 |
30 Mar 09 |
jari |
93 |
OptionSwitch version(cmd, "version", "output version and exit"); |
1014 |
30 Mar 09 |
jari |
94 |
std::stringstream copyright; |
1020 |
03 Apr 09 |
jari |
95 |
copyright << PACKAGE_STRING << '\n' |
1014 |
30 Mar 09 |
jari |
96 |
<< "Copyright (C) 2009 Jari Häkkinen\n\n" |
1014 |
30 Mar 09 |
jari |
97 |
<< "This is free software see the source for copying " |
1014 |
30 Mar 09 |
jari |
98 |
<< "conditions. There is NO\nwarranty; not even for " |
1014 |
30 Mar 09 |
jari |
99 |
<< "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"; |
1014 |
30 Mar 09 |
jari |
100 |
try { |
1014 |
30 Mar 09 |
jari |
101 |
cmd.parse(argc, argv); |
1014 |
30 Mar 09 |
jari |
102 |
} |
1014 |
30 Mar 09 |
jari |
103 |
catch (cmd_error& e) { |
1014 |
30 Mar 09 |
jari |
104 |
if (version.present()) { |
1014 |
30 Mar 09 |
jari |
105 |
std::cout << copyright.str(); |
1014 |
30 Mar 09 |
jari |
106 |
return EXIT_SUCCESS; |
1014 |
30 Mar 09 |
jari |
107 |
} |
1220 |
07 Apr 10 |
jari |
108 |
std::cerr << e.what() << std::endl; |
1014 |
30 Mar 09 |
jari |
109 |
return EXIT_FAILURE; |
1020 |
03 Apr 09 |
jari |
110 |
} |
1014 |
30 Mar 09 |
jari |
111 |
if (version.present()) { |
1014 |
30 Mar 09 |
jari |
112 |
std::cout << copyright.str(); |
1014 |
30 Mar 09 |
jari |
113 |
return EXIT_SUCCESS; |
1014 |
30 Mar 09 |
jari |
114 |
} |
1023 |
06 Apr 09 |
jari |
115 |
std::ifstream* infile=NULL; |
1023 |
06 Apr 09 |
jari |
116 |
std::streambuf* cin_buffer=NULL; |
1023 |
06 Apr 09 |
jari |
117 |
if (indata.present()) { |
1023 |
06 Apr 09 |
jari |
118 |
infile=new std::ifstream(indata.value().c_str()); |
1023 |
06 Apr 09 |
jari |
119 |
cin_buffer = std::cin.rdbuf(); // save cin's input buffer |
1023 |
06 Apr 09 |
jari |
120 |
std::cin.rdbuf(infile->rdbuf()); |
1023 |
06 Apr 09 |
jari |
121 |
} |
1164 |
23 Sep 09 |
jari |
122 |
|
1224 |
03 May 10 |
jari |
123 |
MatrixWeighted m(std::cin,'\t'); |
1023 |
06 Apr 09 |
jari |
124 |
if (indata.present()) { |
1023 |
06 Apr 09 |
jari |
125 |
std::cin.rdbuf(cin_buffer); // restore old input buffer |
1023 |
06 Apr 09 |
jari |
126 |
infile->close(); |
1023 |
06 Apr 09 |
jari |
127 |
delete infile; |
1023 |
06 Apr 09 |
jari |
128 |
} |
1023 |
06 Apr 09 |
jari |
129 |
|
1224 |
03 May 10 |
jari |
130 |
std::transform(m.begin(), m.end(), m.begin(), CleanUpMatrix()); |
1047 |
27 Apr 09 |
jari |
131 |
std::vector<double> target; |
1062 |
13 May 09 |
jari |
132 |
( assay.present() ? |
1224 |
03 May 10 |
jari |
133 |
create_target(target, m, assay.value(), target_median.value()) : |
1224 |
03 May 10 |
jari |
134 |
create_target(target, m, target_median.value()) ); |
1059 |
08 May 09 |
jari |
135 |
std::transform(target.begin(), target.end(), |
1059 |
08 May 09 |
jari |
136 |
target.begin(), theplu::yat::utility::Log<double>()); |
1224 |
03 May 10 |
jari |
137 |
std::transform(data_iterator(m.begin()), data_iterator(m.end()), |
1224 |
03 May 10 |
jari |
138 |
data_iterator(m.begin()), theplu::yat::utility::Log<double>()); |
1073 |
15 May 09 |
jari |
// q = min(100,target_size/10) but no smaller than 10 |
1073 |
15 May 09 |
jari |
140 |
unsigned int q=target.size()/10; |
1073 |
15 May 09 |
jari |
141 |
q = q>100 ? 100 : ( q<10 ? 10 : q ); |
1073 |
15 May 09 |
jari |
142 |
qQuantileNormalizer qqn(target.begin(), target.end(), q); |
1020 |
03 Apr 09 |
jari |
143 |
ColumnNormalizer<qQuantileNormalizer> cn(qqn); |
1224 |
03 May 10 |
jari |
144 |
MatrixWeighted result(m.rows(),m.columns()); |
1224 |
03 May 10 |
jari |
145 |
cn(m,result); |
1059 |
08 May 09 |
jari |
146 |
std::transform(data_iterator(result.begin()), data_iterator(result.end()), |
1059 |
08 May 09 |
jari |
147 |
data_iterator(result.begin()), theplu::yat::utility::Exp<double>()); |
1023 |
06 Apr 09 |
jari |
148 |
|
1023 |
06 Apr 09 |
jari |
149 |
std::ofstream* outfile=NULL; |
1023 |
06 Apr 09 |
jari |
150 |
std::streambuf* cout_buffer = std::cout.rdbuf(); |
1023 |
06 Apr 09 |
jari |
151 |
if (outdata.present()) { |
1023 |
06 Apr 09 |
jari |
152 |
outfile=new std::ofstream(outdata.value().c_str()); |
1023 |
06 Apr 09 |
jari |
153 |
cout_buffer = std::cout.rdbuf(); // save cout's output buffer |
1023 |
06 Apr 09 |
jari |
154 |
std::cout.rdbuf(outfile->rdbuf()); |
1023 |
06 Apr 09 |
jari |
155 |
} |
952 |
06 Feb 09 |
jari |
156 |
std::cout << result << std::endl; |
1023 |
06 Apr 09 |
jari |
157 |
if (outdata.present()) { |
1023 |
06 Apr 09 |
jari |
158 |
std::cout.rdbuf(cout_buffer); // restore old output buffer |
1023 |
06 Apr 09 |
jari |
159 |
outfile->close(); |
1023 |
06 Apr 09 |
jari |
160 |
delete outfile; |
1023 |
06 Apr 09 |
jari |
161 |
} |
952 |
06 Feb 09 |
jari |
162 |
|
1014 |
30 Mar 09 |
jari |
163 |
return EXIT_SUCCESS; |
952 |
06 Feb 09 |
jari |
164 |
} |
1020 |
03 Apr 09 |
jari |
165 |
|
1020 |
03 Apr 09 |
jari |
166 |
|
1053 |
06 May 09 |
jari |
167 |
void create_target(std::vector<double>& t, const MatrixWeighted& m, |
1062 |
13 May 09 |
jari |
168 |
const std::string& assay, bool use_median) |
1020 |
03 Apr 09 |
jari |
169 |
{ |
1062 |
13 May 09 |
jari |
170 |
std::vector< std::vector<double> > calc_support(m.rows()); |
1062 |
13 May 09 |
jari |
171 |
for (std::vector< std::vector<double> >::iterator i=calc_support.begin(); |
1062 |
13 May 09 |
jari |
172 |
i!=calc_support.end(); ++i) |
1062 |
13 May 09 |
jari |
173 |
i->reserve(m.columns()); |
1062 |
13 May 09 |
jari |
174 |
|
1020 |
03 Apr 09 |
jari |
175 |
std::ifstream is(assay.c_str()); |
1020 |
03 Apr 09 |
jari |
176 |
std::string line; |
1020 |
03 Apr 09 |
jari |
177 |
size_t column=0; |
1020 |
03 Apr 09 |
jari |
178 |
while (getline(is, line)) { |
1020 |
03 Apr 09 |
jari |
179 |
size_t found=line.find("yes"); |
1062 |
13 May 09 |
jari |
180 |
if (found!=std::string::npos) // use this assay as a part of reference |
1020 |
03 Apr 09 |
jari |
181 |
for (size_t row=0; row<m.rows(); ++row) |
1062 |
13 May 09 |
jari |
182 |
if (m(row,column).weight()) // weight either 0 or 1 |
1062 |
13 May 09 |
jari |
183 |
calc_support[row].push_back(m(row,column).data()); |
1020 |
03 Apr 09 |
jari |
184 |
++column; |
1020 |
03 Apr 09 |
jari |
185 |
if (column>m.columns()) |
1020 |
03 Apr 09 |
jari |
186 |
throw std::runtime_error("Too many annotation columns wrt data matrix"); |
1020 |
03 Apr 09 |
jari |
187 |
} |
1062 |
13 May 09 |
jari |
188 |
|
1047 |
27 Apr 09 |
jari |
189 |
t.reserve(m.rows()); |
1062 |
13 May 09 |
jari |
190 |
for (size_t row=0; row<m.rows(); ++row) { |
1062 |
13 May 09 |
jari |
191 |
const std::vector<double>& cs=calc_support[row]; |
1062 |
13 May 09 |
jari |
192 |
if (!cs.empty()) |
1062 |
13 May 09 |
jari |
193 |
t.push_back( use_median ? |
1062 |
13 May 09 |
jari |
194 |
theplu::yat::statistics::median(cs.begin(), cs.end()) : |
1062 |
13 May 09 |
jari |
195 |
std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() ); |
1062 |
13 May 09 |
jari |
196 |
} |
1047 |
27 Apr 09 |
jari |
197 |
if (!t.size()) |
1047 |
27 Apr 09 |
jari |
198 |
throw std::runtime_error("Not a well defined reference, aborting"); |
1020 |
03 Apr 09 |
jari |
199 |
} |
1020 |
03 Apr 09 |
jari |
200 |
|
1020 |
03 Apr 09 |
jari |
201 |
|
1062 |
13 May 09 |
jari |
202 |
void create_target(std::vector<double>& t, const MatrixWeighted& m, |
1062 |
13 May 09 |
jari |
203 |
bool use_median) |
1020 |
03 Apr 09 |
jari |
204 |
{ |
1047 |
27 Apr 09 |
jari |
205 |
t.reserve(m.rows()); |
1062 |
13 May 09 |
jari |
206 |
std::vector<double> cs; |
1062 |
13 May 09 |
jari |
207 |
cs.reserve(m.columns()); |
1020 |
03 Apr 09 |
jari |
208 |
for (size_t row=0; row<m.rows(); ++row) { |
1062 |
13 May 09 |
jari |
209 |
cs.clear(); |
1020 |
03 Apr 09 |
jari |
210 |
for (size_t column=0; column<m.columns(); ++column) |
1062 |
13 May 09 |
jari |
211 |
if (m(row,column).weight()) // weight either 0 or 1 |
1062 |
13 May 09 |
jari |
212 |
cs.push_back(m(row,column).data()); |
1062 |
13 May 09 |
jari |
213 |
if (!cs.empty()) |
1062 |
13 May 09 |
jari |
214 |
t.push_back( use_median ? |
1062 |
13 May 09 |
jari |
215 |
theplu::yat::statistics::median(cs.begin(), cs.end()) : |
1062 |
13 May 09 |
jari |
216 |
std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() ); |
1020 |
03 Apr 09 |
jari |
217 |
} |
1047 |
27 Apr 09 |
jari |
218 |
if (!t.size()) |
1047 |
27 Apr 09 |
jari |
219 |
throw std::runtime_error("Not a well defined reference, aborting"); |
1020 |
03 Apr 09 |
jari |
220 |
} |
1043 |
23 Apr 09 |
jari |
221 |
|
1043 |
23 Apr 09 |
jari |
222 |
|
1043 |
23 Apr 09 |
jari |
223 |
std::ostream& operator<< (std::ostream& s, const MatrixWeighted& m) |
1043 |
23 Apr 09 |
jari |
224 |
{ |
1043 |
23 Apr 09 |
jari |
225 |
s.setf(std::ios::dec); |
1043 |
23 Apr 09 |
jari |
226 |
s.precision(12); |
1043 |
23 Apr 09 |
jari |
227 |
for(size_t i=0, j=0; i<m.rows(); i++) |
1043 |
23 Apr 09 |
jari |
228 |
for (j=0; j<m.columns(); j++) { |
1052 |
06 May 09 |
jari |
229 |
s << m(i,j).data(); |
1043 |
23 Apr 09 |
jari |
230 |
if (j<m.columns()-1) |
1043 |
23 Apr 09 |
jari |
231 |
s << s.fill(); |
1043 |
23 Apr 09 |
jari |
232 |
else if (i<m.rows()-1) |
1043 |
23 Apr 09 |
jari |
233 |
s << "\n"; |
1043 |
23 Apr 09 |
jari |
234 |
} |
1043 |
23 Apr 09 |
jari |
235 |
return s; |
1043 |
23 Apr 09 |
jari |
236 |
} |