195 |
07 Nov 06 |
enell |
1 |
/* |
195 |
07 Nov 06 |
enell |
* Created on 21-Sep-2004 |
195 |
07 Nov 06 |
enell |
3 |
* |
195 |
07 Nov 06 |
enell |
* Main.java is a part of GoMiner |
195 |
07 Nov 06 |
enell |
* Copyright (C) 2004 Johan Enell, Dept Oncology, Lund University |
195 |
07 Nov 06 |
enell |
6 |
* |
195 |
07 Nov 06 |
enell |
* This program is free software; you can redistribute it and/or |
195 |
07 Nov 06 |
enell |
* modify it under the terms of the GNU General Public License |
195 |
07 Nov 06 |
enell |
* as published by the Free Software Foundation; either version 2 |
195 |
07 Nov 06 |
enell |
* of the License, or (at your option) any later version. |
195 |
07 Nov 06 |
enell |
11 |
* |
195 |
07 Nov 06 |
enell |
* This program is distributed in the hope that it will be useful, |
195 |
07 Nov 06 |
enell |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
195 |
07 Nov 06 |
enell |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
195 |
07 Nov 06 |
enell |
* GNU General Public License for more details. |
195 |
07 Nov 06 |
enell |
16 |
* |
195 |
07 Nov 06 |
enell |
* You should have received a copy of the GNU General Public License |
195 |
07 Nov 06 |
enell |
* along with this program; if not, write to the Free Software |
195 |
07 Nov 06 |
enell |
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
195 |
07 Nov 06 |
enell |
20 |
*/ |
195 |
07 Nov 06 |
enell |
21 |
package net.sf.basedb.plugin.gominer; |
195 |
07 Nov 06 |
enell |
22 |
|
195 |
07 Nov 06 |
enell |
23 |
import basefile.BASEFileReader; |
195 |
07 Nov 06 |
enell |
24 |
import basefile.BASEFileSection; |
195 |
07 Nov 06 |
enell |
25 |
import basefile.BadFormatException; |
195 |
07 Nov 06 |
enell |
26 |
import basefile.BadSectionException; |
195 |
07 Nov 06 |
enell |
27 |
|
195 |
07 Nov 06 |
enell |
28 |
import java.io.File; |
195 |
07 Nov 06 |
enell |
29 |
import java.io.FileNotFoundException; |
195 |
07 Nov 06 |
enell |
30 |
import java.io.FileOutputStream; |
195 |
07 Nov 06 |
enell |
31 |
import java.io.IOException; |
195 |
07 Nov 06 |
enell |
32 |
import java.io.PrintStream; |
195 |
07 Nov 06 |
enell |
33 |
import java.util.ArrayList; |
195 |
07 Nov 06 |
enell |
34 |
import java.util.HashMap; |
195 |
07 Nov 06 |
enell |
35 |
import java.util.HashSet; |
195 |
07 Nov 06 |
enell |
36 |
import java.util.Iterator; |
195 |
07 Nov 06 |
enell |
37 |
import java.util.List; |
195 |
07 Nov 06 |
enell |
38 |
|
195 |
07 Nov 06 |
enell |
39 |
/** |
195 |
07 Nov 06 |
enell |
40 |
* |
195 |
07 Nov 06 |
enell |
* @author Johan Enell, johan.enell@onk.lu.se, Dept Oncology, Lund University, S-221 85 Lund, Sweden |
195 |
07 Nov 06 |
enell |
42 |
*/ |
195 |
07 Nov 06 |
enell |
43 |
public class GoMinerExport |
195 |
07 Nov 06 |
enell |
44 |
{ |
195 |
07 Nov 06 |
enell |
45 |
|
195 |
07 Nov 06 |
enell |
46 |
private HashMap<String, PrintStream> assayFiles = new HashMap<String, PrintStream>(); |
195 |
07 Nov 06 |
enell |
47 |
|
195 |
07 Nov 06 |
enell |
48 |
private Float param_threshold; |
195 |
07 Nov 06 |
enell |
49 |
|
195 |
07 Nov 06 |
enell |
50 |
private HashSet<String> geneList; |
195 |
07 Nov 06 |
enell |
51 |
|
195 |
07 Nov 06 |
enell |
52 |
public GoMinerExport(BASEFileReader bfr) throws IOException, BadFormatException, BadSectionException |
195 |
07 Nov 06 |
enell |
53 |
{ |
195 |
07 Nov 06 |
enell |
54 |
BASEFileSection section = bfr.readSection(true); |
195 |
07 Nov 06 |
enell |
55 |
while (section != null) |
195 |
07 Nov 06 |
enell |
56 |
{ |
195 |
07 Nov 06 |
enell |
57 |
if (section.isType("settings")) |
195 |
07 Nov 06 |
enell |
58 |
{ |
195 |
07 Nov 06 |
enell |
59 |
extractSettings(section); |
195 |
07 Nov 06 |
enell |
60 |
} |
195 |
07 Nov 06 |
enell |
61 |
else if (section.isType("assays")) |
195 |
07 Nov 06 |
enell |
62 |
{ |
195 |
07 Nov 06 |
enell |
63 |
extractAssays(section, bfr); |
195 |
07 Nov 06 |
enell |
64 |
} |
195 |
07 Nov 06 |
enell |
65 |
else if (section.isType("spots")) |
195 |
07 Nov 06 |
enell |
66 |
{ |
195 |
07 Nov 06 |
enell |
67 |
extractSpots(section, bfr); |
195 |
07 Nov 06 |
enell |
68 |
} |
195 |
07 Nov 06 |
enell |
69 |
section = bfr.readSection(); |
195 |
07 Nov 06 |
enell |
70 |
} |
195 |
07 Nov 06 |
enell |
71 |
|
195 |
07 Nov 06 |
enell |
72 |
PrintStream geneList_out = new PrintStream(new FileOutputStream("genelist.txt"), true); |
195 |
07 Nov 06 |
enell |
73 |
|
195 |
07 Nov 06 |
enell |
74 |
Iterator it = geneList.iterator(); |
195 |
07 Nov 06 |
enell |
75 |
for (String gene = (String) it.next(); it.hasNext(); gene = (String) it.next()) |
195 |
07 Nov 06 |
enell |
76 |
{ |
195 |
07 Nov 06 |
enell |
77 |
geneList_out.println(gene); |
195 |
07 Nov 06 |
enell |
78 |
} |
195 |
07 Nov 06 |
enell |
79 |
} |
195 |
07 Nov 06 |
enell |
80 |
|
195 |
07 Nov 06 |
enell |
81 |
/** |
195 |
07 Nov 06 |
enell |
* @param section |
195 |
07 Nov 06 |
enell |
* @param bfr |
195 |
07 Nov 06 |
enell |
84 |
*/ |
195 |
07 Nov 06 |
enell |
85 |
private void extractSpots(BASEFileSection section, BASEFileReader reader) |
195 |
07 Nov 06 |
enell |
86 |
{ |
195 |
07 Nov 06 |
enell |
87 |
String[] assays = null; |
195 |
07 Nov 06 |
enell |
88 |
|
195 |
07 Nov 06 |
enell |
89 |
List<String> columns = section.findFieldList("columns"); |
195 |
07 Nov 06 |
enell |
90 |
List<String> assayFields = section.findFieldList("assayFields"); |
195 |
07 Nov 06 |
enell |
91 |
|
195 |
07 Nov 06 |
enell |
92 |
HashMap<String, List<Float>> geneRatios = new HashMap<String, List<Float>>(); |
195 |
07 Nov 06 |
enell |
93 |
|
195 |
07 Nov 06 |
enell |
94 |
assays = section.findStringOpts("assays"); |
195 |
07 Nov 06 |
enell |
95 |
PrintStream file = assayFiles.get(assays[0]); |
195 |
07 Nov 06 |
enell |
96 |
|
195 |
07 Nov 06 |
enell |
97 |
int symCol = columns.indexOf("geneSymbol"); |
195 |
07 Nov 06 |
enell |
98 |
int dataCol = columns.indexOf("assayData"); |
195 |
07 Nov 06 |
enell |
99 |
int ratioCol = assayFields.indexOf("l2ratio1_2") + dataCol; |
195 |
07 Nov 06 |
enell |
100 |
|
195 |
07 Nov 06 |
enell |
101 |
String[] vals; |
195 |
07 Nov 06 |
enell |
102 |
try |
195 |
07 Nov 06 |
enell |
103 |
{ |
195 |
07 Nov 06 |
enell |
104 |
while ((vals = reader.readDataRow()) != null) |
195 |
07 Nov 06 |
enell |
105 |
{ |
195 |
07 Nov 06 |
enell |
106 |
if (!vals[symCol].equals("")) |
195 |
07 Nov 06 |
enell |
107 |
{ |
195 |
07 Nov 06 |
enell |
108 |
if (param_threshold != null) |
195 |
07 Nov 06 |
enell |
109 |
{ |
195 |
07 Nov 06 |
enell |
110 |
if (!vals[ratioCol].equals("")) |
195 |
07 Nov 06 |
enell |
111 |
{ |
195 |
07 Nov 06 |
enell |
112 |
geneList.add(vals[symCol]); |
195 |
07 Nov 06 |
enell |
113 |
|
195 |
07 Nov 06 |
enell |
114 |
List<Float> geneR = geneRatios.get(vals[symCol]); |
195 |
07 Nov 06 |
enell |
115 |
if (geneR == null) |
195 |
07 Nov 06 |
enell |
116 |
{ |
195 |
07 Nov 06 |
enell |
117 |
geneR = new ArrayList<Float>(); |
195 |
07 Nov 06 |
enell |
118 |
geneRatios.put(vals[symCol], geneR); |
195 |
07 Nov 06 |
enell |
119 |
} |
195 |
07 Nov 06 |
enell |
120 |
geneR.add(new Float(vals[ratioCol])); |
195 |
07 Nov 06 |
enell |
121 |
} |
195 |
07 Nov 06 |
enell |
122 |
} |
195 |
07 Nov 06 |
enell |
123 |
} |
195 |
07 Nov 06 |
enell |
124 |
} |
195 |
07 Nov 06 |
enell |
125 |
|
195 |
07 Nov 06 |
enell |
126 |
for (String symbol : geneRatios.keySet()) |
195 |
07 Nov 06 |
enell |
127 |
{ |
195 |
07 Nov 06 |
enell |
128 |
float v = mean(geneRatios.get(symbol)); |
195 |
07 Nov 06 |
enell |
129 |
if (v > param_threshold) |
195 |
07 Nov 06 |
enell |
130 |
{ |
195 |
07 Nov 06 |
enell |
131 |
file.println(symbol + "\t1"); |
195 |
07 Nov 06 |
enell |
132 |
} |
195 |
07 Nov 06 |
enell |
133 |
else if (v < -1.0 * param_threshold) |
195 |
07 Nov 06 |
enell |
134 |
{ |
195 |
07 Nov 06 |
enell |
135 |
file.println(symbol + "\t-1"); |
195 |
07 Nov 06 |
enell |
136 |
} |
195 |
07 Nov 06 |
enell |
137 |
} |
195 |
07 Nov 06 |
enell |
138 |
} |
195 |
07 Nov 06 |
enell |
139 |
catch (IOException e) |
195 |
07 Nov 06 |
enell |
140 |
{ |
195 |
07 Nov 06 |
enell |
141 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
142 |
} |
195 |
07 Nov 06 |
enell |
143 |
} |
195 |
07 Nov 06 |
enell |
144 |
|
195 |
07 Nov 06 |
enell |
145 |
/** |
195 |
07 Nov 06 |
enell |
* @param section |
195 |
07 Nov 06 |
enell |
* @param bfr |
195 |
07 Nov 06 |
enell |
148 |
*/ |
195 |
07 Nov 06 |
enell |
149 |
private void extractAssays(BASEFileSection section, BASEFileReader reader) |
195 |
07 Nov 06 |
enell |
150 |
{ |
195 |
07 Nov 06 |
enell |
151 |
String[] vals; |
195 |
07 Nov 06 |
enell |
152 |
try |
195 |
07 Nov 06 |
enell |
153 |
{ |
195 |
07 Nov 06 |
enell |
154 |
if (param_threshold != null) |
195 |
07 Nov 06 |
enell |
155 |
{ |
195 |
07 Nov 06 |
enell |
156 |
while ((vals = reader.readDataRow()) != null) |
195 |
07 Nov 06 |
enell |
157 |
{ |
195 |
07 Nov 06 |
enell |
158 |
assayFiles.put(vals[0], new PrintStream(new FileOutputStream(vals[1] + ".txt"), true)); |
195 |
07 Nov 06 |
enell |
159 |
} |
195 |
07 Nov 06 |
enell |
160 |
} |
195 |
07 Nov 06 |
enell |
161 |
geneList = new HashSet<String>(); |
195 |
07 Nov 06 |
enell |
162 |
} |
195 |
07 Nov 06 |
enell |
163 |
catch (IOException e) |
195 |
07 Nov 06 |
enell |
164 |
{ |
195 |
07 Nov 06 |
enell |
165 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
166 |
System.exit(-1); |
195 |
07 Nov 06 |
enell |
167 |
} |
195 |
07 Nov 06 |
enell |
168 |
} |
195 |
07 Nov 06 |
enell |
169 |
|
195 |
07 Nov 06 |
enell |
170 |
/** |
195 |
07 Nov 06 |
enell |
* @param section |
195 |
07 Nov 06 |
enell |
* @throws MissingOptionException |
195 |
07 Nov 06 |
enell |
* @throws NumberFormatException |
195 |
07 Nov 06 |
enell |
174 |
*/ |
195 |
07 Nov 06 |
enell |
175 |
private void extractSettings(BASEFileSection section) |
195 |
07 Nov 06 |
enell |
176 |
{ |
195 |
07 Nov 06 |
enell |
177 |
try |
195 |
07 Nov 06 |
enell |
178 |
{ |
195 |
07 Nov 06 |
enell |
179 |
String threshold = section.findStringOpt("threshold"); |
195 |
07 Nov 06 |
enell |
180 |
if (threshold.equals("")) |
195 |
07 Nov 06 |
enell |
181 |
{ |
195 |
07 Nov 06 |
enell |
182 |
param_threshold = null; |
195 |
07 Nov 06 |
enell |
183 |
} |
195 |
07 Nov 06 |
enell |
184 |
else |
195 |
07 Nov 06 |
enell |
185 |
{ |
195 |
07 Nov 06 |
enell |
186 |
param_threshold = new Float(threshold); |
195 |
07 Nov 06 |
enell |
187 |
} |
195 |
07 Nov 06 |
enell |
188 |
} |
195 |
07 Nov 06 |
enell |
189 |
catch (Exception e) |
195 |
07 Nov 06 |
enell |
190 |
{ |
195 |
07 Nov 06 |
enell |
191 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
192 |
System.exit(-1); |
195 |
07 Nov 06 |
enell |
193 |
} |
195 |
07 Nov 06 |
enell |
194 |
} |
195 |
07 Nov 06 |
enell |
195 |
|
195 |
07 Nov 06 |
enell |
196 |
public static void main(String[] args) |
195 |
07 Nov 06 |
enell |
197 |
{ |
195 |
07 Nov 06 |
enell |
198 |
try |
195 |
07 Nov 06 |
enell |
199 |
{ |
195 |
07 Nov 06 |
enell |
200 |
BASEFileReader bfr = null; |
195 |
07 Nov 06 |
enell |
201 |
if (args.length == 1) |
195 |
07 Nov 06 |
enell |
202 |
{ |
195 |
07 Nov 06 |
enell |
203 |
bfr = new BASEFileReader(new File(args[0])); |
195 |
07 Nov 06 |
enell |
204 |
} |
195 |
07 Nov 06 |
enell |
205 |
else |
195 |
07 Nov 06 |
enell |
206 |
{ |
195 |
07 Nov 06 |
enell |
207 |
bfr = new BASEFileReader(new File("stdin.txt")); |
195 |
07 Nov 06 |
enell |
208 |
} |
195 |
07 Nov 06 |
enell |
209 |
|
195 |
07 Nov 06 |
enell |
210 |
new GoMinerExport(bfr); |
195 |
07 Nov 06 |
enell |
211 |
} |
195 |
07 Nov 06 |
enell |
212 |
catch (OutOfMemoryError e) |
195 |
07 Nov 06 |
enell |
213 |
{ |
195 |
07 Nov 06 |
enell |
214 |
System.err.println("No more memory in the java virtual machine. Try to start the application with the flag '-Xmx256m' or '-Xmx512m'. If you need even more memory, duplicate the digit"); |
195 |
07 Nov 06 |
enell |
215 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
216 |
System.exit(-1); |
195 |
07 Nov 06 |
enell |
217 |
} |
195 |
07 Nov 06 |
enell |
218 |
catch (FileNotFoundException e) |
195 |
07 Nov 06 |
enell |
219 |
{ |
195 |
07 Nov 06 |
enell |
220 |
System.out.println("GoMiner: Could not find the file " + args[0]); |
195 |
07 Nov 06 |
enell |
221 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
222 |
} |
195 |
07 Nov 06 |
enell |
223 |
catch (BadFormatException e) |
195 |
07 Nov 06 |
enell |
224 |
{ |
195 |
07 Nov 06 |
enell |
225 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
226 |
} |
195 |
07 Nov 06 |
enell |
227 |
catch (BadSectionException e) |
195 |
07 Nov 06 |
enell |
228 |
{ |
195 |
07 Nov 06 |
enell |
229 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
230 |
} |
195 |
07 Nov 06 |
enell |
231 |
catch (IOException e) |
195 |
07 Nov 06 |
enell |
232 |
{ |
195 |
07 Nov 06 |
enell |
233 |
System.out.println("GoMiner: Error reading the file " + args[0]); |
195 |
07 Nov 06 |
enell |
234 |
e.printStackTrace(); |
195 |
07 Nov 06 |
enell |
235 |
} |
195 |
07 Nov 06 |
enell |
236 |
} |
195 |
07 Nov 06 |
enell |
237 |
|
195 |
07 Nov 06 |
enell |
238 |
private float mean(List<Float> list) |
195 |
07 Nov 06 |
enell |
239 |
{ |
195 |
07 Nov 06 |
enell |
240 |
float sum = 0; |
195 |
07 Nov 06 |
enell |
241 |
for (Float f : list) |
195 |
07 Nov 06 |
enell |
242 |
{ |
195 |
07 Nov 06 |
enell |
243 |
sum += f; |
195 |
07 Nov 06 |
enell |
244 |
} |
195 |
07 Nov 06 |
enell |
245 |
return sum / list.size(); |
195 |
07 Nov 06 |
enell |
246 |
} |
195 |
07 Nov 06 |
enell |
247 |
} |