/*
Copyright @ 1999-2003, The Institute for Genomic Research (TIGR).
All rights reserved.
*/
/*
 * $RCSfile: AnnFileParser.java,v $
 * $Revision: 1.3 $
 * $Date: 2006/05/02 20:52:47 $
 * $Author: eleanorahowe $
 * $State: Exp $
 */
2 |
26 Feb 07 |
jari |
12 |
|
2 |
26 Feb 07 |
jari |
13 |
package org.tigr.microarray.file; |
2 |
26 Feb 07 |
jari |
14 |
|
2 |
26 Feb 07 |
jari |
15 |
import java.awt.Component; |
2 |
26 Feb 07 |
jari |
16 |
import java.io.BufferedReader; |
2 |
26 Feb 07 |
jari |
17 |
import java.io.IOException; |
2 |
26 Feb 07 |
jari |
18 |
import java.io.FileReader; |
2 |
26 Feb 07 |
jari |
19 |
import java.io.File; |
2 |
26 Feb 07 |
jari |
20 |
import java.util.StringTokenizer; |
2 |
26 Feb 07 |
jari |
21 |
import java.util.Vector; |
2 |
26 Feb 07 |
jari |
22 |
import javax.swing.JFileChooser; |
2 |
26 Feb 07 |
jari |
23 |
import javax.swing.filechooser.FileFilter; |
2 |
26 Feb 07 |
jari |
24 |
|
2 |
26 Feb 07 |
jari |
25 |
import org.tigr.util.StringSplitter; |
2 |
26 Feb 07 |
jari |
26 |
/** |
2 |
26 Feb 07 |
jari |
Parses and stores annotation (.ann, .dat, .txt) file data |
2 |
26 Feb 07 |
jari |
28 |
|
2 |
26 Feb 07 |
jari |
@author aisaeed |
2 |
26 Feb 07 |
jari |
@version "1.2, 3 June 2003" |
2 |
26 Feb 07 |
jari |
31 |
*/ |
2 |
26 Feb 07 |
jari |
32 |
|
2 |
26 Feb 07 |
jari |
33 |
/* |
2 |
26 Feb 07 |
jari |
To do: |
2 |
26 Feb 07 |
jari |
35 |
|
2 |
26 Feb 07 |
jari |
1. Add support for duplicate UID checking in validate(java.io.File). |
2 |
26 Feb 07 |
jari |
37 |
*/ |
2 |
26 Feb 07 |
jari |
38 |
|
2 |
26 Feb 07 |
jari |
39 |
public class AnnFileParser { |
2 |
26 Feb 07 |
jari |
40 |
|
2 |
26 Feb 07 |
jari |
41 |
public static final int INVALID_FILE = 0; |
2 |
26 Feb 07 |
jari |
42 |
public static final int ANN_FILE = 1; |
2 |
26 Feb 07 |
jari |
43 |
|
2 |
26 Feb 07 |
jari |
44 |
public static final String UNIQUE_ID_STRING = "UID"; |
2 |
26 Feb 07 |
jari |
45 |
|
2 |
26 Feb 07 |
jari |
46 |
private Vector columnHeaders; |
2 |
26 Feb 07 |
jari |
47 |
private Vector rawLines; |
2 |
26 Feb 07 |
jari |
48 |
private IntVector dataLinesMap; |
2 |
26 Feb 07 |
jari |
49 |
|
2 |
26 Feb 07 |
jari |
50 |
private boolean annFileLoaded; |
2 |
26 Feb 07 |
jari |
51 |
|
2 |
26 Feb 07 |
jari |
52 |
|
2 |
26 Feb 07 |
jari |
53 |
/** |
2 |
26 Feb 07 |
jari |
Default and sole constructor |
2 |
26 Feb 07 |
jari |
55 |
*/ |
2 |
26 Feb 07 |
jari |
56 |
public AnnFileParser() { |
2 |
26 Feb 07 |
jari |
57 |
} |
2 |
26 Feb 07 |
jari |
58 |
|
2 |
26 Feb 07 |
jari |
59 |
|
2 |
26 Feb 07 |
jari |
60 |
/** |
2 |
26 Feb 07 |
jari |
Displays a JFileChooser with an ann file filter. The default directory |
2 |
26 Feb 07 |
jari |
is <i>user.dir</i>. |
2 |
26 Feb 07 |
jari |
63 |
|
2 |
26 Feb 07 |
jari |
@param dialogParent Parent component of the JFileChooser |
2 |
26 Feb 07 |
jari |
65 |
|
2 |
26 Feb 07 |
jari |
@return The selected ann file |
2 |
26 Feb 07 |
jari |
67 |
*/ |
2 |
26 Feb 07 |
jari |
68 |
public static File selectFile(Component dialogParent) { |
2 |
26 Feb 07 |
jari |
69 |
return selectFile(new File(System.getProperty("user.dir")), dialogParent); |
2 |
26 Feb 07 |
jari |
70 |
} |
2 |
26 Feb 07 |
jari |
71 |
|
2 |
26 Feb 07 |
jari |
72 |
/** |
2 |
26 Feb 07 |
jari |
Displays a JFileChooser with an ann file filter that opens to a specified |
2 |
26 Feb 07 |
jari |
directory. |
2 |
26 Feb 07 |
jari |
75 |
|
2 |
26 Feb 07 |
jari |
@param defaultDirectory The default directory for the JFileChooser to |
2 |
26 Feb 07 |
jari |
open to |
2 |
26 Feb 07 |
jari |
78 |
|
2 |
26 Feb 07 |
jari |
@param dialogParent Parent component of the JFileChooser |
2 |
26 Feb 07 |
jari |
80 |
|
2 |
26 Feb 07 |
jari |
@return The selected ann file |
2 |
26 Feb 07 |
jari |
82 |
*/ |
2 |
26 Feb 07 |
jari |
83 |
public static File selectFile(File defaultDirectory, Component dialogParent) { |
2 |
26 Feb 07 |
jari |
84 |
|
2 |
26 Feb 07 |
jari |
85 |
JFileChooser chooser = new JFileChooser(System.getProperty("user.dir")); |
2 |
26 Feb 07 |
jari |
86 |
chooser.setDialogTitle("Select an annotation file"); |
2 |
26 Feb 07 |
jari |
87 |
chooser.setCurrentDirectory(defaultDirectory); |
2 |
26 Feb 07 |
jari |
88 |
chooser.setMultiSelectionEnabled(false); |
2 |
26 Feb 07 |
jari |
89 |
chooser.addChoosableFileFilter(new FileFilter() { |
2 |
26 Feb 07 |
jari |
90 |
public boolean accept(File f) { |
2 |
26 Feb 07 |
jari |
91 |
String extension = ""; |
2 |
26 Feb 07 |
jari |
92 |
if (f.isDirectory()) return true; |
2 |
26 Feb 07 |
jari |
93 |
|
2 |
26 Feb 07 |
jari |
94 |
if (f.getName().endsWith(".ann")) return true; |
2 |
26 Feb 07 |
jari |
95 |
else if (f.getName().endsWith(".dat")) return true; |
2 |
26 Feb 07 |
jari |
96 |
else if (f.getName().endsWith(".txt")) return true; |
2 |
26 Feb 07 |
jari |
97 |
else return false; |
2 |
26 Feb 07 |
jari |
98 |
} |
2 |
26 Feb 07 |
jari |
99 |
|
2 |
26 Feb 07 |
jari |
100 |
public String getDescription() { |
2 |
26 Feb 07 |
jari |
101 |
return "MeV Annotation Files (*.ann, *.dat, *.txt)"; |
2 |
26 Feb 07 |
jari |
102 |
} |
2 |
26 Feb 07 |
jari |
103 |
|
2 |
26 Feb 07 |
jari |
104 |
}); |
2 |
26 Feb 07 |
jari |
105 |
|
2 |
26 Feb 07 |
jari |
106 |
if (chooser.showOpenDialog(dialogParent) == JFileChooser.APPROVE_OPTION) { |
2 |
26 Feb 07 |
jari |
107 |
return chooser.getSelectedFile(); |
2 |
26 Feb 07 |
jari |
108 |
} else { |
2 |
26 Feb 07 |
jari |
109 |
return null; |
2 |
26 Feb 07 |
jari |
110 |
} |
2 |
26 Feb 07 |
jari |
111 |
} |
2 |
26 Feb 07 |
jari |
112 |
|
2 |
26 Feb 07 |
jari |
113 |
/** |
2 |
26 Feb 07 |
jari |
Scans the specified file and returns filetype/validity code. |
2 |
26 Feb 07 |
jari |
115 |
|
2 |
26 Feb 07 |
jari |
<p> Duplicate UID check not yet implemented. |
2 |
26 Feb 07 |
jari |
117 |
|
2 |
26 Feb 07 |
jari |
@param targetFile The ann file to validate |
2 |
26 Feb 07 |
jari |
119 |
|
2 |
26 Feb 07 |
jari |
@throws FileFormatException |
2 |
26 Feb 07 |
jari |
121 |
|
2 |
26 Feb 07 |
jari |
@return The filetype/validity code |
2 |
26 Feb 07 |
jari |
123 |
*/ |
2 |
26 Feb 07 |
jari |
124 |
public static int validate(File targetFile) { |
2 |
26 Feb 07 |
jari |
125 |
|
2 |
26 Feb 07 |
jari |
126 |
IntVector dataLinesMap = new IntVector(); |
2 |
26 Feb 07 |
jari |
127 |
Vector rawLines = new Vector(); |
2 |
26 Feb 07 |
jari |
128 |
Vector columnHeaders = new Vector(); |
2 |
26 Feb 07 |
jari |
129 |
|
2 |
26 Feb 07 |
jari |
130 |
String currentLine = new String(); |
2 |
26 Feb 07 |
jari |
131 |
BufferedReader reader = null; |
2 |
26 Feb 07 |
jari |
132 |
boolean readHeaders = false; |
2 |
26 Feb 07 |
jari |
133 |
|
2 |
26 Feb 07 |
jari |
134 |
boolean valid1 = false; // Has a header containing UNIQUE_ID_STRING |
2 |
26 Feb 07 |
jari |
135 |
boolean valid2 = true; // No duplicate header fields |
2 |
26 Feb 07 |
jari |
136 |
boolean valid3 = false; // Dataset contains at least one row |
2 |
26 Feb 07 |
jari |
//boolean valid4 = false; // No duplicate UID values in dataset |
2 |
26 Feb 07 |
jari |
138 |
boolean valid4 = true; // Just until it's implemented... |
2 |
26 Feb 07 |
jari |
139 |
|
2 |
26 Feb 07 |
jari |
140 |
try { |
2 |
26 Feb 07 |
jari |
141 |
reader = new BufferedReader(new FileReader(targetFile)); |
2 |
26 Feb 07 |
jari |
142 |
for (int lineCount = 0; ((currentLine = reader.readLine()) != null); lineCount++) { |
2 |
26 Feb 07 |
jari |
143 |
rawLines.add(currentLine); |
2 |
26 Feb 07 |
jari |
144 |
if (! ( currentLine.startsWith("#") || currentLine.startsWith("\"#") ) ) { // Non-comment line |
2 |
26 Feb 07 |
jari |
145 |
if (! readHeaders) { // Read/load the column headers |
2 |
26 Feb 07 |
jari |
146 |
readHeaders = true; |
2 |
26 Feb 07 |
jari |
147 |
StringTokenizer st = new StringTokenizer(currentLine, "\t"); |
2 |
26 Feb 07 |
jari |
148 |
while (st.hasMoreTokens()) { |
2 |
26 Feb 07 |
jari |
149 |
String token = st.nextToken(); |
2 |
26 Feb 07 |
jari |
150 |
|
2 |
26 Feb 07 |
jari |
151 |
if (token.equals(MevFileParser.UNIQUE_ID_STRING)) { // Validity test 1 |
2 |
26 Feb 07 |
jari |
152 |
valid1 = true; |
2 |
26 Feb 07 |
jari |
153 |
} |
2 |
26 Feb 07 |
jari |
154 |
|
2 |
26 Feb 07 |
jari |
155 |
for (int i = 0; i < columnHeaders.size(); i++) { // Validity test 2 |
2 |
26 Feb 07 |
jari |
156 |
String headerValue = (String) columnHeaders.elementAt(i); |
2 |
26 Feb 07 |
jari |
157 |
if (token.equals(headerValue)) { |
2 |
26 Feb 07 |
jari |
158 |
valid2 = false; |
2 |
26 Feb 07 |
jari |
159 |
return MevFileParser.INVALID_FILE; |
2 |
26 Feb 07 |
jari |
160 |
} |
2 |
26 Feb 07 |
jari |
161 |
} |
2 |
26 Feb 07 |
jari |
162 |
|
2 |
26 Feb 07 |
jari |
163 |
columnHeaders.add(token); |
2 |
26 Feb 07 |
jari |
164 |
} |
2 |
26 Feb 07 |
jari |
165 |
|
2 |
26 Feb 07 |
jari |
166 |
} else { |
2 |
26 Feb 07 |
jari |
167 |
dataLinesMap.add(lineCount); |
2 |
26 Feb 07 |
jari |
168 |
} |
2 |
26 Feb 07 |
jari |
169 |
} |
2 |
26 Feb 07 |
jari |
170 |
} |
2 |
26 Feb 07 |
jari |
171 |
|
2 |
26 Feb 07 |
jari |
172 |
if (dataLinesMap.size() > 0) { // Validity test 3 |
2 |
26 Feb 07 |
jari |
173 |
valid3 = true; |
2 |
26 Feb 07 |
jari |
174 |
} |
2 |
26 Feb 07 |
jari |
175 |
|
2 |
26 Feb 07 |
jari |
176 |
} catch (IOException ioe) { |
2 |
26 Feb 07 |
jari |
177 |
return MevFileParser.INVALID_FILE; |
2 |
26 Feb 07 |
jari |
178 |
} |
2 |
26 Feb 07 |
jari |
179 |
|
2 |
26 Feb 07 |
jari |
180 |
if (valid1 && valid2 && valid3 && valid4) { |
2 |
26 Feb 07 |
jari |
181 |
return MevFileParser.MEV_FILE; |
2 |
26 Feb 07 |
jari |
182 |
} else { |
2 |
26 Feb 07 |
jari |
183 |
return MevFileParser.INVALID_FILE; |
2 |
26 Feb 07 |
jari |
184 |
} |
2 |
26 Feb 07 |
jari |
185 |
} |
2 |
26 Feb 07 |
jari |
186 |
|
2 |
26 Feb 07 |
jari |
187 |
/** |
2 |
26 Feb 07 |
jari |
Reads the specified ann file, then instantiates and populates the |
2 |
26 Feb 07 |
jari |
appropriate data objects. <code>isAnnFileLoaded</code> will return true |
2 |
26 Feb 07 |
jari |
if this method was successful in loading the ann file. |
2 |
26 Feb 07 |
jari |
191 |
|
2 |
26 Feb 07 |
jari |
@param targetFile The ann file to load |
2 |
26 Feb 07 |
jari |
193 |
*/ |
2 |
26 Feb 07 |
jari |
194 |
public void loadFile(File targetFile) { |
2 |
26 Feb 07 |
jari |
195 |
|
2 |
26 Feb 07 |
jari |
196 |
dataLinesMap = new IntVector(); |
2 |
26 Feb 07 |
jari |
197 |
rawLines = new Vector(); |
2 |
26 Feb 07 |
jari |
198 |
columnHeaders = new Vector(); |
2 |
26 Feb 07 |
jari |
199 |
|
2 |
26 Feb 07 |
jari |
200 |
String currentLine = new String(); |
2 |
26 Feb 07 |
jari |
201 |
BufferedReader reader = null; |
2 |
26 Feb 07 |
jari |
202 |
boolean readHeaders = false; |
2 |
26 Feb 07 |
jari |
203 |
|
2 |
26 Feb 07 |
jari |
204 |
try { |
2 |
26 Feb 07 |
jari |
205 |
reader = new BufferedReader(new FileReader(targetFile)); |
2 |
26 Feb 07 |
jari |
206 |
for (int lineCount = 0; ((currentLine = reader.readLine()) != null); lineCount++) { |
2 |
26 Feb 07 |
jari |
207 |
rawLines.add(currentLine); |
2 |
26 Feb 07 |
jari |
208 |
if (! ( currentLine.startsWith("#") || currentLine.startsWith("\"#") )) { // Non-comment line |
2 |
26 Feb 07 |
jari |
209 |
if (! readHeaders) { // Read/load the column headers |
2 |
26 Feb 07 |
jari |
210 |
readHeaders = true; |
2 |
26 Feb 07 |
jari |
211 |
StringTokenizer st = new StringTokenizer(currentLine, "\t"); |
2 |
26 Feb 07 |
jari |
212 |
while (st.hasMoreTokens()) { |
2 |
26 Feb 07 |
jari |
213 |
columnHeaders.add(st.nextToken()); |
2 |
26 Feb 07 |
jari |
214 |
} |
2 |
26 Feb 07 |
jari |
215 |
} else { |
2 |
26 Feb 07 |
jari |
216 |
dataLinesMap.add(lineCount); |
2 |
26 Feb 07 |
jari |
217 |
} |
2 |
26 Feb 07 |
jari |
218 |
} |
2 |
26 Feb 07 |
jari |
219 |
} |
2 |
26 Feb 07 |
jari |
220 |
|
2 |
26 Feb 07 |
jari |
221 |
} catch (IOException ioe) { |
2 |
26 Feb 07 |
jari |
222 |
ioe.printStackTrace(); |
2 |
26 Feb 07 |
jari |
223 |
annFileLoaded = false; |
2 |
26 Feb 07 |
jari |
224 |
return; |
2 |
26 Feb 07 |
jari |
225 |
} |
2 |
26 Feb 07 |
jari |
226 |
|
2 |
26 Feb 07 |
jari |
227 |
annFileLoaded = true; |
2 |
26 Feb 07 |
jari |
228 |
} |
2 |
26 Feb 07 |
jari |
229 |
|
2 |
26 Feb 07 |
jari |
230 |
/** |
2 |
26 Feb 07 |
jari |
Returns true if the <code>loadFile</code> method was successful. |
2 |
26 Feb 07 |
jari |
232 |
|
2 |
26 Feb 07 |
jari |
@return The file load status |
2 |
26 Feb 07 |
jari |
234 |
*/ |
2 |
26 Feb 07 |
jari |
235 |
public boolean isAnnFileLoaded() { |
2 |
26 Feb 07 |
jari |
236 |
return annFileLoaded; |
2 |
26 Feb 07 |
jari |
237 |
} |
2 |
26 Feb 07 |
jari |
238 |
|
2 |
26 Feb 07 |
jari |
239 |
/** |
2 |
26 Feb 07 |
jari |
Returns a Vector containing the required row of column headers. Each |
2 |
26 Feb 07 |
jari |
element in the Vector is one of the tab-delimited tokens from the first |
2 |
26 Feb 07 |
jari |
non-comment line in the mev file. |
2 |
26 Feb 07 |
jari |
243 |
|
2 |
26 Feb 07 |
jari |
@return The Vector of column headers |
2 |
26 Feb 07 |
jari |
245 |
*/ |
2 |
26 Feb 07 |
jari |
246 |
public Vector getColumnHeaders() { |
2 |
26 Feb 07 |
jari |
247 |
return columnHeaders; |
2 |
26 Feb 07 |
jari |
248 |
} |
2 |
26 Feb 07 |
jari |
249 |
|
2 |
26 Feb 07 |
jari |
250 |
/** |
2 |
26 Feb 07 |
jari |
Returns a Vector containing the fields in the target column. All |
2 |
26 Feb 07 |
jari |
comment lines will be ignored. |
2 |
26 Feb 07 |
jari |
253 |
|
2 |
26 Feb 07 |
jari |
@param targetColumn The index of the target column; valid values range |
2 |
26 Feb 07 |
jari |
from 0 to n-1, where n is the number of columns in the ann file. |
2 |
26 Feb 07 |
jari |
256 |
|
2 |
26 Feb 07 |
jari |
@return The Vector of column data |
2 |
26 Feb 07 |
jari |
258 |
*/ |
2 |
26 Feb 07 |
jari |
259 |
public Vector getColumnAt(int targetColumn) { |
2 |
26 Feb 07 |
jari |
260 |
return getColumnAt(targetColumn, false); |
2 |
26 Feb 07 |
jari |
261 |
} |
2 |
26 Feb 07 |
jari |
262 |
|
2 |
26 Feb 07 |
jari |
263 |
/** |
2 |
26 Feb 07 |
jari |
Returns a Vector containing the fields and an optional header in the |
2 |
26 Feb 07 |
jari |
target column. If requested, the first element of the Vector will be the |
2 |
26 Feb 07 |
jari |
column header value. All comment lines will be ignored. |
2 |
26 Feb 07 |
jari |
267 |
|
2 |
26 Feb 07 |
jari |
@param targetColumn The index of the target column; valid values range |
2 |
26 Feb 07 |
jari |
from 0 to n-1, where n is the number of columns in the ann file. |
2 |
26 Feb 07 |
jari |
270 |
|
2 |
26 Feb 07 |
jari |
@param withHeaders If true, the first element in the return Vector will |
2 |
26 Feb 07 |
jari |
be the column header for the target column. |
2 |
26 Feb 07 |
jari |
273 |
|
2 |
26 Feb 07 |
jari |
@return The Vector of column data |
2 |
26 Feb 07 |
jari |
275 |
*/ |
2 |
26 Feb 07 |
jari |
276 |
public Vector getColumnAt(int targetColumn, boolean withHeaders) { |
2 |
26 Feb 07 |
jari |
277 |
|
2 |
26 Feb 07 |
jari |
278 |
Vector columnVector = new Vector(dataLinesMap.size() + (withHeaders ? 1 : 0)); |
2 |
26 Feb 07 |
jari |
279 |
|
2 |
26 Feb 07 |
jari |
280 |
if ((targetColumn >= columnHeaders.size()) || (targetColumn < 0)) { |
2 |
26 Feb 07 |
jari |
281 |
throw new IndexOutOfBoundsException("Column Index out of bounds."); |
2 |
26 Feb 07 |
jari |
282 |
} |
2 |
26 Feb 07 |
jari |
283 |
|
2 |
26 Feb 07 |
jari |
284 |
if (withHeaders) columnVector.add(columnHeaders.elementAt(targetColumn)); |
2 |
26 Feb 07 |
jari |
285 |
|
2 |
26 Feb 07 |
jari |
286 |
for (int i = 0; i < dataLinesMap.size(); i++) { |
2 |
26 Feb 07 |
jari |
287 |
StringTokenizer st = new StringTokenizer(getElementAtIndex(i)); |
2 |
26 Feb 07 |
jari |
288 |
for (int j = 0; j < targetColumn; j++) { |
2 |
26 Feb 07 |
jari |
289 |
st.nextToken(); |
2 |
26 Feb 07 |
jari |
290 |
} |
2 |
26 Feb 07 |
jari |
291 |
|
2 |
26 Feb 07 |
jari |
292 |
columnVector.add(st.nextToken()); |
2 |
26 Feb 07 |
jari |
293 |
} |
2 |
26 Feb 07 |
jari |
294 |
|
2 |
26 Feb 07 |
jari |
295 |
return columnVector; |
2 |
26 Feb 07 |
jari |
296 |
} |
2 |
26 Feb 07 |
jari |
297 |
|
2 |
26 Feb 07 |
jari |
298 |
/** |
2 |
26 Feb 07 |
jari |
Returns a Vector containing the fields in the column which is |
2 |
26 Feb 07 |
jari |
identified by the specified column header. All comment lines will be |
2 |
26 Feb 07 |
jari |
ignored. |
2 |
26 Feb 07 |
jari |
302 |
|
2 |
26 Feb 07 |
jari |
@param columnName The column header of the target column |
2 |
26 Feb 07 |
jari |
304 |
|
2 |
26 Feb 07 |
jari |
@throws FieldNotFoundException |
2 |
26 Feb 07 |
jari |
306 |
|
2 |
26 Feb 07 |
jari |
@return The Vector of column data. If the specified column header is not |
2 |
26 Feb 07 |
jari |
found, the return Vector will be null. |
2 |
26 Feb 07 |
jari |
309 |
*/ |
2 |
26 Feb 07 |
jari |
310 |
public Vector getColumnNamed(String columnName) throws FieldNotFoundException { |
2 |
26 Feb 07 |
jari |
311 |
return getColumnNamed(columnName, false); |
2 |
26 Feb 07 |
jari |
312 |
} |
2 |
26 Feb 07 |
jari |
313 |
|
2 |
26 Feb 07 |
jari |
314 |
/** |
2 |
26 Feb 07 |
jari |
Returns a Vector containing the fields and an optional header in the |
2 |
26 Feb 07 |
jari |
column which is identified by the specified column header. If requested, |
2 |
26 Feb 07 |
jari |
the first element of the Vector will be the column header value. All |
2 |
26 Feb 07 |
jari |
comment lines will be ignored. |
2 |
26 Feb 07 |
jari |
319 |
|
2 |
26 Feb 07 |
jari |
@param columnName The column header of the target column |
2 |
26 Feb 07 |
jari |
321 |
|
2 |
26 Feb 07 |
jari |
@param withHeaders If true, the first element in the return Vector will |
2 |
26 Feb 07 |
jari |
be the column header for the target column. |
2 |
26 Feb 07 |
jari |
324 |
|
2 |
26 Feb 07 |
jari |
@throws FieldNotFoundException |
2 |
26 Feb 07 |
jari |
326 |
|
2 |
26 Feb 07 |
jari |
@return The Vector of column data. |
2 |
26 Feb 07 |
jari |
328 |
*/ |
2 |
26 Feb 07 |
jari |
329 |
public Vector getColumnNamed(String columnName, boolean withHeaders) throws FieldNotFoundException { |
2 |
26 Feb 07 |
jari |
330 |
|
2 |
26 Feb 07 |
jari |
331 |
Vector columnHeaders = getColumnHeaders(); |
2 |
26 Feb 07 |
jari |
332 |
|
2 |
26 Feb 07 |
jari |
333 |
if (columnHeaders.contains(columnName)) { |
2 |
26 Feb 07 |
jari |
334 |
return getColumnAt(columnHeaders.indexOf(columnName), withHeaders); |
2 |
26 Feb 07 |
jari |
335 |
} else { |
2 |
26 Feb 07 |
jari |
336 |
throw new FieldNotFoundException("Field " + columnName + " not found."); |
2 |
26 Feb 07 |
jari |
337 |
} |
2 |
26 Feb 07 |
jari |
338 |
} |
2 |
26 Feb 07 |
jari |
339 |
|
2 |
26 Feb 07 |
jari |
340 |
/** |
2 |
26 Feb 07 |
jari |
Returns the line from the ann file at the specified index. |
2 |
26 Feb 07 |
jari |
342 |
|
2 |
26 Feb 07 |
jari |
@param rawTargetline The index of the target line to be retrieved. |
2 |
26 Feb 07 |
jari |
344 |
|
2 |
26 Feb 07 |
jari |
@return The String containing the target line of text, as it appears in |
2 |
26 Feb 07 |
jari |
the ann file. The trailing newline character, <code>\n</code>, if |
2 |
26 Feb 07 |
jari |
present, is omitted. |
2 |
26 Feb 07 |
jari |
348 |
*/ |
2 |
26 Feb 07 |
jari |
349 |
public String getLineAt(int rawTargetLine) { |
2 |
26 Feb 07 |
jari |
350 |
return (String) rawLines.elementAt(rawTargetLine); |
2 |
26 Feb 07 |
jari |
351 |
} |
2 |
26 Feb 07 |
jari |
352 |
|
2 |
26 Feb 07 |
jari |
353 |
/** |
2 |
26 Feb 07 |
jari |
Returns the spot element line from the ann file at the specified index. |
2 |
26 Feb 07 |
jari |
The index should refer to the position of the element in the ann file, |
2 |
26 Feb 07 |
jari |
such that an index of 0 refers to the first record in the file, an |
2 |
26 Feb 07 |
jari |
index of 1 refers to the second record in the file, and so forth. The |
2 |
26 Feb 07 |
jari |
header row and all comment lines do not count towards this index. |
2 |
26 Feb 07 |
jari |
359 |
|
2 |
26 Feb 07 |
jari |
@param rawTargetline The index of the target element to be retrieved. |
2 |
26 Feb 07 |
jari |
361 |
|
2 |
26 Feb 07 |
jari |
@return The String encapsulating the target element, as it appears in |
2 |
26 Feb 07 |
jari |
the ann file. The trailing newline character, <code>\n</code>, if |
2 |
26 Feb 07 |
jari |
present, is omitted. |
2 |
26 Feb 07 |
jari |
365 |
*/ |
2 |
26 Feb 07 |
jari |
366 |
public String getElementAtIndex(int index) { |
2 |
26 Feb 07 |
jari |
367 |
return getLineAt(dataLinesMap.intElementAt(index)); |
2 |
26 Feb 07 |
jari |
368 |
} |
2 |
26 Feb 07 |
jari |
369 |
|
2 |
26 Feb 07 |
jari |
370 |
/** |
2 |
26 Feb 07 |
jari |
Returns the record from the ann file that has a <i>UID</i> that |
2 |
26 Feb 07 |
jari |
matches the specified id value. If there are multiple matches, only |
2 |
26 Feb 07 |
jari |
the first matched record will be returned. |
2 |
26 Feb 07 |
jari |
374 |
|
2 |
26 Feb 07 |
jari |
<p> Note: There should not be multiple records with the same |
2 |
26 Feb 07 |
jari |
<i>UID</i>, as defined in the annotation file format description. |
2 |
26 Feb 07 |
jari |
377 |
|
2 |
26 Feb 07 |
jari |
@param id The <i>id</i> of the target record to be retrieved. |
2 |
26 Feb 07 |
jari |
379 |
|
2 |
26 Feb 07 |
jari |
@throws FieldNotFoundException |
2 |
26 Feb 07 |
jari |
381 |
|
2 |
26 Feb 07 |
jari |
@return The String encapsulating the target record, as it appears in |
2 |
26 Feb 07 |
jari |
the ann file. The trailing newline character, <code>\n</code>, if |
2 |
26 Feb 07 |
jari |
present, is omitted. |
2 |
26 Feb 07 |
jari |
385 |
*/ |
2 |
26 Feb 07 |
jari |
386 |
public String getElementById(String id) throws FieldNotFoundException { |
2 |
26 Feb 07 |
jari |
387 |
|
2 |
26 Feb 07 |
jari |
388 |
String element = null; |
2 |
26 Feb 07 |
jari |
389 |
|
2 |
26 Feb 07 |
jari |
390 |
try { |
2 |
26 Feb 07 |
jari |
391 |
element = getElementByField(AnnFileParser.UNIQUE_ID_STRING, id); |
2 |
26 Feb 07 |
jari |
392 |
return element; |
2 |
26 Feb 07 |
jari |
393 |
} catch (FieldNotFoundException fnfe) { |
2 |
26 Feb 07 |
jari |
394 |
throw new FieldNotFoundException("Unique Identifier field (" + MevFileParser.UNIQUE_ID_STRING + ") not found."); |
2 |
26 Feb 07 |
jari |
395 |
} |
2 |
26 Feb 07 |
jari |
396 |
} |
2 |
26 Feb 07 |
jari |
397 |
|
2 |
26 Feb 07 |
jari |
398 |
/** |
2 |
26 Feb 07 |
jari |
Returns the record from the ann file that contains the specified value |
2 |
26 Feb 07 |
jari |
for the specified field. If there are multiple matches, only the first |
2 |
26 Feb 07 |
jari |
record will be returned. |
2 |
26 Feb 07 |
jari |
402 |
|
2 |
26 Feb 07 |
jari |
@param fieldName The column header that identifies the column in which |
2 |
26 Feb 07 |
jari |
to find the specified value of the target element to be retrieved. |
2 |
26 Feb 07 |
jari |
405 |
|
2 |
26 Feb 07 |
jari |
@param value The value in the specified column that identifies the |
2 |
26 Feb 07 |
jari |
target record to be retrieved. |
2 |
26 Feb 07 |
jari |
408 |
|
2 |
26 Feb 07 |
jari |
@throws FieldNotFoundException |
2 |
26 Feb 07 |
jari |
410 |
|
2 |
26 Feb 07 |
jari |
@return The String encapsulating the target element, as it appears in |
2 |
26 Feb 07 |
jari |
the ann file. The trailing newline character, <code>\n</code>, if |
2 |
26 Feb 07 |
jari |
present, is omitted. |
2 |
26 Feb 07 |
jari |
414 |
*/ |
2 |
26 Feb 07 |
jari |
415 |
public String getElementByField(String fieldName, String value) throws FieldNotFoundException { |
2 |
26 Feb 07 |
jari |
416 |
|
2 |
26 Feb 07 |
jari |
417 |
Vector targetColumn = getColumnNamed(fieldName); |
2 |
26 Feb 07 |
jari |
418 |
|
2 |
26 Feb 07 |
jari |
419 |
if (targetColumn == null) throw new FieldNotFoundException("Field " + fieldName + " not found."); |
2 |
26 Feb 07 |
jari |
420 |
|
2 |
26 Feb 07 |
jari |
421 |
for (int i = 0; i < targetColumn.size(); i++) { |
2 |
26 Feb 07 |
jari |
422 |
if (((String) targetColumn.elementAt(i)).equals(value)) { |
2 |
26 Feb 07 |
jari |
423 |
return getElementAtIndex(i); |
2 |
26 Feb 07 |
jari |
424 |
} |
2 |
26 Feb 07 |
jari |
425 |
} |
2 |
26 Feb 07 |
jari |
426 |
|
2 |
26 Feb 07 |
jari |
427 |
return null; |
2 |
26 Feb 07 |
jari |
428 |
} |
2 |
26 Feb 07 |
jari |
429 |
|
2 |
26 Feb 07 |
jari |
430 |
/** |
2 |
26 Feb 07 |
jari |
Returns a Vector of records from the ann file that contains the |
2 |
26 Feb 07 |
jari |
specified value for the specified field. |
2 |
26 Feb 07 |
jari |
433 |
|
2 |
26 Feb 07 |
jari |
@param fieldName The column header that identifies the column in which |
2 |
26 Feb 07 |
jari |
to find the specified value of the record to be retrieved. |
2 |
26 Feb 07 |
jari |
436 |
|
2 |
26 Feb 07 |
jari |
@param value The value in the specified column that identifies the |
2 |
26 Feb 07 |
jari |
target element to be retrieved. |
2 |
26 Feb 07 |
jari |
439 |
|
2 |
26 Feb 07 |
jari |
@throws FieldNotFoundException |
2 |
26 Feb 07 |
jari |
441 |
|
2 |
26 Feb 07 |
jari |
@return The String encapsulating the target element, as it appears in |
2 |
26 Feb 07 |
jari |
the ann file. The trailing newline character, <code>\n</code>, if |
2 |
26 Feb 07 |
jari |
present, is omitted. If there are no matches, the return Vector will |
2 |
26 Feb 07 |
jari |
be null. |
2 |
26 Feb 07 |
jari |
446 |
*/ |
2 |
26 Feb 07 |
jari |
447 |
public Vector getElementsByField(String fieldName, String value) throws FieldNotFoundException { |
2 |
26 Feb 07 |
jari |
448 |
|
2 |
26 Feb 07 |
jari |
449 |
Vector targetColumn = getColumnNamed(fieldName); |
2 |
26 Feb 07 |
jari |
450 |
Vector matchesVector = null; |
2 |
26 Feb 07 |
jari |
451 |
|
2 |
26 Feb 07 |
jari |
452 |
if (targetColumn == null) throw new FieldNotFoundException("Field " + fieldName + " not found."); |
2 |
26 Feb 07 |
jari |
453 |
|
2 |
26 Feb 07 |
jari |
454 |
for (int i = 0; i < targetColumn.size(); i++) { |
2 |
26 Feb 07 |
jari |
455 |
if (((String) targetColumn.elementAt(i)).equals(value)) { |
2 |
26 Feb 07 |
jari |
456 |
if (matchesVector == null) matchesVector = new Vector(); |
2 |
26 Feb 07 |
jari |
457 |
matchesVector.add(getElementAtIndex(i)); |
2 |
26 Feb 07 |
jari |
458 |
} |
2 |
26 Feb 07 |
jari |
459 |
} |
2 |
26 Feb 07 |
jari |
460 |
|
2 |
26 Feb 07 |
jari |
461 |
return matchesVector; |
2 |
26 Feb 07 |
jari |
462 |
} |
2 |
26 Feb 07 |
jari |
463 |
|
2 |
26 Feb 07 |
jari |
464 |
/** |
2 |
26 Feb 07 |
jari |
Returns a two-dimensional String array containing every value for each |
2 |
26 Feb 07 |
jari |
column header for every record in the ann file. The first dimension of |
2 |
26 Feb 07 |
jari |
the array iterates over the columns, while the second dimension iterates |
2 |
26 Feb 07 |
jari |
over the spots. All comment lines will be ignored. |
2 |
26 Feb 07 |
jari |
469 |
|
2 |
26 Feb 07 |
jari |
@return The String[][] containing all annotation data |
2 |
26 Feb 07 |
jari |
471 |
*/ |
2 |
26 Feb 07 |
jari |
472 |
public String[][] getDataMatrix() { |
2 |
26 Feb 07 |
jari |
473 |
return getDataMatrix(false); |
2 |
26 Feb 07 |
jari |
474 |
} |
2 |
26 Feb 07 |
jari |
475 |
|
2 |
26 Feb 07 |
jari |
476 |
/** |
2 |
26 Feb 07 |
jari |
Returns a two-dimensional String array containing every value for each |
2 |
26 Feb 07 |
jari |
column header for every record in the ann file. The first dimension of |
2 |
26 Feb 07 |
jari |
the array iterates over the columns, while the second dimension iterates |
2 |
26 Feb 07 |
jari |
over the spots. Optionally, the first element in the first dimension of |
2 |
26 Feb 07 |
jari |
the array can be an array of all column headers. All comment lines will |
2 |
26 Feb 07 |
jari |
be ignored. |
2 |
26 Feb 07 |
jari |
483 |
|
2 |
26 Feb 07 |
jari |
@param withHeaders If true, headers are included in the returned array |
2 |
26 Feb 07 |
jari |
485 |
|
2 |
26 Feb 07 |
jari |
@return The String[][] containing all annotation data |
2 |
26 Feb 07 |
jari |
487 |
*/ |
2 |
26 Feb 07 |
jari |
488 |
public String[][] getDataMatrix(boolean withHeaders) { |
2 |
26 Feb 07 |
jari |
489 |
|
2 |
26 Feb 07 |
jari |
490 |
Vector columnHeaders = getColumnHeaders(); |
2 |
26 Feb 07 |
jari |
491 |
int hc = withHeaders ? 1 : 0; |
2 |
26 Feb 07 |
jari |
492 |
|
2 |
26 Feb 07 |
jari |
493 |
String[][] matrix = new String[dataLinesMap.size() + hc][columnHeaders.size()]; |
2 |
26 Feb 07 |
jari |
494 |
|
2 |
26 Feb 07 |
jari |
495 |
if (withHeaders) { |
2 |
26 Feb 07 |
jari |
496 |
for (int i = 0; i < columnHeaders.size(); i++) { |
2 |
26 Feb 07 |
jari |
497 |
matrix[0][i] = (String) columnHeaders.elementAt(i); |
2 |
26 Feb 07 |
jari |
498 |
} |
2 |
26 Feb 07 |
jari |
499 |
} |
2 |
26 Feb 07 |
jari |
500 |
|
2 |
26 Feb 07 |
jari |
//jcb use StringSplitter to return empty tokens |
2 |
26 Feb 07 |
jari |
502 |
StringSplitter ss = new StringSplitter('\t'); |
2 |
26 Feb 07 |
jari |
503 |
|
2 |
26 Feb 07 |
jari |
504 |
for (int i = hc; i < matrix.length; i++) { |
2 |
26 Feb 07 |
jari |
505 |
|
2 |
26 Feb 07 |
jari |
506 |
String currentLine = getElementAtIndex(i - hc); |
2 |
26 Feb 07 |
jari |
//jcb StringTokenizer st = new StringTokenizer(currentLine, "\t"); |
2 |
26 Feb 07 |
jari |
508 |
ss.init(currentLine); |
2 |
26 Feb 07 |
jari |
509 |
|
2 |
26 Feb 07 |
jari |
510 |
for (int j = 0; j < matrix[i].length; j++) { |
2 |
26 Feb 07 |
jari |
511 |
try{ |
2 |
26 Feb 07 |
jari |
512 |
matrix[i][j] = ss.nextToken(); |
2 |
26 Feb 07 |
jari |
513 |
} catch (Exception e){ |
2 |
26 Feb 07 |
jari |
514 |
matrix[i][j] = ""; |
2 |
26 Feb 07 |
jari |
//e.printStackTrace(); |
2 |
26 Feb 07 |
jari |
516 |
} |
2 |
26 Feb 07 |
jari |
517 |
} |
2 |
26 Feb 07 |
jari |
518 |
} |
2 |
26 Feb 07 |
jari |
519 |
|
2 |
26 Feb 07 |
jari |
520 |
return matrix; |
2 |
26 Feb 07 |
jari |
521 |
} |
2 |
26 Feb 07 |
jari |
522 |
|
2 |
26 Feb 07 |
jari |
523 |
|
2 |
26 Feb 07 |
jari |
524 |
/** |
2 |
26 Feb 07 |
jari |
Returns a two-dimensional String array containing every value for each |
2 |
26 Feb 07 |
jari |
column header for every record in the ann file. The first dimension of |
2 |
26 Feb 07 |
jari |
the array iterates over the columns, while the second dimension iterates |
2 |
26 Feb 07 |
jari |
over the spots. All comment lines will be ignored. |
2 |
26 Feb 07 |
jari |
529 |
|
2 |
26 Feb 07 |
jari |
Beginning and ending quotes will be eliminated if both are present. |
2 |
26 Feb 07 |
jari |
531 |
|
2 |
26 Feb 07 |
jari |
@return The String[][] containing all annotation data |
2 |
26 Feb 07 |
jari |
533 |
*/ |
2 |
26 Feb 07 |
jari |
534 |
public String[][] getDataMatrixMinusQuotes() { |
2 |
26 Feb 07 |
jari |
535 |
return getDataMatrixMinusQuotes(false); |
2 |
26 Feb 07 |
jari |
536 |
} |
2 |
26 Feb 07 |
jari |
537 |
|
2 |
26 Feb 07 |
jari |
538 |
/** |
2 |
26 Feb 07 |
jari |
Returns a two-dimensional String array containing every value for each |
2 |
26 Feb 07 |
jari |
column header for every record in the ann file. The first dimension of |
2 |
26 Feb 07 |
jari |
the array iterates over the columns, while the second dimension iterates |
2 |
26 Feb 07 |
jari |
over the spots. Optionally, the first element in the first dimension of |
2 |
26 Feb 07 |
jari |
the array can be an array of all column headers. All comment lines will |
2 |
26 Feb 07 |
jari |
be ignored. |
2 |
26 Feb 07 |
jari |
545 |
|
2 |
26 Feb 07 |
jari |
Starting and trailing quotes will be eliminated if both are present. |
2 |
26 Feb 07 |
jari |
547 |
|
2 |
26 Feb 07 |
jari |
548 |
|
2 |
26 Feb 07 |
jari |
@param withHeaders If true, headers are included in the returned array |
2 |
26 Feb 07 |
jari |
550 |
|
2 |
26 Feb 07 |
jari |
@return The String[][] containing all annotation data |
2 |
26 Feb 07 |
jari |
552 |
*/ |
2 |
26 Feb 07 |
jari |
553 |
public String[][] getDataMatrixMinusQuotes(boolean withHeaders) { |
2 |
26 Feb 07 |
jari |
554 |
System.out.println("data matrix minus quotes"); |
2 |
26 Feb 07 |
jari |
555 |
Vector columnHeaders = getColumnHeaders(); |
2 |
26 Feb 07 |
jari |
556 |
int hc = withHeaders ? 1 : 0; |
2 |
26 Feb 07 |
jari |
557 |
|
2 |
26 Feb 07 |
jari |
558 |
String[][] matrix = new String[dataLinesMap.size() + hc][columnHeaders.size()]; |
2 |
26 Feb 07 |
jari |
559 |
|
2 |
26 Feb 07 |
jari |
560 |
if (withHeaders) { |
2 |
26 Feb 07 |
jari |
561 |
for (int i = 0; i < columnHeaders.size(); i++) { |
2 |
26 Feb 07 |
jari |
562 |
matrix[0][i] = (String) columnHeaders.elementAt(i); |
2 |
26 Feb 07 |
jari |
563 |
} |
2 |
26 Feb 07 |
jari |
564 |
} |
2 |
26 Feb 07 |
jari |
565 |
|
2 |
26 Feb 07 |
jari |
//jcb use StringSplitter to return empty tokens |
2 |
26 Feb 07 |
jari |
567 |
StringSplitter ss = new StringSplitter('\t'); |
2 |
26 Feb 07 |
jari |
568 |
|
2 |
26 Feb 07 |
jari |
569 |
for (int i = hc; i < matrix.length; i++) { |
2 |
26 Feb 07 |
jari |
570 |
|
2 |
26 Feb 07 |
jari |
571 |
String currentLine = getElementAtIndex(i - hc); |
2 |
26 Feb 07 |
jari |
//jcb StringTokenizer st = new StringTokenizer(currentLine, "\t"); |
2 |
26 Feb 07 |
jari |
573 |
ss.init(currentLine); |
2 |
26 Feb 07 |
jari |
574 |
|
2 |
26 Feb 07 |
jari |
575 |
for (int j = 0; j < matrix[i].length; j++) { |
2 |
26 Feb 07 |
jari |
576 |
try{ |
2 |
26 Feb 07 |
jari |
577 |
matrix[i][j] = ss.nextToken(); |
2 |
26 Feb 07 |
jari |
578 |
if(matrix[i][j].startsWith("\"") && matrix[i][j].endsWith("\"")) { |
2 |
26 Feb 07 |
jari |
579 |
matrix[i][j] = matrix[i][j].replaceFirst("\"", ""); |
2 |
26 Feb 07 |
jari |
580 |
matrix[i][j] = matrix[i][j].substring(0, matrix[i][j].length()-2); |
2 |
26 Feb 07 |
jari |
581 |
} |
2 |
26 Feb 07 |
jari |
582 |
} catch (Exception e){ |
2 |
26 Feb 07 |
jari |
583 |
matrix[i][j] = ""; |
2 |
26 Feb 07 |
jari |
//e.printStackTrace(); |
2 |
26 Feb 07 |
jari |
585 |
} |
2 |
26 Feb 07 |
jari |
586 |
} |
2 |
26 Feb 07 |
jari |
587 |
} |
2 |
26 Feb 07 |
jari |
588 |
|
2 |
26 Feb 07 |
jari |
589 |
return matrix; |
2 |
26 Feb 07 |
jari |
590 |
} |
2 |
26 Feb 07 |
jari |
591 |
|
2 |
26 Feb 07 |
jari |
592 |
|
2 |
26 Feb 07 |
jari |
593 |
|
2 |
26 Feb 07 |
jari |
594 |
private static class IntVector extends Vector { |
2 |
26 Feb 07 |
jari |
595 |
|
2 |
26 Feb 07 |
jari |
596 |
public void add(int element) { |
2 |
26 Feb 07 |
jari |
597 |
super.add(new Integer(element)); |
2 |
26 Feb 07 |
jari |
598 |
} |
2 |
26 Feb 07 |
jari |
599 |
|
2 |
26 Feb 07 |
jari |
600 |
public int intElementAt(int index) { |
2 |
26 Feb 07 |
jari |
601 |
return ((Integer) super.elementAt(index)).intValue(); |
2 |
26 Feb 07 |
jari |
602 |
} |
2 |
26 Feb 07 |
jari |
603 |
} |
2 |
26 Feb 07 |
jari |
604 |
} |