2 |
26 Feb 07 |
jari |
1 |
package org.tigr.microarray.mev.file.agilent; |
2 |
26 Feb 07 |
jari |
2 |
|
2 |
26 Feb 07 |
jari |
3 |
import java.io.File; |
2 |
26 Feb 07 |
jari |
4 |
import java.io.IOException; |
2 |
26 Feb 07 |
jari |
5 |
import java.util.StringTokenizer; |
2 |
26 Feb 07 |
jari |
6 |
import java.util.Vector; |
2 |
26 Feb 07 |
jari |
7 |
|
2 |
26 Feb 07 |
jari |
8 |
/* |
2 |
26 Feb 07 |
jari |
* Created on Apr 5, 2004 |
2 |
26 Feb 07 |
jari |
10 |
*/ |
2 |
26 Feb 07 |
jari |
11 |
|
2 |
26 Feb 07 |
jari |
12 |
/** |
2 |
26 Feb 07 |
jari |
* MeVerizer reformats an Agilent Feature Extractor (".txt") output file into an MeV |
2 |
26 Feb 07 |
jari |
* formatted file (".mev"). |
2 |
26 Feb 07 |
jari |
15 |
* |
2 |
26 Feb 07 |
jari |
* @author vu |
2 |
26 Feb 07 |
jari |
17 |
*/ |
2 |
26 Feb 07 |
jari |
18 |
public class MeVerizer { |
2 |
26 Feb 07 |
jari |
19 |
/** |
2 |
26 Feb 07 |
jari |
* The String that characterizes the Header Line in the Agilent File --> "FEATURES" |
2 |
26 Feb 07 |
jari |
21 |
*/ |
2 |
26 Feb 07 |
jari |
22 |
public static String HEADER_HEADER = "FEATURES"; |
2 |
26 Feb 07 |
jari |
23 |
/** |
2 |
26 Feb 07 |
jari |
* The String used in lieu of an actual column Header when none are available. |
2 |
26 Feb 07 |
jari |
25 |
*/ |
2 |
26 Feb 07 |
jari |
26 |
public static String FUDGE = "FUDGE"; |
2 |
26 Feb 07 |
jari |
27 |
/** |
2 |
26 Feb 07 |
jari |
* The String used as a placeholder in the tab-delimited file so that blank cells are |
2 |
26 Feb 07 |
jari |
* accounted for. |
2 |
26 Feb 07 |
jari |
30 |
*/ |
2 |
26 Feb 07 |
jari |
31 |
public static String PLACE_HOLDER = "MOTHRA"; |
2 |
26 Feb 07 |
jari |
32 |
|
2 |
26 Feb 07 |
jari |
33 |
public static boolean PARSE_BY_HEADERS = true; |
2 |
26 Feb 07 |
jari |
34 |
public static boolean PARSE_BY_INDICES = false; |
2 |
26 Feb 07 |
jari |
35 |
|
2 |
26 Feb 07 |
jari |
36 |
|
2 |
26 Feb 07 |
jari |
//The Header line of the .mev File to be created. |
2 |
26 Feb 07 |
jari |
38 |
private String mHeaders; |
2 |
26 Feb 07 |
jari |
//The Agilent equivalent of the desired mHeaders |
2 |
26 Feb 07 |
jari |
40 |
private String columnDef; |
2 |
26 Feb 07 |
jari |
//The actual header line of the Agilent file |
2 |
26 Feb 07 |
jari |
42 |
private String headerLine; |
2 |
26 Feb 07 |
jari |
43 |
|
2 |
26 Feb 07 |
jari |
44 |
private int iHeaders; |
2 |
26 Feb 07 |
jari |
45 |
|
2 |
26 Feb 07 |
jari |
//Vector of Extract objects |
2 |
26 Feb 07 |
jari |
47 |
private Vector vExtract; |
2 |
26 Feb 07 |
jari |
48 |
|
2 |
26 Feb 07 |
jari |
49 |
|
2 |
26 Feb 07 |
jari |
50 |
/** |
2 |
26 Feb 07 |
jari |
* Constructor |
2 |
26 Feb 07 |
jari |
* @param mHeadersP tab-delim String of MeV's Headers |
2 |
26 Feb 07 |
jari |
* @param aHeadersP tab-delim String of Agilent Headers equivalent to MeV's |
2 |
26 Feb 07 |
jari |
* @throws ColumnMismatchException If the number of columns in the header strings don't match |
2 |
26 Feb 07 |
jari |
55 |
*/ |
2 |
26 Feb 07 |
jari |
56 |
public MeVerizer(String mHeadersP, String columnDefP) throws |
2 |
26 Feb 07 |
jari |
57 |
ColumnMismatchException { |
2 |
26 Feb 07 |
jari |
58 |
this.mHeaders = mHeadersP; |
2 |
26 Feb 07 |
jari |
59 |
this.columnDef = columnDefP; |
2 |
26 Feb 07 |
jari |
60 |
|
2 |
26 Feb 07 |
jari |
61 |
StringTokenizer st1 = new StringTokenizer(mHeadersP, "\t"); |
2 |
26 Feb 07 |
jari |
62 |
StringTokenizer st2 = new StringTokenizer(columnDefP, "\t"); |
2 |
26 Feb 07 |
jari |
63 |
|
2 |
26 Feb 07 |
jari |
64 |
if(st1.countTokens() != st2.countTokens()) { |
2 |
26 Feb 07 |
jari |
65 |
throw new ColumnMismatchException("Header columns are mismatched"); |
2 |
26 Feb 07 |
jari |
66 |
} |
2 |
26 Feb 07 |
jari |
67 |
}//end constructor |
2 |
26 Feb 07 |
jari |
68 |
|
2 |
26 Feb 07 |
jari |
69 |
|
2 |
26 Feb 07 |
jari |
70 |
/** |
2 |
26 Feb 07 |
jari |
* Parses the Agilent file into a Vector of Extract objects |
2 |
26 Feb 07 |
jari |
* @param f |
2 |
26 Feb 07 |
jari |
* @throws IOException |
2 |
26 Feb 07 |
jari |
74 |
*/ |
2 |
26 Feb 07 |
jari |
75 |
public void parseExtract(File f, boolean parseByHeaders, String replacement) |
2 |
26 Feb 07 |
jari |
76 |
throws IOException { |
2 |
26 Feb 07 |
jari |
77 |
|
2 |
26 Feb 07 |
jari |
//get file as Vector of rows where null cells have been replaced |
2 |
26 Feb 07 |
jari |
79 |
Vector vLine = this.readFileAsVector(f); |
2 |
26 Feb 07 |
jari |
80 |
|
2 |
26 Feb 07 |
jari |
//figure out where the data starts and get the header line |
2 |
26 Feb 07 |
jari |
82 |
this.iHeaders = this.findHeaderRow(vLine, MeVerizer.HEADER_HEADER); |
2 |
26 Feb 07 |
jari |
83 |
this.headerLine = this.findHeaderString(vLine, MeVerizer.HEADER_HEADER); |
2 |
26 Feb 07 |
jari |
84 |
|
2 |
26 Feb 07 |
jari |
//create Extract objects to represent each feature |
2 |
26 Feb 07 |
jari |
86 |
this.vExtract = this.createExtract(vLine, headerLine, this.iHeaders, |
2 |
26 Feb 07 |
jari |
87 |
parseByHeaders, replacement); |
2 |
26 Feb 07 |
jari |
88 |
}//end parseExtract() |
2 |
26 Feb 07 |
jari |
89 |
|
2 |
26 Feb 07 |
jari |
90 |
|
2 |
26 Feb 07 |
jari |
91 |
/** |
2 |
26 Feb 07 |
jari |
* Loops through the Vector of Lines, creating an Extract object to represent |
2 |
26 Feb 07 |
jari |
* @param vLine Vector of Lines from Agilent File (Nulls replaced with static var) |
2 |
26 Feb 07 |
jari |
* @param headerLine The tab-delimited Header line from the Agilent file |
2 |
26 Feb 07 |
jari |
* @param iHeader The index of the Header line in the vLine Vector |
2 |
26 Feb 07 |
jari |
* @return Returns a Vector of Extract objects |
2 |
26 Feb 07 |
jari |
97 |
*/ |
2 |
26 Feb 07 |
jari |
98 |
private Vector createExtract(Vector vLine, String headerLine, int iHeader, |
2 |
26 Feb 07 |
jari |
99 |
boolean parseByHeaders, String replacement) { |
2 |
26 Feb 07 |
jari |
100 |
Vector vReturn = new Vector(); |
2 |
26 Feb 07 |
jari |
101 |
|
2 |
26 Feb 07 |
jari |
//loop through rows of data, starting at the first line of actual data (follows Header) |
2 |
26 Feb 07 |
jari |
103 |
for(int i = iHeader + 1; i < vLine.size(); i ++) { |
2 |
26 Feb 07 |
jari |
104 |
String line = (String) vLine.elementAt(i); |
2 |
26 Feb 07 |
jari |
105 |
|
2 |
26 Feb 07 |
jari |
//create Extract object to represent each row. add to vExtract Vector |
2 |
26 Feb 07 |
jari |
107 |
Extract e = new Extract(); |
2 |
26 Feb 07 |
jari |
108 |
if(parseByHeaders) { |
2 |
26 Feb 07 |
jari |
109 |
e.parseLineByHeaders(headerLine, line, this.columnDef, replacement); |
2 |
26 Feb 07 |
jari |
110 |
} else { |
2 |
26 Feb 07 |
jari |
111 |
e.parseLineByIndex(this.columnDef, line, replacement); |
2 |
26 Feb 07 |
jari |
112 |
} |
2 |
26 Feb 07 |
jari |
113 |
vReturn.add(e); |
2 |
26 Feb 07 |
jari |
114 |
}//end i |
2 |
26 Feb 07 |
jari |
115 |
|
2 |
26 Feb 07 |
jari |
116 |
return vReturn; |
2 |
26 Feb 07 |
jari |
117 |
}//end createExtract() |
2 |
26 Feb 07 |
jari |
118 |
|
2 |
26 Feb 07 |
jari |
119 |
|
2 |
26 Feb 07 |
jari |
120 |
/** |
2 |
26 Feb 07 |
jari |
* Loops through the Vector of Lines looking for the Header line. |
2 |
26 Feb 07 |
jari |
* @param vLine Vector of Lines from Agilent file |
2 |
26 Feb 07 |
jari |
* @param sDefine The String that defines this line as the Header |
2 |
26 Feb 07 |
jari |
* @return Returns the Header line from the Agilent file |
2 |
26 Feb 07 |
jari |
125 |
*/ |
2 |
26 Feb 07 |
jari |
126 |
private String findHeaderString(Vector vLine, String sDefine) { |
2 |
26 Feb 07 |
jari |
127 |
String sReturn = null; |
2 |
26 Feb 07 |
jari |
128 |
|
2 |
26 Feb 07 |
jari |
129 |
for(int i = 0; i < vLine.size(); i ++) { |
2 |
26 Feb 07 |
jari |
130 |
String line = (String) vLine.elementAt(i); |
2 |
26 Feb 07 |
jari |
131 |
if(line.startsWith(sDefine)) { |
2 |
26 Feb 07 |
jari |
132 |
sReturn = line; |
2 |
26 Feb 07 |
jari |
133 |
} |
2 |
26 Feb 07 |
jari |
134 |
} |
2 |
26 Feb 07 |
jari |
135 |
|
2 |
26 Feb 07 |
jari |
136 |
return sReturn; |
2 |
26 Feb 07 |
jari |
137 |
}//end findHeaderString() |
2 |
26 Feb 07 |
jari |
138 |
/** |
2 |
26 Feb 07 |
jari |
* Loops through the Vector of Lines looking for the index of the Header line |
2 |
26 Feb 07 |
jari |
* @param vLine Vector of Lines from the Agilent file |
2 |
26 Feb 07 |
jari |
* @param sDefine The String that defines this line as the Header |
2 |
26 Feb 07 |
jari |
* @return Returns the index of the Header line from the Agilent file |
2 |
26 Feb 07 |
jari |
143 |
*/ |
2 |
26 Feb 07 |
jari |
144 |
private int findHeaderRow(Vector vLine, String sDefine) { |
2 |
26 Feb 07 |
jari |
145 |
int iReturn = 0; |
2 |
26 Feb 07 |
jari |
146 |
|
2 |
26 Feb 07 |
jari |
147 |
for(int i = 0; i < vLine.size(); i ++) { |
2 |
26 Feb 07 |
jari |
148 |
String line = (String) vLine.elementAt(i); |
2 |
26 Feb 07 |
jari |
149 |
if(line.startsWith(sDefine)) { |
2 |
26 Feb 07 |
jari |
150 |
iReturn = i; |
2 |
26 Feb 07 |
jari |
151 |
} |
2 |
26 Feb 07 |
jari |
152 |
} |
2 |
26 Feb 07 |
jari |
153 |
|
2 |
26 Feb 07 |
jari |
154 |
return iReturn; |
2 |
26 Feb 07 |
jari |
155 |
}//end findHeaderRow() |
2 |
26 Feb 07 |
jari |
156 |
|
2 |
26 Feb 07 |
jari |
157 |
|
2 |
26 Feb 07 |
jari |
158 |
/** |
2 |
26 Feb 07 |
jari |
* Creates and uses a Reader object to read a file into a Vector of Lines where any |
2 |
26 Feb 07 |
jari |
* null cells of the tab-delimited File are replaced with a String denoted by the static |
2 |
26 Feb 07 |
jari |
* variable MeVerizer.PLACE_HOLDER. |
2 |
26 Feb 07 |
jari |
* @param f The File to be read |
2 |
26 Feb 07 |
jari |
* @return Returns a Vector where each element is a String representation of a |
2 |
26 Feb 07 |
jari |
* Line from the File |
2 |
26 Feb 07 |
jari |
* @throws IOException |
2 |
26 Feb 07 |
jari |
166 |
*/ |
2 |
26 Feb 07 |
jari |
167 |
private Vector readFileAsVector(File f) throws IOException { |
2 |
26 Feb 07 |
jari |
168 |
Reader r = new Reader(); |
2 |
26 Feb 07 |
jari |
169 |
r.readFile(f); |
2 |
26 Feb 07 |
jari |
170 |
return r.getVNullLine(MeVerizer.PLACE_HOLDER); |
2 |
26 Feb 07 |
jari |
171 |
}//end readFileASVector() |
2 |
26 Feb 07 |
jari |
172 |
|
2 |
26 Feb 07 |
jari |
173 |
|
2 |
26 Feb 07 |
jari |
174 |
/** |
2 |
26 Feb 07 |
jari |
* Get the new .mev File String. |
2 |
26 Feb 07 |
jari |
* @return Returns a tab-delimited String representing a formatted .mev File |
2 |
26 Feb 07 |
jari |
177 |
*/ |
2 |
26 Feb 07 |
jari |
178 |
public String getFileString() { |
2 |
26 Feb 07 |
jari |
179 |
StringBuffer sbReturn = new StringBuffer(this.mHeaders + "\r\n"); |
2 |
26 Feb 07 |
jari |
180 |
|
2 |
26 Feb 07 |
jari |
181 |
for(int i = 0; i < this.vExtract.size(); i ++) { |
2 |
26 Feb 07 |
jari |
182 |
Extract e = (Extract) this.vExtract.elementAt(i); |
2 |
26 Feb 07 |
jari |
183 |
String s = e.getParsedLine(); |
2 |
26 Feb 07 |
jari |
184 |
sbReturn.append(s); |
2 |
26 Feb 07 |
jari |
185 |
sbReturn.append("\r\n"); |
2 |
26 Feb 07 |
jari |
186 |
} |
2 |
26 Feb 07 |
jari |
187 |
|
2 |
26 Feb 07 |
jari |
188 |
return sbReturn.toString(); |
2 |
26 Feb 07 |
jari |
189 |
} |
2 |
26 Feb 07 |
jari |
190 |
|
2 |
26 Feb 07 |
jari |
191 |
|
2 |
26 Feb 07 |
jari |
192 |
/** |
2 |
26 Feb 07 |
jari |
* Compiles a list of the 'columnDef' that aren't found in 'headerLine' |
2 |
26 Feb 07 |
jari |
* @return Vector of 'columnDef' not found in 'headerLine' |
2 |
26 Feb 07 |
jari |
195 |
*/ |
2 |
26 Feb 07 |
jari |
196 |
public Vector checkHeaders() { |
2 |
26 Feb 07 |
jari |
197 |
Vector vReturn = new Vector(); |
2 |
26 Feb 07 |
jari |
198 |
Vector vWant = new Vector(); |
2 |
26 Feb 07 |
jari |
199 |
Vector vHave = new Vector(); |
2 |
26 Feb 07 |
jari |
200 |
|
2 |
26 Feb 07 |
jari |
201 |
StringTokenizer stWant = new StringTokenizer(this.columnDef, "\t"); |
2 |
26 Feb 07 |
jari |
202 |
while(stWant.hasMoreTokens()) { |
2 |
26 Feb 07 |
jari |
203 |
vWant.add(stWant.nextToken()); |
2 |
26 Feb 07 |
jari |
204 |
} |
2 |
26 Feb 07 |
jari |
205 |
|
2 |
26 Feb 07 |
jari |
206 |
StringTokenizer stHave = new StringTokenizer(this.headerLine, "\t"); |
2 |
26 Feb 07 |
jari |
207 |
while(stHave.hasMoreTokens()) { |
2 |
26 Feb 07 |
jari |
208 |
vHave.add(stHave.nextToken()); |
2 |
26 Feb 07 |
jari |
209 |
} |
2 |
26 Feb 07 |
jari |
210 |
|
2 |
26 Feb 07 |
jari |
211 |
for(int i = 0; i < vWant.size(); i ++) { |
2 |
26 Feb 07 |
jari |
212 |
String want = (String) vWant.elementAt(i); |
2 |
26 Feb 07 |
jari |
213 |
|
2 |
26 Feb 07 |
jari |
214 |
boolean wantFound = false; |
2 |
26 Feb 07 |
jari |
215 |
|
2 |
26 Feb 07 |
jari |
216 |
for(int j = 0; j < vHave.size(); j ++) { |
2 |
26 Feb 07 |
jari |
217 |
String have = (String) vHave.elementAt(j); |
2 |
26 Feb 07 |
jari |
218 |
if(want.toLowerCase().equals(have.toLowerCase())) { |
2 |
26 Feb 07 |
jari |
219 |
wantFound = true; |
2 |
26 Feb 07 |
jari |
220 |
} |
2 |
26 Feb 07 |
jari |
221 |
} |
2 |
26 Feb 07 |
jari |
222 |
|
2 |
26 Feb 07 |
jari |
223 |
if(!wantFound) { |
2 |
26 Feb 07 |
jari |
224 |
vReturn.add(want); |
2 |
26 Feb 07 |
jari |
225 |
} |
2 |
26 Feb 07 |
jari |
226 |
} |
2 |
26 Feb 07 |
jari |
227 |
|
2 |
26 Feb 07 |
jari |
228 |
return vReturn; |
2 |
26 Feb 07 |
jari |
229 |
}//end checkHeaders() |
2 |
26 Feb 07 |
jari |
230 |
}//end class |