149 |
10 Aug 06 |
enell |
1 |
package annotationfilter; |
149 |
10 Aug 06 |
enell |
2 |
|
210 |
06 Dec 06 |
enell |
3 |
import basefile.BASEFileException; |
149 |
10 Aug 06 |
enell |
4 |
import basefile.BASEFileReader; |
149 |
10 Aug 06 |
enell |
5 |
import basefile.BASEFileSection; |
149 |
10 Aug 06 |
enell |
6 |
|
149 |
10 Aug 06 |
enell |
7 |
import java.io.IOException; |
149 |
10 Aug 06 |
enell |
8 |
import java.util.ArrayList; |
149 |
10 Aug 06 |
enell |
9 |
import java.util.Arrays; |
149 |
10 Aug 06 |
enell |
10 |
import java.util.List; |
149 |
10 Aug 06 |
enell |
11 |
import java.util.regex.Matcher; |
149 |
10 Aug 06 |
enell |
12 |
import java.util.regex.Pattern; |
149 |
10 Aug 06 |
enell |
13 |
|
149 |
10 Aug 06 |
enell |
14 |
/** |
149 |
10 Aug 06 |
enell |
* The class for filtering a basefile using the assay annotations. |
149 |
10 Aug 06 |
enell |
16 |
* |
149 |
10 Aug 06 |
enell |
* @author Johan Enell |
149 |
10 Aug 06 |
enell |
18 |
*/ |
149 |
10 Aug 06 |
enell |
19 |
public class Filter |
149 |
10 Aug 06 |
enell |
20 |
{ |
149 |
10 Aug 06 |
enell |
21 |
private boolean filter = true; |
149 |
10 Aug 06 |
enell |
22 |
|
149 |
10 Aug 06 |
enell |
23 |
private List<String> annotationValues; |
149 |
10 Aug 06 |
enell |
24 |
|
149 |
10 Aug 06 |
enell |
25 |
private int[] assayNumbers; |
149 |
10 Aug 06 |
enell |
26 |
|
149 |
10 Aug 06 |
enell |
27 |
private String param_annotationValues = ""; |
149 |
10 Aug 06 |
enell |
28 |
|
149 |
10 Aug 06 |
enell |
29 |
private String param_annotationType = ""; |
149 |
10 Aug 06 |
enell |
30 |
|
149 |
10 Aug 06 |
enell |
31 |
/** |
149 |
10 Aug 06 |
enell |
* This method extract the settings section for a basefile. Should be called |
149 |
10 Aug 06 |
enell |
* before the extractAssays method. The parameters used are annotationType |
149 |
10 Aug 06 |
enell |
* and annotationValues. If the are not present in the basefile the program |
149 |
10 Aug 06 |
enell |
* will not run. |
149 |
10 Aug 06 |
enell |
36 |
* |
149 |
10 Aug 06 |
enell |
* @param section |
149 |
10 Aug 06 |
enell |
* The settings section. |
149 |
10 Aug 06 |
enell |
39 |
*/ |
149 |
10 Aug 06 |
enell |
40 |
public void extractSettings(BASEFileSection section) |
149 |
10 Aug 06 |
enell |
41 |
{ |
149 |
10 Aug 06 |
enell |
42 |
try |
149 |
10 Aug 06 |
enell |
43 |
{ |
212 |
06 Dec 06 |
enell |
44 |
param_annotationValues = section.findStringOpt("annotationValues"); |
149 |
10 Aug 06 |
enell |
45 |
param_annotationType = section.findStringOpt("annotationType"); |
149 |
10 Aug 06 |
enell |
46 |
if (param_annotationType.equals("0")) |
149 |
10 Aug 06 |
enell |
47 |
{ |
149 |
10 Aug 06 |
enell |
48 |
filter = false; |
149 |
10 Aug 06 |
enell |
49 |
System.err.println("No annotation selected."); |
149 |
10 Aug 06 |
enell |
50 |
} |
149 |
10 Aug 06 |
enell |
51 |
else if (param_annotationValues.equals("")) |
149 |
10 Aug 06 |
enell |
52 |
{ |
149 |
10 Aug 06 |
enell |
53 |
filter = false; |
149 |
10 Aug 06 |
enell |
54 |
} |
149 |
10 Aug 06 |
enell |
55 |
annotationValues = new ArrayList<String>(Arrays.asList(param_annotationValues.split("\\\\r\\\\n", 0))); |
149 |
10 Aug 06 |
enell |
56 |
annotationValues.removeAll(Arrays.asList("")); |
149 |
10 Aug 06 |
enell |
57 |
if (section.findBooleanOpt("leaveEmpty")) |
149 |
10 Aug 06 |
enell |
58 |
{ |
149 |
10 Aug 06 |
enell |
59 |
annotationValues.add(""); |
149 |
10 Aug 06 |
enell |
60 |
} |
149 |
10 Aug 06 |
enell |
61 |
} |
149 |
10 Aug 06 |
enell |
62 |
catch (Exception e) |
149 |
10 Aug 06 |
enell |
63 |
{ |
149 |
10 Aug 06 |
enell |
64 |
e.printStackTrace(); |
149 |
10 Aug 06 |
enell |
65 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
66 |
} |
149 |
10 Aug 06 |
enell |
67 |
} |
149 |
10 Aug 06 |
enell |
68 |
|
149 |
10 Aug 06 |
enell |
69 |
/** |
149 |
10 Aug 06 |
enell |
* The method that extract the assays from the basefile and starts to filter |
149 |
10 Aug 06 |
enell |
* according to the settings that extractSettings has read. |
149 |
10 Aug 06 |
enell |
72 |
* |
149 |
10 Aug 06 |
enell |
* @param section |
149 |
10 Aug 06 |
enell |
* The assays section |
149 |
10 Aug 06 |
enell |
* @param reader |
149 |
10 Aug 06 |
enell |
* The BASEFileReader from the basefile so the assays can be |
149 |
10 Aug 06 |
enell |
* read. |
149 |
10 Aug 06 |
enell |
78 |
*/ |
149 |
10 Aug 06 |
enell |
79 |
public void extractAssays(BASEFileSection section, BASEFileReader reader) |
149 |
10 Aug 06 |
enell |
80 |
{ |
149 |
10 Aug 06 |
enell |
81 |
try |
149 |
10 Aug 06 |
enell |
82 |
{ |
149 |
10 Aug 06 |
enell |
83 |
List<String> columns = section.findFieldList("columns"); |
149 |
10 Aug 06 |
enell |
84 |
|
149 |
10 Aug 06 |
enell |
85 |
if (columns == null) |
149 |
10 Aug 06 |
enell |
86 |
{ |
149 |
10 Aug 06 |
enell |
87 |
System.err.println("columns is missing in the assays section."); |
149 |
10 Aug 06 |
enell |
88 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
89 |
} |
149 |
10 Aug 06 |
enell |
90 |
|
149 |
10 Aug 06 |
enell |
91 |
int idCol = columns.indexOf("id"); |
149 |
10 Aug 06 |
enell |
92 |
int annotationCol = columns.indexOf(param_annotationType); |
149 |
10 Aug 06 |
enell |
93 |
|
149 |
10 Aug 06 |
enell |
94 |
if (filter && (idCol == -1 || annotationCol == -1)) |
149 |
10 Aug 06 |
enell |
95 |
{ |
149 |
10 Aug 06 |
enell |
96 |
System.err.println("id or " + param_annotationType + " is missing in the assays section."); |
149 |
10 Aug 06 |
enell |
97 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
98 |
} |
149 |
10 Aug 06 |
enell |
99 |
|
149 |
10 Aug 06 |
enell |
100 |
String[] vals; |
149 |
10 Aug 06 |
enell |
101 |
ArrayList<String[]> data = new ArrayList<String[]>(); |
149 |
10 Aug 06 |
enell |
102 |
ArrayList<Integer> tmpNumbers = new ArrayList<Integer>(); |
149 |
10 Aug 06 |
enell |
103 |
while ((vals = reader.readDataRow()) != null) |
149 |
10 Aug 06 |
enell |
104 |
{ |
149 |
10 Aug 06 |
enell |
105 |
if (filter) |
149 |
10 Aug 06 |
enell |
106 |
{ |
212 |
06 Dec 06 |
enell |
107 |
if (annotationValues.contains(vals[annotationCol])) |
149 |
10 Aug 06 |
enell |
108 |
{ |
149 |
10 Aug 06 |
enell |
109 |
data.add(vals); |
149 |
10 Aug 06 |
enell |
110 |
tmpNumbers.add(new Integer(vals[idCol])); |
149 |
10 Aug 06 |
enell |
111 |
} |
149 |
10 Aug 06 |
enell |
112 |
} |
149 |
10 Aug 06 |
enell |
113 |
else |
149 |
10 Aug 06 |
enell |
114 |
{ |
149 |
10 Aug 06 |
enell |
115 |
tmpNumbers.add(new Integer(vals[idCol])); |
149 |
10 Aug 06 |
enell |
116 |
data.add(vals); |
149 |
10 Aug 06 |
enell |
117 |
} |
149 |
10 Aug 06 |
enell |
118 |
} |
149 |
10 Aug 06 |
enell |
119 |
section.setHeader("count", tmpNumbers.size()); |
149 |
10 Aug 06 |
enell |
120 |
System.out.println(section); |
149 |
10 Aug 06 |
enell |
121 |
|
149 |
10 Aug 06 |
enell |
122 |
for (String[] line : data) |
149 |
10 Aug 06 |
enell |
123 |
{ |
149 |
10 Aug 06 |
enell |
124 |
System.out.print(line[0]); |
149 |
10 Aug 06 |
enell |
125 |
for (int i = 1; i < line.length; i++) |
149 |
10 Aug 06 |
enell |
126 |
{ |
149 |
10 Aug 06 |
enell |
127 |
System.out.print("\t" + line[i]); |
149 |
10 Aug 06 |
enell |
128 |
} |
149 |
10 Aug 06 |
enell |
129 |
System.out.println(); |
149 |
10 Aug 06 |
enell |
130 |
} |
149 |
10 Aug 06 |
enell |
131 |
System.out.println(); |
149 |
10 Aug 06 |
enell |
132 |
|
149 |
10 Aug 06 |
enell |
133 |
if (filter && tmpNumbers.size() == 0) |
149 |
10 Aug 06 |
enell |
134 |
{ |
210 |
06 Dec 06 |
enell |
135 |
throw new BASEFileException("Can't find any assay with " + param_annotationType + " matching " + annotationValues); |
149 |
10 Aug 06 |
enell |
136 |
} |
149 |
10 Aug 06 |
enell |
137 |
|
149 |
10 Aug 06 |
enell |
138 |
assayNumbers = new int[tmpNumbers.size()]; |
149 |
10 Aug 06 |
enell |
139 |
for (int i = 0; i < assayNumbers.length; i++) |
149 |
10 Aug 06 |
enell |
140 |
{ |
149 |
10 Aug 06 |
enell |
141 |
Integer number = tmpNumbers.get(i); |
149 |
10 Aug 06 |
enell |
142 |
assayNumbers[i] = number.intValue(); |
149 |
10 Aug 06 |
enell |
143 |
} |
149 |
10 Aug 06 |
enell |
144 |
} |
149 |
10 Aug 06 |
enell |
145 |
catch (Exception e) |
149 |
10 Aug 06 |
enell |
146 |
{ |
149 |
10 Aug 06 |
enell |
147 |
e.printStackTrace(); |
149 |
10 Aug 06 |
enell |
148 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
149 |
} |
149 |
10 Aug 06 |
enell |
150 |
} |
149 |
10 Aug 06 |
enell |
151 |
|
149 |
10 Aug 06 |
enell |
152 |
/** |
149 |
10 Aug 06 |
enell |
* This method reads the spot section and and prints the spots from from the |
149 |
10 Aug 06 |
enell |
* assays left after the filtering. |
149 |
10 Aug 06 |
enell |
155 |
* |
149 |
10 Aug 06 |
enell |
* @param section |
149 |
10 Aug 06 |
enell |
* The spots section. |
149 |
10 Aug 06 |
enell |
* @param reader |
149 |
10 Aug 06 |
enell |
* he BASEFileReader from the basefile so the spots can be read. |
149 |
10 Aug 06 |
enell |
160 |
*/ |
149 |
10 Aug 06 |
enell |
161 |
public void extractSpots(BASEFileSection section, BASEFileReader reader) |
149 |
10 Aug 06 |
enell |
162 |
{ |
149 |
10 Aug 06 |
enell |
163 |
List<String> assays = section.findFieldList("assays"); |
149 |
10 Aug 06 |
enell |
164 |
List<String> columns = section.findFieldList("columns"); |
149 |
10 Aug 06 |
enell |
165 |
List<String> assayFields = section.findFieldList("assayFields"); |
149 |
10 Aug 06 |
enell |
166 |
|
149 |
10 Aug 06 |
enell |
167 |
if (columns == null || assayFields == null || assays == null) |
149 |
10 Aug 06 |
enell |
168 |
{ |
149 |
10 Aug 06 |
enell |
169 |
System.err.println("columns, assayFields or assays is missing in one of the assays spot section."); |
149 |
10 Aug 06 |
enell |
170 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
171 |
} |
149 |
10 Aug 06 |
enell |
172 |
|
149 |
10 Aug 06 |
enell |
173 |
int assayDataCol = columns.indexOf("assayData"); |
149 |
10 Aug 06 |
enell |
174 |
int[] assayCol = new int[assayNumbers.length]; |
149 |
10 Aug 06 |
enell |
175 |
|
149 |
10 Aug 06 |
enell |
176 |
for (int i = 0; i < assayCol.length; i++) |
149 |
10 Aug 06 |
enell |
177 |
{ |
149 |
10 Aug 06 |
enell |
178 |
assayCol[i] = assays.indexOf(String.valueOf(assayNumbers[i])); |
149 |
10 Aug 06 |
enell |
179 |
} |
149 |
10 Aug 06 |
enell |
180 |
|
149 |
10 Aug 06 |
enell |
181 |
if (assayNumbers.length > 0) |
149 |
10 Aug 06 |
enell |
182 |
{ |
149 |
10 Aug 06 |
enell |
183 |
String value = String.valueOf(assayNumbers[0]); |
149 |
10 Aug 06 |
enell |
184 |
for (int i = 1; i < assayNumbers.length; i++) |
149 |
10 Aug 06 |
enell |
185 |
{ |
149 |
10 Aug 06 |
enell |
186 |
value += "\t" + assayNumbers[i]; |
149 |
10 Aug 06 |
enell |
187 |
} |
149 |
10 Aug 06 |
enell |
188 |
section.setHeader("assays", value); |
149 |
10 Aug 06 |
enell |
189 |
} |
149 |
10 Aug 06 |
enell |
190 |
else |
149 |
10 Aug 06 |
enell |
191 |
{ |
149 |
10 Aug 06 |
enell |
192 |
section.setHeader("assays", ""); |
149 |
10 Aug 06 |
enell |
193 |
} |
149 |
10 Aug 06 |
enell |
194 |
|
149 |
10 Aug 06 |
enell |
195 |
try |
149 |
10 Aug 06 |
enell |
196 |
{ |
149 |
10 Aug 06 |
enell |
197 |
section.removeHeader("setExtraFloats"); |
149 |
10 Aug 06 |
enell |
198 |
Pattern p = Pattern.compile("_xc_(\\S*)\\b"); |
149 |
10 Aug 06 |
enell |
199 |
Matcher m = p.matcher(section.findStringOpt("assayFields")); |
149 |
10 Aug 06 |
enell |
200 |
while (m.find()) |
149 |
10 Aug 06 |
enell |
201 |
{ |
149 |
10 Aug 06 |
enell |
202 |
String ef = section.getHeader("setExtraFloats"); |
149 |
10 Aug 06 |
enell |
203 |
ef = ef == null ? m.group(1) : ef + "\t" + m.group(1); |
149 |
10 Aug 06 |
enell |
204 |
section.setHeader("setExtraFloats", ef); |
149 |
10 Aug 06 |
enell |
205 |
} |
149 |
10 Aug 06 |
enell |
206 |
section.setHeader("assayFields", section.findStringOpt("assayFields").replaceAll("_xc_", "")); |
149 |
10 Aug 06 |
enell |
207 |
System.out.println(section); |
149 |
10 Aug 06 |
enell |
208 |
|
149 |
10 Aug 06 |
enell |
209 |
String[] vals; |
149 |
10 Aug 06 |
enell |
210 |
while ((vals = reader.readDataRow()) != null) |
149 |
10 Aug 06 |
enell |
211 |
{ |
149 |
10 Aug 06 |
enell |
212 |
String spotLine = ""; |
149 |
10 Aug 06 |
enell |
213 |
for (int i = 0; i < assayCol.length; i++) |
149 |
10 Aug 06 |
enell |
214 |
{ |
149 |
10 Aug 06 |
enell |
215 |
for (int j = 0; j < assayFields.size(); j++) |
149 |
10 Aug 06 |
enell |
216 |
{ |
149 |
10 Aug 06 |
enell |
217 |
spotLine += "\t" + vals[assayDataCol + assayFields.size() * assayCol[i] + j]; |
149 |
10 Aug 06 |
enell |
218 |
} |
149 |
10 Aug 06 |
enell |
219 |
} |
149 |
10 Aug 06 |
enell |
220 |
if (!spotLine.matches("^\\s*$")) |
149 |
10 Aug 06 |
enell |
221 |
{ |
149 |
10 Aug 06 |
enell |
222 |
String geneLine = vals[0]; |
149 |
10 Aug 06 |
enell |
223 |
for (int j = 1; j < columns.size() - 1; j++) |
149 |
10 Aug 06 |
enell |
224 |
{ |
149 |
10 Aug 06 |
enell |
225 |
geneLine += "\t" + vals[j]; |
149 |
10 Aug 06 |
enell |
226 |
} |
149 |
10 Aug 06 |
enell |
227 |
System.out.println(geneLine + spotLine); |
149 |
10 Aug 06 |
enell |
228 |
} |
149 |
10 Aug 06 |
enell |
229 |
|
149 |
10 Aug 06 |
enell |
230 |
} |
149 |
10 Aug 06 |
enell |
231 |
} |
149 |
10 Aug 06 |
enell |
232 |
catch (IOException e) |
149 |
10 Aug 06 |
enell |
233 |
{ |
149 |
10 Aug 06 |
enell |
234 |
e.printStackTrace(); |
149 |
10 Aug 06 |
enell |
235 |
} |
149 |
10 Aug 06 |
enell |
236 |
} |
142 |
10 Aug 06 |
enell |
237 |
} |