1385 |
02 Sep 11 |
martin |
1 |
/** |
1385 |
02 Sep 11 |
martin |
$Id: |
1385 |
02 Sep 11 |
martin |
3 |
|
1385 |
02 Sep 11 |
martin |
Copyright (C) 2011 Martin Svensson |
1385 |
02 Sep 11 |
martin |
5 |
|
1385 |
02 Sep 11 |
martin |
This file is part of BASE - BioArray Software Environment. |
1385 |
02 Sep 11 |
martin |
Available at http://base.thep.lu.se/ |
1385 |
02 Sep 11 |
martin |
8 |
|
1385 |
02 Sep 11 |
martin |
BASE is free software; you can redistribute it and/or |
1385 |
02 Sep 11 |
martin |
modify it under the terms of the GNU General Public License |
1385 |
02 Sep 11 |
martin |
as published by the Free Software Foundation; either version 3 |
1385 |
02 Sep 11 |
martin |
of the License, or (at your option) any later version. |
1385 |
02 Sep 11 |
martin |
13 |
|
1385 |
02 Sep 11 |
martin |
BASE is distributed in the hope that it will be useful, |
1385 |
02 Sep 11 |
martin |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
1385 |
02 Sep 11 |
martin |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
1385 |
02 Sep 11 |
martin |
GNU General Public License for more details. |
1385 |
02 Sep 11 |
martin |
18 |
|
1385 |
02 Sep 11 |
martin |
You should have received a copy of the GNU General Public License |
1385 |
02 Sep 11 |
martin |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
1385 |
02 Sep 11 |
martin |
21 |
*/ |
1385 |
02 Sep 11 |
martin |
22 |
package net.sf.basedb.illumina.filehandler; |
1385 |
02 Sep 11 |
martin |
23 |
|
1385 |
02 Sep 11 |
martin |
24 |
import java.io.BufferedReader; |
1385 |
02 Sep 11 |
martin |
25 |
import java.io.IOException; |
1385 |
02 Sep 11 |
martin |
26 |
import java.io.InputStream; |
1385 |
02 Sep 11 |
martin |
27 |
import java.io.InputStreamReader; |
1385 |
02 Sep 11 |
martin |
28 |
import java.io.PushbackInputStream; |
1385 |
02 Sep 11 |
martin |
29 |
import java.util.ArrayList; |
1385 |
02 Sep 11 |
martin |
30 |
import java.util.List; |
1385 |
02 Sep 11 |
martin |
31 |
import java.util.zip.GZIPInputStream; |
1385 |
02 Sep 11 |
martin |
32 |
|
1385 |
02 Sep 11 |
martin |
33 |
import net.sf.basedb.core.ArrayDesign; |
1385 |
02 Sep 11 |
martin |
34 |
import net.sf.basedb.core.File; |
1385 |
02 Sep 11 |
martin |
35 |
import net.sf.basedb.core.InvalidDataException; |
1385 |
02 Sep 11 |
martin |
36 |
import net.sf.basedb.core.InvalidUseOfNullException; |
1385 |
02 Sep 11 |
martin |
37 |
import net.sf.basedb.util.FileUtil; |
1385 |
02 Sep 11 |
martin |
38 |
|
1385 |
02 Sep 11 |
martin |
39 |
|
1385 |
02 Sep 11 |
martin |
40 |
/** |
1385 |
02 Sep 11 |
martin |
41 |
|
1385 |
02 Sep 11 |
martin |
@author Martin |
1385 |
02 Sep 11 |
martin |
@since 1.7 |
1385 |
02 Sep 11 |
martin |
@base.modified $Date: 2011-05-06 14:09:13 +0200 (Fri, 06 May 2011) $ |
1385 |
02 Sep 11 |
martin |
45 |
*/ |
1385 |
02 Sep 11 |
martin |
46 |
|
1385 |
02 Sep 11 |
martin |
47 |
public class BgxValidator |
1385 |
02 Sep 11 |
martin |
48 |
{ |
1385 |
02 Sep 11 |
martin |
//Max number of lines to parse |
1385 |
02 Sep 11 |
martin |
50 |
private final int linesToParse = 100; |
1385 |
02 Sep 11 |
martin |
//The start of the heading that holds the number of controls |
1385 |
02 Sep 11 |
martin |
52 |
private String controlsHeading = "Number of Controls"; |
1385 |
02 Sep 11 |
martin |
53 |
private String probesHeading = "Number of Probes"; |
1385 |
02 Sep 11 |
martin |
54 |
|
1385 |
02 Sep 11 |
martin |
//List to hold the parsed lines. |
1385 |
02 Sep 11 |
martin |
56 |
private List<String> parsedHeadings = null; |
1385 |
02 Sep 11 |
martin |
57 |
|
1385 |
02 Sep 11 |
martin |
//Column names |
1385 |
02 Sep 11 |
martin |
59 |
private String[] requiredColumns = {"Probe_Id", "Array_Address_Id"}; |
1385 |
02 Sep 11 |
martin |
60 |
|
1385 |
02 Sep 11 |
martin |
61 |
|
1385 |
02 Sep 11 |
martin |
62 |
/** |
1385 |
02 Sep 11 |
martin |
Create a new instance |
1385 |
02 Sep 11 |
martin |
64 |
*/ |
1385 |
02 Sep 11 |
martin |
65 |
public BgxValidator() |
1385 |
02 Sep 11 |
martin |
66 |
{} |
1385 |
02 Sep 11 |
martin |
67 |
|
1385 |
02 Sep 11 |
martin |
68 |
/** |
1385 |
02 Sep 11 |
martin |
Reset metadata on the given array design. This set the |
1385 |
02 Sep 11 |
martin |
number of features to 0. |
1385 |
02 Sep 11 |
martin |
@param design The array design |
1385 |
02 Sep 11 |
martin |
72 |
*/ |
1385 |
02 Sep 11 |
martin |
73 |
public void resetMetadata(ArrayDesign design) |
1385 |
02 Sep 11 |
martin |
74 |
{ |
1385 |
02 Sep 11 |
martin |
75 |
design.setNumFileFeatures(0); |
1385 |
02 Sep 11 |
martin |
76 |
} |
1385 |
02 Sep 11 |
martin |
77 |
|
1385 |
02 Sep 11 |
martin |
78 |
public void copyMetaData(File bgxFile, ArrayDesign design) |
1385 |
02 Sep 11 |
martin |
79 |
throws InvalidDataException |
1385 |
02 Sep 11 |
martin |
80 |
{ |
1385 |
02 Sep 11 |
martin |
81 |
parsedHeadings = parseHeadings(bgxFile, linesToParse); |
1385 |
02 Sep 11 |
martin |
82 |
if (parsedHeadings != null) |
1385 |
02 Sep 11 |
martin |
83 |
{ |
1385 |
02 Sep 11 |
martin |
84 |
int numFeatures = getNumFeatures(parsedHeadings); |
1385 |
02 Sep 11 |
martin |
85 |
design.setNumFileFeatures(numFeatures); |
1385 |
02 Sep 11 |
martin |
86 |
} |
1385 |
02 Sep 11 |
martin |
87 |
} |
1385 |
02 Sep 11 |
martin |
88 |
|
1385 |
02 Sep 11 |
martin |
89 |
|
1385 |
02 Sep 11 |
martin |
90 |
/* |
1385 |
02 Sep 11 |
martin |
Takes a file, packed(gzip) or not packed and parse certain amount of lines. |
1385 |
02 Sep 11 |
martin |
Parsed heading lines will be stored in a list. |
1385 |
02 Sep 11 |
martin |
The file considers to be valid if the [Probes] section is found and next line |
1385 |
02 Sep 11 |
martin |
contains of at least the columns: Probe_Id and Array_Address_Id. |
1385 |
02 Sep 11 |
martin |
95 |
*/ |
1385 |
02 Sep 11 |
martin |
96 |
private List<String> parseHeadings(File bgxFile, int range) |
1385 |
02 Sep 11 |
martin |
97 |
{ |
1385 |
02 Sep 11 |
martin |
98 |
if (bgxFile == null) |
1385 |
02 Sep 11 |
martin |
99 |
{ |
1385 |
02 Sep 11 |
martin |
100 |
throw new InvalidUseOfNullException("bgxFile"); |
1385 |
02 Sep 11 |
martin |
101 |
} |
1385 |
02 Sep 11 |
martin |
102 |
if (!bgxFile.getLocation().isDownloadable()) |
1385 |
02 Sep 11 |
martin |
103 |
{ |
1385 |
02 Sep 11 |
martin |
104 |
throw new InvalidDataException("Can't download file data for file '" + |
1385 |
02 Sep 11 |
martin |
105 |
bgxFile.getName() + "'; location=" + bgxFile.getLocation()); |
1385 |
02 Sep 11 |
martin |
106 |
} |
1385 |
02 Sep 11 |
martin |
107 |
InputStream in = null; |
1385 |
02 Sep 11 |
martin |
108 |
try |
1385 |
02 Sep 11 |
martin |
109 |
{ |
1385 |
02 Sep 11 |
martin |
110 |
in = wrapInputStream(bgxFile.getDownloadStream(0)); |
1385 |
02 Sep 11 |
martin |
111 |
} |
1385 |
02 Sep 11 |
martin |
112 |
catch(IOException ioex) |
1385 |
02 Sep 11 |
martin |
113 |
{ |
1385 |
02 Sep 11 |
martin |
114 |
throw new InvalidDataException("BGX-file could not be read properly."); |
1385 |
02 Sep 11 |
martin |
115 |
} |
1385 |
02 Sep 11 |
martin |
116 |
BufferedReader reader = new BufferedReader(new InputStreamReader(in)); |
1385 |
02 Sep 11 |
martin |
117 |
List<String> headings = new ArrayList<String>(); |
1385 |
02 Sep 11 |
martin |
118 |
boolean valid = false; |
1385 |
02 Sep 11 |
martin |
119 |
String line = null; |
1385 |
02 Sep 11 |
martin |
120 |
do |
1385 |
02 Sep 11 |
martin |
121 |
{ |
1385 |
02 Sep 11 |
martin |
122 |
try |
1385 |
02 Sep 11 |
martin |
123 |
{ |
1385 |
02 Sep 11 |
martin |
124 |
line = reader.readLine(); |
1385 |
02 Sep 11 |
martin |
125 |
headings.add(line); |
1385 |
02 Sep 11 |
martin |
126 |
if (line.matches("\\[Probes\\]")) |
1385 |
02 Sep 11 |
martin |
127 |
{ |
1385 |
02 Sep 11 |
martin |
128 |
line = reader.readLine(); |
1385 |
02 Sep 11 |
martin |
129 |
valid = true; |
1385 |
02 Sep 11 |
martin |
130 |
for (String column : requiredColumns) |
1385 |
02 Sep 11 |
martin |
131 |
{ |
1385 |
02 Sep 11 |
martin |
132 |
valid = line.contains("\t" + column + "\t") ? valid : false; |
1385 |
02 Sep 11 |
martin |
133 |
} |
1385 |
02 Sep 11 |
martin |
134 |
} |
1385 |
02 Sep 11 |
martin |
135 |
} |
1385 |
02 Sep 11 |
martin |
136 |
catch(IOException ioex) |
1385 |
02 Sep 11 |
martin |
137 |
{ |
1385 |
02 Sep 11 |
martin |
138 |
line = null; |
1385 |
02 Sep 11 |
martin |
139 |
} |
1385 |
02 Sep 11 |
martin |
140 |
}while(!valid && (headings.size() < range) && line != null); |
1385 |
02 Sep 11 |
martin |
141 |
if (!valid) |
1385 |
02 Sep 11 |
martin |
142 |
{ |
1385 |
02 Sep 11 |
martin |
143 |
throw new InvalidDataException("Could not find required parts in the file"); |
1385 |
02 Sep 11 |
martin |
144 |
} |
1385 |
02 Sep 11 |
martin |
145 |
return headings; |
1385 |
02 Sep 11 |
martin |
146 |
} |
1385 |
02 Sep 11 |
martin |
147 |
|
1385 |
02 Sep 11 |
martin |
148 |
/* |
1385 |
02 Sep 11 |
martin |
Gets the right type of input stream depending if it comes |
1385 |
02 Sep 11 |
martin |
from a compressed file(GZIP) or none-compressed file. |
1385 |
02 Sep 11 |
martin |
151 |
*/ |
1385 |
02 Sep 11 |
martin |
152 |
private InputStream wrapInputStream (InputStream in) |
1385 |
02 Sep 11 |
martin |
153 |
throws IOException |
1385 |
02 Sep 11 |
martin |
154 |
{ |
1385 |
02 Sep 11 |
martin |
155 |
PushbackInputStream pin = new PushbackInputStream(in, 2); |
1385 |
02 Sep 11 |
martin |
156 |
boolean isGZip = FileUtil.checkMagicNumber(pin, new byte[] { 0x1f, (byte)0x8b }); |
1385 |
02 Sep 11 |
martin |
157 |
if (isGZip) |
1385 |
02 Sep 11 |
martin |
158 |
{ |
1385 |
02 Sep 11 |
martin |
159 |
in = new GZIPInputStream(pin); |
1385 |
02 Sep 11 |
martin |
160 |
} |
1385 |
02 Sep 11 |
martin |
161 |
else |
1385 |
02 Sep 11 |
martin |
162 |
{ |
1385 |
02 Sep 11 |
martin |
163 |
in = pin; |
1385 |
02 Sep 11 |
martin |
164 |
} |
1385 |
02 Sep 11 |
martin |
165 |
return in; |
1385 |
02 Sep 11 |
martin |
166 |
} |
1385 |
02 Sep 11 |
martin |
167 |
|
1385 |
02 Sep 11 |
martin |
168 |
/* |
1385 |
02 Sep 11 |
martin |
Gets the number of controls - defined in the headings section. |
1385 |
02 Sep 11 |
martin |
170 |
*/ |
1385 |
02 Sep 11 |
martin |
171 |
private int getNumFeatures(List<String> headings) |
1385 |
02 Sep 11 |
martin |
172 |
{ |
1385 |
02 Sep 11 |
martin |
173 |
int numFeatures = 0; |
1385 |
02 Sep 11 |
martin |
174 |
if (headings != null && !(headings.size() < 1)) |
1385 |
02 Sep 11 |
martin |
175 |
{ |
1385 |
02 Sep 11 |
martin |
176 |
for (String s : headings) |
1385 |
02 Sep 11 |
martin |
177 |
{ |
1385 |
02 Sep 11 |
martin |
178 |
String num = null; |
1385 |
02 Sep 11 |
martin |
179 |
if (s.startsWith(controlsHeading)) |
1385 |
02 Sep 11 |
martin |
180 |
{ |
1385 |
02 Sep 11 |
martin |
181 |
num = s.substring(controlsHeading.length()); |
1385 |
02 Sep 11 |
martin |
182 |
} |
1385 |
02 Sep 11 |
martin |
183 |
else if (s.startsWith(probesHeading)) |
1385 |
02 Sep 11 |
martin |
184 |
{ |
1385 |
02 Sep 11 |
martin |
185 |
num = s.substring(probesHeading.length()); |
1385 |
02 Sep 11 |
martin |
186 |
} |
1385 |
02 Sep 11 |
martin |
187 |
if (num != null) |
1385 |
02 Sep 11 |
martin |
188 |
{ |
1385 |
02 Sep 11 |
martin |
189 |
try |
1385 |
02 Sep 11 |
martin |
190 |
{ |
1385 |
02 Sep 11 |
martin |
191 |
numFeatures += Integer.parseInt(num.trim()); |
1385 |
02 Sep 11 |
martin |
192 |
} |
1385 |
02 Sep 11 |
martin |
193 |
catch (NumberFormatException nex) |
1385 |
02 Sep 11 |
martin |
194 |
{ |
1385 |
02 Sep 11 |
martin |
195 |
throw new InvalidDataException(nex); |
1385 |
02 Sep 11 |
martin |
196 |
} |
1385 |
02 Sep 11 |
martin |
197 |
} |
1385 |
02 Sep 11 |
martin |
198 |
} |
1385 |
02 Sep 11 |
martin |
199 |
} |
1385 |
02 Sep 11 |
martin |
200 |
return numFeatures; |
1385 |
02 Sep 11 |
martin |
201 |
} |
1385 |
02 Sep 11 |
martin |
202 |
} |