937 |
20 Jan 09 |
jari |
#!/usr/bin/perl -w |
937 |
20 Jan 09 |
jari |
2 |
|
937 |
20 Jan 09 |
jari |
# $Id$ |
937 |
20 Jan 09 |
jari |
4 |
|
937 |
20 Jan 09 |
jari |
# Copyright (C) 2009 Markus Ringnér |
937 |
20 Jan 09 |
jari |
6 |
|
937 |
20 Jan 09 |
jari |
# This file is part of Illumina plug-in package for BASE. |
937 |
20 Jan 09 |
jari |
# Available at http://baseplugins.thep.lu.se/ |
937 |
20 Jan 09 |
jari |
# BASE main site: http://base.thep.lu.se/ |
937 |
20 Jan 09 |
jari |
10 |
|
937 |
20 Jan 09 |
jari |
# This is free software; you can redistribute it and/or |
937 |
20 Jan 09 |
jari |
# modify it under the terms of the GNU General Public License |
940 |
27 Jan 09 |
martin |
# as published by the Free Software Foundation; either version 3 |
937 |
20 Jan 09 |
jari |
# of the License, or (at your option) any later version. |
937 |
20 Jan 09 |
jari |
15 |
|
937 |
20 Jan 09 |
jari |
# The software is distributed in the hope that it will be useful, |
937 |
20 Jan 09 |
jari |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
937 |
20 Jan 09 |
jari |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
937 |
20 Jan 09 |
jari |
# GNU General Public License for more details. |
937 |
20 Jan 09 |
jari |
20 |
|
937 |
20 Jan 09 |
jari |
# You should have received a copy of the GNU General Public License |
940 |
27 Jan 09 |
martin |
# along with this package. If not, see <http://www.gnu.org/licenses/>. |
937 |
20 Jan 09 |
jari |
23 |
|
937 |
20 Jan 09 |
jari |
24 |
use strict; |
937 |
20 Jan 09 |
jari |
25 |
use warnings; |
937 |
20 Jan 09 |
jari |
26 |
|
937 |
20 Jan 09 |
jari |
27 |
use File::Spec; |
937 |
20 Jan 09 |
jari |
28 |
use Getopt::Long; |
937 |
20 Jan 09 |
jari |
29 |
use IO::File; |
937 |
20 Jan 09 |
jari |
30 |
use Pod::Usage; |
937 |
20 Jan 09 |
jari |
31 |
|
937 |
20 Jan 09 |
jari |
# initialize user-definable parameters to default values |
937 |
20 Jan 09 |
jari |
33 |
|
937 |
20 Jan 09 |
jari |
34 |
my $samples_file='sample_probe_profile.txt'; |
937 |
20 Jan 09 |
jari |
35 |
my $controls_file='control_probe_profile.txt'; |
937 |
20 Jan 09 |
jari |
36 |
|
937 |
20 Jan 09 |
jari |
# set user-defined parameters |
937 |
20 Jan 09 |
jari |
38 |
|
937 |
20 Jan 09 |
jari |
39 |
GetOptions('samples_file=s' => \$samples_file, |
937 |
20 Jan 09 |
jari |
40 |
'controls_file=s' => \$controls_file) |
937 |
20 Jan 09 |
jari |
41 |
or pod2usage(1); |
937 |
20 Jan 09 |
jari |
42 |
|
937 |
20 Jan 09 |
jari |
43 |
$samples_file=File::Spec->canonpath($samples_file); |
937 |
20 Jan 09 |
jari |
44 |
$controls_file=File::Spec->canonpath($controls_file); |
937 |
20 Jan 09 |
jari |
45 |
|
937 |
20 Jan 09 |
jari |
46 |
if($samples_file ne '') { |
937 |
20 Jan 09 |
jari |
47 |
unless((-r $samples_file)) { |
937 |
20 Jan 09 |
jari |
48 |
print STDERR "Error: Cannot read samples data file: $samples_file\n"; |
937 |
20 Jan 09 |
jari |
49 |
exit(0); |
937 |
20 Jan 09 |
jari |
50 |
} |
937 |
20 Jan 09 |
jari |
51 |
} |
937 |
20 Jan 09 |
jari |
52 |
else { |
937 |
20 Jan 09 |
jari |
53 |
print STDERR "Error: No file with data for samples specified!\n"; |
937 |
20 Jan 09 |
jari |
54 |
pod2usage(1); |
937 |
20 Jan 09 |
jari |
55 |
} |
937 |
20 Jan 09 |
jari |
56 |
|
937 |
20 Jan 09 |
jari |
57 |
if($controls_file ne '') { |
937 |
20 Jan 09 |
jari |
58 |
unless((-r $controls_file)) { |
937 |
20 Jan 09 |
jari |
59 |
print STDERR "Error: Cannot read controls data file: $controls_file\n"; |
937 |
20 Jan 09 |
jari |
60 |
exit(0); |
937 |
20 Jan 09 |
jari |
61 |
} |
937 |
20 Jan 09 |
jari |
62 |
} |
937 |
20 Jan 09 |
jari |
63 |
else { |
937 |
20 Jan 09 |
jari |
64 |
print STDERR "Error: No file with data for controls specified!\n"; |
937 |
20 Jan 09 |
jari |
65 |
pod2usage(1); |
937 |
20 Jan 09 |
jari |
66 |
} |
937 |
20 Jan 09 |
jari |
67 |
|
937 |
20 Jan 09 |
jari |
68 |
|
937 |
20 Jan 09 |
jari |
# Main program - Generate an IBS for each sample |
937 |
20 Jan 09 |
jari |
70 |
|
937 |
20 Jan 09 |
jari |
# Read header for samples file |
937 |
20 Jan 09 |
jari |
72 |
|
937 |
20 Jan 09 |
jari |
73 |
open(SAMPLES,"$samples_file"); |
937 |
20 Jan 09 |
jari |
74 |
my $header=<SAMPLES>; |
937 |
20 Jan 09 |
jari |
75 |
chomp($header); |
937 |
20 Jan 09 |
jari |
76 |
$header=~s/[\r\n]+$//; |
937 |
20 Jan 09 |
jari |
77 |
my $samples_file_error="The samples data file $samples_file is not in the " . |
937 |
20 Jan 09 |
jari |
78 |
"correct format"; |
937 |
20 Jan 09 |
jari |
79 |
my @header=split(/\t/,$header,-1); |
937 |
20 Jan 09 |
jari |
80 |
if($header[0] ne 'PROBE_ID') { |
937 |
20 Jan 09 |
jari |
81 |
print STDERR "$samples_file_error\n"; |
937 |
20 Jan 09 |
jari |
82 |
print STDERR "The first column header should be " . |
937 |
20 Jan 09 |
jari |
83 |
"PROBE_ID, but is $header[0].\n"; |
937 |
20 Jan 09 |
jari |
84 |
exit(0); |
937 |
20 Jan 09 |
jari |
85 |
} |
937 |
20 Jan 09 |
jari |
86 |
if($header[scalar(@header)-1] ne 'ProbeID') { |
937 |
20 Jan 09 |
jari |
87 |
print STDERR "$samples_file_error\n"; |
937 |
20 Jan 09 |
jari |
88 |
print STDERR "The last column header should be " . |
937 |
20 Jan 09 |
jari |
89 |
"ProbeID, but is $header[scalar(@header)-1].\n"; |
937 |
20 Jan 09 |
jari |
90 |
exit(0); |
937 |
20 Jan 09 |
jari |
91 |
} |
937 |
20 Jan 09 |
jari |
92 |
|
937 |
20 Jan 09 |
jari |
93 |
|
937 |
20 Jan 09 |
jari |
94 |
my @samples; |
937 |
20 Jan 09 |
jari |
95 |
for(my $i=1;$i<scalar(@header)-1;$i+=3) { |
937 |
20 Jan 09 |
jari |
96 |
my $header_error=0; |
937 |
20 Jan 09 |
jari |
97 |
if($header[$i] !~ /\.AVG_Signal/) { |
937 |
20 Jan 09 |
jari |
98 |
$header_error=1; |
937 |
20 Jan 09 |
jari |
99 |
} |
937 |
20 Jan 09 |
jari |
100 |
my $sample_name=$header[$i]; |
937 |
20 Jan 09 |
jari |
101 |
$sample_name=~s/\.AVG_Signal//; |
937 |
20 Jan 09 |
jari |
102 |
if($header[$i+1]!~/$sample_name\.BEAD_STDERR/ || |
937 |
20 Jan 09 |
jari |
103 |
$header[$i+2]!~/$sample_name\.Avg_NBEADS/) { |
937 |
20 Jan 09 |
jari |
104 |
$header_error=1; |
937 |
20 Jan 09 |
jari |
105 |
} |
937 |
20 Jan 09 |
jari |
106 |
if($header_error) { |
937 |
20 Jan 09 |
jari |
107 |
print STDERR "$samples_file_error\n"; |
937 |
20 Jan 09 |
jari |
108 |
print STDERR "Columns $i-" . ($i+2) . |
937 |
20 Jan 09 |
jari |
109 |
" header, $header[$i]\t$header[$i+1]\t$header[$i+2]," . |
937 |
20 Jan 09 |
jari |
110 |
" should be " . |
937 |
20 Jan 09 |
jari |
111 |
"${sample_name}.AVG_signal\t${sample_name}.BEAD_STDERR" . |
937 |
20 Jan 09 |
jari |
112 |
"\t${sample_name}.AVG_NBEADS\n"; |
937 |
20 Jan 09 |
jari |
113 |
exit(0); |
937 |
20 Jan 09 |
jari |
114 |
} |
937 |
20 Jan 09 |
jari |
115 |
push(@samples,$sample_name); |
937 |
20 Jan 09 |
jari |
116 |
} |
937 |
20 Jan 09 |
jari |
117 |
|
937 |
20 Jan 09 |
jari |
# Read and write data for sample probes. |
937 |
20 Jan 09 |
jari |
119 |
|
937 |
20 Jan 09 |
jari |
120 |
my %sample_probes;# Keep track of which probes have been printed |
937 |
20 Jan 09 |
jari |
121 |
my @file_handles; |
937 |
20 Jan 09 |
jari |
122 |
foreach my $sample (@samples) { |
937 |
20 Jan 09 |
jari |
123 |
my $filename=File::Spec->canonpath("${sample}.csv"); |
937 |
20 Jan 09 |
jari |
124 |
my $fh=IO::File->new(">$filename"); |
937 |
20 Jan 09 |
jari |
125 |
if(!defined($fh)) { |
937 |
20 Jan 09 |
jari |
126 |
print STDERR "Error: Cannot open file $filename for writing\n"; |
937 |
20 Jan 09 |
jari |
127 |
exit(0); |
937 |
20 Jan 09 |
jari |
128 |
} |
937 |
20 Jan 09 |
jari |
129 |
print $fh "Illumicode,N,Mean GRN,Dev GRN\n"; |
937 |
20 Jan 09 |
jari |
130 |
push(@file_handles,$fh); |
937 |
20 Jan 09 |
jari |
131 |
} |
937 |
20 Jan 09 |
jari |
132 |
|
937 |
20 Jan 09 |
jari |
133 |
while(my $line=<SAMPLES>) { |
937 |
20 Jan 09 |
jari |
134 |
chomp($line); |
937 |
20 Jan 09 |
jari |
135 |
$line=~s/[\r\n]+$//; |
937 |
20 Jan 09 |
jari |
136 |
my @line=split(/\t/,$line,-1); |
937 |
20 Jan 09 |
jari |
137 |
if(scalar(@header) != scalar(@line)) { |
937 |
20 Jan 09 |
jari |
138 |
print STDERR "$samples_file_error\n"; |
937 |
20 Jan 09 |
jari |
139 |
print STDERR "Not the same number of columns in all rows in $samples_file\n"; |
937 |
20 Jan 09 |
jari |
140 |
exit(0); |
937 |
20 Jan 09 |
jari |
141 |
} |
937 |
20 Jan 09 |
jari |
142 |
for(my $i=1;$i<scalar(@line)-1;$i+=3) { |
937 |
20 Jan 09 |
jari |
143 |
my $fh=$file_handles[($i-1)/3]; |
937 |
20 Jan 09 |
jari |
144 |
my $dev=$line[$i+1]*sqrt($line[$i+2]); |
937 |
20 Jan 09 |
jari |
145 |
$sample_probes{$line[scalar(@line)-1]}=1; |
937 |
20 Jan 09 |
jari |
146 |
print $fh "$line[scalar(@line)-1],$line[$i+2]," . |
944 |
28 Jan 09 |
martin |
147 |
$line[$i] . "," . $dev. "\n"; |
937 |
20 Jan 09 |
jari |
148 |
} |
937 |
20 Jan 09 |
jari |
149 |
} |
937 |
20 Jan 09 |
jari |
150 |
|
937 |
20 Jan 09 |
jari |
151 |
close(SAMPLES); |
937 |
20 Jan 09 |
jari |
152 |
|
937 |
20 Jan 09 |
jari |
# Read header for control probe data. |
937 |
20 Jan 09 |
jari |
154 |
|
937 |
20 Jan 09 |
jari |
155 |
open(CONTROLS,"$controls_file"); |
937 |
20 Jan 09 |
jari |
156 |
$header=<CONTROLS>; |
937 |
20 Jan 09 |
jari |
157 |
chomp($header); |
937 |
20 Jan 09 |
jari |
158 |
$header=~s/[\r\n]+$//; |
937 |
20 Jan 09 |
jari |
159 |
my $controls_file_error="The controls data file $controls_file is not in the " . |
937 |
20 Jan 09 |
jari |
160 |
"correct format"; |
937 |
20 Jan 09 |
jari |
161 |
@header=split(/\t/,$header,-1); |
937 |
20 Jan 09 |
jari |
162 |
if($header[0] ne 'ProbeID') { |
937 |
20 Jan 09 |
jari |
163 |
print STDERR "$controls_file_error\n"; |
937 |
20 Jan 09 |
jari |
164 |
print STDERR "The first column header should be " . |
937 |
20 Jan 09 |
jari |
165 |
"ProbeID, but is $header[0].\n"; |
937 |
20 Jan 09 |
jari |
166 |
exit(0); |
937 |
20 Jan 09 |
jari |
167 |
} |
937 |
20 Jan 09 |
jari |
168 |
|
937 |
20 Jan 09 |
jari |
169 |
my $i=1; |
937 |
20 Jan 09 |
jari |
170 |
foreach my $sample (@samples) { |
937 |
20 Jan 09 |
jari |
171 |
my $header_error=0; |
937 |
20 Jan 09 |
jari |
172 |
if($header[$i]!~/$sample\.AVG_Signal/ || |
937 |
20 Jan 09 |
jari |
173 |
$header[$i+1]!~/$sample\.BEAD_STDERR/ || |
937 |
20 Jan 09 |
jari |
174 |
$header[$i+2]!~/$sample\.Avg_NBEADS/) { |
937 |
20 Jan 09 |
jari |
175 |
$header_error=1; |
937 |
20 Jan 09 |
jari |
176 |
} |
937 |
20 Jan 09 |
jari |
177 |
if($header_error) { |
937 |
20 Jan 09 |
jari |
178 |
print STDERR "$controls_file_error\n"; |
937 |
20 Jan 09 |
jari |
179 |
print STDERR "Columns $i-" . ($i+2) . |
937 |
20 Jan 09 |
jari |
180 |
" header, $header[$i]\t$header[$i+1]\t$header[$i+2]," . |
937 |
20 Jan 09 |
jari |
181 |
" should be " . |
937 |
20 Jan 09 |
jari |
182 |
"${sample}.AVG_signal\t${sample}.BEAD_STDERR" . |
937 |
20 Jan 09 |
jari |
183 |
"\t${sample}.AVG_NBEADS\n"; |
937 |
20 Jan 09 |
jari |
184 |
exit(0); |
937 |
20 Jan 09 |
jari |
185 |
} |
937 |
20 Jan 09 |
jari |
186 |
$i+=3; |
937 |
20 Jan 09 |
jari |
187 |
} |
937 |
20 Jan 09 |
jari |
188 |
|
937 |
20 Jan 09 |
jari |
189 |
|
937 |
20 Jan 09 |
jari |
# Read and write data for control probes. |
937 |
20 Jan 09 |
jari |
191 |
|
937 |
20 Jan 09 |
jari |
# Some probes are both annotated as sample_probes and control_probes; |
937 |
20 Jan 09 |
jari |
# data for these should only appear once in the IBS-file. Some |
937 |
20 Jan 09 |
jari |
# controls are annotated as multiple types of controls; data for these |
937 |
20 Jan 09 |
jari |
# should only appear once in the IBS-file. |
937 |
20 Jan 09 |
jari |
196 |
|
937 |
20 Jan 09 |
jari |
197 |
while(my $line=<CONTROLS>) { |
937 |
20 Jan 09 |
jari |
198 |
chomp($line); |
937 |
20 Jan 09 |
jari |
199 |
$line=~s/[\r\n]+$//; |
937 |
20 Jan 09 |
jari |
200 |
my @line=split(/\t/,$line,-1); |
937 |
20 Jan 09 |
jari |
201 |
if(scalar(@header) != scalar(@line)) { |
937 |
20 Jan 09 |
jari |
202 |
print STDERR "$controls_file_error\n"; |
937 |
20 Jan 09 |
jari |
203 |
print STDERR "Not the same number of columns in all rows in $controls_file\n"; |
937 |
20 Jan 09 |
jari |
204 |
exit(0); |
937 |
20 Jan 09 |
jari |
205 |
} |
937 |
20 Jan 09 |
jari |
206 |
for(my $i=1;$i<scalar(@line);$i+=3) { |
937 |
20 Jan 09 |
jari |
207 |
my $fh=$file_handles[($i-1)/3]; |
937 |
20 Jan 09 |
jari |
208 |
my $dev=$line[$i+1]*sqrt($line[$i+2]); |
937 |
20 Jan 09 |
jari |
209 |
if(!exists($sample_probes{$line[0]})) { |
937 |
20 Jan 09 |
jari |
210 |
$sample_probes{$line[0]}=1; |
944 |
28 Jan 09 |
martin |
211 |
print $fh "$line[0],$line[$i+2]," . $line[$i] . |
944 |
28 Jan 09 |
martin |
212 |
"," . $dev . "\n"; |
937 |
20 Jan 09 |
jari |
213 |
} |
937 |
20 Jan 09 |
jari |
214 |
} |
937 |
20 Jan 09 |
jari |
215 |
} |
937 |
20 Jan 09 |
jari |
216 |
|
937 |
20 Jan 09 |
jari |
217 |
close(SAMPLES); |
937 |
20 Jan 09 |
jari |
218 |
|
937 |
20 Jan 09 |
jari |
219 |
|
937 |
20 Jan 09 |
jari |
220 |
foreach my $fh (@file_handles) { |
937 |
20 Jan 09 |
jari |
221 |
close($fh); |
937 |
20 Jan 09 |
jari |
222 |
} |
937 |
20 Jan 09 |
jari |
223 |
|
937 |
20 Jan 09 |
jari |
# Documentation |
937 |
20 Jan 09 |
jari |
225 |
|
937 |
20 Jan 09 |
jari |
226 |
=pod |
937 |
20 Jan 09 |
jari |
227 |
|
937 |
20 Jan 09 |
jari |
228 |
=head1 NAME |
937 |
20 Jan 09 |
jari |
229 |
|
937 |
20 Jan 09 |
jari |
230 |
beadstudio2ibs.pl - split multiple samples BeadStudio data files into separate IBS files for each sample |
937 |
20 Jan 09 |
jari |
231 |
|
937 |
20 Jan 09 |
jari |
232 |
=head1 SYNOPSIS |
937 |
20 Jan 09 |
jari |
233 |
|
937 |
20 Jan 09 |
jari |
234 |
beadstudio2ibs.pl --samples_file=<name> --controls_file=<name> |
937 |
20 Jan 09 |
jari |
235 |
|
937 |
20 Jan 09 |
jari |
236 |
=head1 OPTIONS |
937 |
20 Jan 09 |
jari |
237 |
|
937 |
20 Jan 09 |
jari |
238 |
=over 8 |
937 |
20 Jan 09 |
jari |
239 |
|
937 |
20 Jan 09 |
jari |
240 |
=item B<--samples_file=<name>> |
937 |
20 Jan 09 |
jari |
241 |
|
937 |
20 Jan 09 |
jari |
242 |
Specify a file with sample probe data from BeadStudio. |
937 |
20 Jan 09 |
jari |
243 |
|
937 |
20 Jan 09 |
jari |
244 |
=item B<--controls_file=<name>> |
937 |
20 Jan 09 |
jari |
245 |
|
937 |
20 Jan 09 |
jari |
246 |
Specify a file with controls probe data from BeadStudio. |
937 |
20 Jan 09 |
jari |
247 |
|
937 |
20 Jan 09 |
jari |
248 |
=back |
937 |
20 Jan 09 |
jari |
249 |
|
937 |
20 Jan 09 |
jari |
250 |
=head1 AUTHORS |
937 |
20 Jan 09 |
jari |
251 |
|
937 |
20 Jan 09 |
jari |
252 |
Markus Ringner |
937 |
20 Jan 09 |
jari |
253 |
|
937 |
20 Jan 09 |
jari |
254 |
Please report bugs to markus.ringner@med.lu.se |
937 |
20 Jan 09 |
jari |
255 |
|
937 |
20 Jan 09 |
jari |
256 |
=cut |
937 |
20 Jan 09 |
jari |
257 |
|
937 |
20 Jan 09 |
jari |
# End of documentation |