720 |
11 Jun 08 |
jari |
#!/usr/bin/perl |
720 |
11 Jun 08 |
jari |
2 |
|
720 |
11 Jun 08 |
jari |
# $Id$ |
720 |
11 Jun 08 |
jari |
4 |
|
720 |
11 Jun 08 |
jari |
# Document name: cgh_dataDumper.pl |
720 |
11 Jun 08 |
jari |
# cgh_dataDumper.pl version 2.0 |
720 |
11 Jun 08 |
jari |
7 |
# |
720 |
11 Jun 08 |
jari |
8 |
# |
720 |
11 Jun 08 |
jari |
# Copyright (C) 2004 Johan Staaf |
720 |
11 Jun 08 |
jari |
10 |
|
720 |
11 Jun 08 |
jari |
# cgh_dataDumper.pl is free software; you can redistribute it and/or |
720 |
11 Jun 08 |
jari |
# modify it under the terms of the GNU General Public License |
720 |
11 Jun 08 |
jari |
# as published by the Free Software Foundation; either version 2 |
720 |
11 Jun 08 |
jari |
# of the License, or (at your option) any later version. |
720 |
11 Jun 08 |
jari |
15 |
|
720 |
11 Jun 08 |
jari |
# This program is distributed in the hope that it will be useful, |
720 |
11 Jun 08 |
jari |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
720 |
11 Jun 08 |
jari |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
720 |
11 Jun 08 |
jari |
# GNU General Public License for more details. |
720 |
11 Jun 08 |
jari |
20 |
|
720 |
11 Jun 08 |
jari |
# You should have received a copy of the GNU General Public License |
720 |
11 Jun 08 |
jari |
# along with this program; if not, write to the Free Software |
720 |
11 Jun 08 |
jari |
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
720 |
11 Jun 08 |
jari |
24 |
|
720 |
11 Jun 08 |
jari |
# johan.staaf@onk.lu.se |
720 |
11 Jun 08 |
jari |
# Johan Staaf, Dept Oncology, Lund University, S-221 85 Lund, Sweden |
720 |
11 Jun 08 |
jari |
27 |
|
720 |
11 Jun 08 |
jari |
28 |
use strict; |
720 |
11 Jun 08 |
jari |
29 |
use FindBin (); |
720 |
11 Jun 08 |
jari |
30 |
|
720 |
11 Jun 08 |
jari |
# VER 1.0 of cgh_dataDumper.pl |
720 |
11 Jun 08 |
jari |
32 |
|
720 |
11 Jun 08 |
jari |
33 |
|
720 |
11 Jun 08 |
jari |
######## BASE file requirements ######### |
720 |
11 Jun 08 |
jari |
# The BASEfile format should be normal. |
720 |
11 Jun 08 |
jari |
# expect per row: |
720 |
11 Jun 08 |
jari |
# [reporterId] [geneSymbol] [chromosome] [cytoBand] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
38 |
|
720 |
11 Jun 08 |
jari |
39 |
|
720 |
11 Jun 08 |
jari |
40 |
############################################ |
720 |
11 Jun 08 |
jari |
############# MAIN PROGRAM ############# |
720 |
11 Jun 08 |
jari |
42 |
############################################ |
720 |
11 Jun 08 |
jari |
43 |
|
720 |
11 Jun 08 |
jari |
44 |
|
720 |
11 Jun 08 |
jari |
### Main Variables ### |
720 |
11 Jun 08 |
jari |
46 |
my (@final); |
720 |
11 Jun 08 |
jari |
47 |
my (%param_hash,%base_hash,%bacToGene); |
720 |
11 Jun 08 |
jari |
48 |
my @annoStat; |
720 |
11 Jun 08 |
jari |
49 |
################# |
720 |
11 Jun 08 |
jari |
50 |
|
720 |
11 Jun 08 |
jari |
## Read the file and extract BASE column data ####### |
720 |
11 Jun 08 |
jari |
52 |
|
720 |
11 Jun 08 |
jari |
53 |
$/="%"; #Gives every section as one lump |
720 |
11 Jun 08 |
jari |
54 |
my $i=0; |
720 |
11 Jun 08 |
jari |
55 |
|
720 |
11 Jun 08 |
jari |
56 |
while(<>){ |
720 |
11 Jun 08 |
jari |
57 |
$i++; |
720 |
11 Jun 08 |
jari |
58 |
chomp; |
720 |
11 Jun 08 |
jari |
59 |
if($i==1){ #Parameters |
720 |
11 Jun 08 |
jari |
60 |
|
720 |
11 Jun 08 |
jari |
61 |
$_ =~ /filename\t(.+)\n/; |
720 |
11 Jun 08 |
jari |
62 |
$param_hash{'filename'}=$1; |
720 |
11 Jun 08 |
jari |
63 |
$_ =~ /sort\t(\w+)\n/; |
720 |
11 Jun 08 |
jari |
64 |
$param_hash{'sort'}=$1; |
720 |
11 Jun 08 |
jari |
65 |
$_ =~ /exportMode\t(\w+)\n/; |
720 |
11 Jun 08 |
jari |
66 |
$param_hash{'exportMode'}=$1; |
720 |
11 Jun 08 |
jari |
67 |
$_ =~ /annotationType\t(.+)\n/; |
720 |
11 Jun 08 |
jari |
68 |
$param_hash{'annotationType'}=$1; |
720 |
11 Jun 08 |
jari |
69 |
|
720 |
11 Jun 08 |
jari |
70 |
$_ =~ /annoForStat\t(.+)\n/; #in format |ER Status|p_brca_family_status|etc..| |
720 |
11 Jun 08 |
jari |
71 |
$param_hash{'annoForStat'}=$1; |
720 |
11 Jun 08 |
jari |
72 |
my @tmp=split(/\|/,$param_hash{'annoForStat'}); #holding the annotations to get stats for! |
720 |
11 Jun 08 |
jari |
73 |
foreach my $line (@tmp){ |
720 |
11 Jun 08 |
jari |
74 |
if($line ne ""){ |
720 |
11 Jun 08 |
jari |
75 |
push(@annoStat,$line); |
720 |
11 Jun 08 |
jari |
76 |
} |
720 |
11 Jun 08 |
jari |
77 |
} |
720 |
11 Jun 08 |
jari |
78 |
}elsif($i==2){ #section assays info |
720 |
11 Jun 08 |
jari |
79 |
$_=~ /count\t(\d*)/; |
720 |
11 Jun 08 |
jari |
80 |
$base_hash{'NumberOfAssays'}=$1; |
720 |
11 Jun 08 |
jari |
81 |
$_=~ /columns\t(.*)/; |
720 |
11 Jun 08 |
jari |
82 |
$base_hash{'AssayColumns'}=$1; |
720 |
11 Jun 08 |
jari |
83 |
$_=~ /annotationColumns\t(.*)/; |
720 |
11 Jun 08 |
jari |
84 |
$base_hash{'AnnotationColumns'}=$1; |
720 |
11 Jun 08 |
jari |
85 |
}elsif($i==3){ #assay info + section spots |
720 |
11 Jun 08 |
jari |
86 |
$_=~ /columns\t(.*)\tassayData/; |
720 |
11 Jun 08 |
jari |
87 |
$base_hash{'Columns'}=$1; |
720 |
11 Jun 08 |
jari |
88 |
$_=~ /channels\t(.*)/; |
720 |
11 Jun 08 |
jari |
89 |
$base_hash{'channels'}=$1; |
720 |
11 Jun 08 |
jari |
90 |
$_=~/assayFields\t(.*)/; |
720 |
11 Jun 08 |
jari |
91 |
$base_hash{'assayFields'}=$1; |
720 |
11 Jun 08 |
jari |
92 |
(my $text,my $dump)=split("\n\n",$_); |
720 |
11 Jun 08 |
jari |
93 |
$text=~ s/^\n//; |
720 |
11 Jun 08 |
jari |
94 |
$base_hash{'Assays'}=$text; |
720 |
11 Jun 08 |
jari |
95 |
last; #Dont read in the large bulk of data now! |
720 |
11 Jun 08 |
jari |
96 |
} |
720 |
11 Jun 08 |
jari |
97 |
} |
720 |
11 Jun 08 |
jari |
#### OBSERVE, all base headers now printed to STDOUT! |
720 |
11 Jun 08 |
jari |
99 |
|
720 |
11 Jun 08 |
jari |
100 |
|
720 |
11 Jun 08 |
jari |
101 |
|
720 |
11 Jun 08 |
jari |
## Do export of data points, global boolean |
720 |
11 Jun 08 |
jari |
103 |
my $doExport="TRUE"; |
720 |
11 Jun 08 |
jari |
104 |
|
720 |
11 Jun 08 |
jari |
105 |
|
720 |
11 Jun 08 |
jari |
#### Locate the assay field columns for each annotation group to extract data from |
720 |
11 Jun 08 |
jari |
107 |
my @assays = split("\n",$base_hash{'Assays'}); |
720 |
11 Jun 08 |
jari |
108 |
my @assaynames = (); |
720 |
11 Jun 08 |
jari |
109 |
foreach my $line (@assays) { |
720 |
11 Jun 08 |
jari |
110 |
my @tmp=split("\t",$line); |
720 |
11 Jun 08 |
jari |
111 |
push(@assaynames,$tmp[1]); #keep only the assay name in the order sent to the plugin |
720 |
11 Jun 08 |
jari |
112 |
} |
720 |
11 Jun 08 |
jari |
113 |
|
720 |
11 Jun 08 |
jari |
114 |
if($param_hash{'exportMode'} eq "sampleName_annotation"){ |
720 |
11 Jun 08 |
jari |
115 |
my @sampleannotations; |
720 |
11 Jun 08 |
jari |
116 |
|
720 |
11 Jun 08 |
jari |
### Assign an assay to an annotation type and save in @control vector |
720 |
11 Jun 08 |
jari |
118 |
my @base_annotations=split("\t",$base_hash{'AnnotationColumns'}); |
720 |
11 Jun 08 |
jari |
119 |
my $annotation_index= -1; #tracks which column index to use to get the annotation values |
720 |
11 Jun 08 |
jari |
120 |
for (my $i=0;$i<@base_annotations;$i++){ |
720 |
11 Jun 08 |
jari |
121 |
if($param_hash{'annotationType'} eq $base_annotations[$i]){ #found the right annotation! |
720 |
11 Jun 08 |
jari |
122 |
$annotation_index=$i; |
720 |
11 Jun 08 |
jari |
123 |
last; |
720 |
11 Jun 08 |
jari |
124 |
} |
720 |
11 Jun 08 |
jari |
125 |
} |
720 |
11 Jun 08 |
jari |
126 |
for (my $i=0;$i<@assays;$i++) { |
720 |
11 Jun 08 |
jari |
127 |
my @tmp=split("\t",$assays[$i]); |
720 |
11 Jun 08 |
jari |
128 |
if($annotation_index > -1){ # a real annotation index found |
720 |
11 Jun 08 |
jari |
129 |
if($tmp[$annotation_index+2] eq ""){ #[assaynbr] [assayname] then annotations, thats why +2 |
720 |
11 Jun 08 |
jari |
130 |
$sampleannotations[$i]="NONE"; #Na for annotation for this assay! |
720 |
11 Jun 08 |
jari |
131 |
}else{ |
720 |
11 Jun 08 |
jari |
132 |
$sampleannotations[$i]=$tmp[$annotation_index+2]; #a real annotation |
720 |
11 Jun 08 |
jari |
133 |
} |
720 |
11 Jun 08 |
jari |
134 |
}else{ |
720 |
11 Jun 08 |
jari |
#basically this means no annotation analysis from start, use Na instead in control file |
720 |
11 Jun 08 |
jari |
136 |
$sampleannotations[$i]="NONE"; #Na for annotation for this assay! |
720 |
11 Jun 08 |
jari |
137 |
|
720 |
11 Jun 08 |
jari |
138 |
} |
720 |
11 Jun 08 |
jari |
139 |
} |
720 |
11 Jun 08 |
jari |
140 |
|
720 |
11 Jun 08 |
jari |
### print the binding of @assaynames, @sampleannotations |
720 |
11 Jun 08 |
jari |
142 |
open(myfile,">SampleNames_and_selectedAnnotation.txt"); |
720 |
11 Jun 08 |
jari |
143 |
for (my $i=0;$i<@assaynames;$i++) { |
720 |
11 Jun 08 |
jari |
144 |
print myfile "$assaynames[$i]\t$sampleannotations[$i]\n"; |
720 |
11 Jun 08 |
jari |
145 |
} |
720 |
11 Jun 08 |
jari |
146 |
close(myfile); |
720 |
11 Jun 08 |
jari |
147 |
$doExport="FALSE"; |
720 |
11 Jun 08 |
jari |
148 |
}elsif($param_hash{'exportMode'} eq "sampleName"){ |
720 |
11 Jun 08 |
jari |
149 |
|
720 |
11 Jun 08 |
jari |
### print @assaynames |
720 |
11 Jun 08 |
jari |
151 |
open(myfile,">SampleNames.txt"); |
720 |
11 Jun 08 |
jari |
152 |
for (my $i=0;$i<@assaynames;$i++) { |
720 |
11 Jun 08 |
jari |
153 |
print myfile "$assaynames[$i]\n"; |
720 |
11 Jun 08 |
jari |
154 |
} |
720 |
11 Jun 08 |
jari |
155 |
close(myfile); |
720 |
11 Jun 08 |
jari |
156 |
$doExport="FALSE"; |
720 |
11 Jun 08 |
jari |
157 |
}elsif(($param_hash{'exportMode'} eq "annotationStatistics") && ($param_hash{'annoForStat'} ne "NULL") ){ |
720 |
11 Jun 08 |
jari |
158 |
my @base_annotations=split("\t",$base_hash{'AnnotationColumns'}); #all possible base annotations for this sample |
720 |
11 Jun 08 |
jari |
159 |
|
720 |
11 Jun 08 |
jari |
### Fixing the issue with blanks ( :( ) |
720 |
11 Jun 08 |
jari |
161 |
my $annos=$base_hash{'Assays'}; |
720 |
11 Jun 08 |
jari |
162 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
163 |
$annos =~s/\t\n/\tNA\n/g; |
720 |
11 Jun 08 |
jari |
164 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
165 |
my @annos=split("\n",$annos); #a row is a sample with its annotations |
720 |
11 Jun 08 |
jari |
166 |
|
720 |
11 Jun 08 |
jari |
#print "$base_hash{'Assays'}"; |
720 |
11 Jun 08 |
jari |
#print "\n#####\n"; |
720 |
11 Jun 08 |
jari |
#print "nbr assay $base_hash{'AnnotationColumns'}\n"; |
720 |
11 Jun 08 |
jari |
#print "$annos"; |
720 |
11 Jun 08 |
jari |
171 |
|
720 |
11 Jun 08 |
jari |
### Find the subset of annotations to get statistics for |
720 |
11 Jun 08 |
jari |
173 |
my @annotation_index; #tracks which column index to use to get the annotation values |
720 |
11 Jun 08 |
jari |
174 |
for (my $i=0;$i<@base_annotations;$i++){ |
720 |
11 Jun 08 |
jari |
175 |
foreach my $anno (@annoStat){ |
720 |
11 Jun 08 |
jari |
176 |
if(($anno eq $base_annotations[$i]) && ($anno ne "")){ #found the right annotation! |
720 |
11 Jun 08 |
jari |
177 |
push(@annotation_index,$i); |
720 |
11 Jun 08 |
jari |
178 |
last; |
720 |
11 Jun 08 |
jari |
179 |
} |
720 |
11 Jun 08 |
jari |
180 |
} |
720 |
11 Jun 08 |
jari |
181 |
} |
720 |
11 Jun 08 |
jari |
182 |
|
720 |
11 Jun 08 |
jari |
# @annoStat and @annotation_index are now matched! |
720 |
11 Jun 08 |
jari |
184 |
my %annoStat; #key = selected annotation type, value = the column of values for that type, |
720 |
11 Jun 08 |
jari |
# separated with tab. |
720 |
11 Jun 08 |
jari |
186 |
my %annoUnique; #key = selected annotation type, value = unique annotation values for that type |
720 |
11 Jun 08 |
jari |
187 |
for(my $i=0;$i<@annoStat;$i++){ |
720 |
11 Jun 08 |
jari |
#foreach selected annotation extract its column |
720 |
11 Jun 08 |
jari |
189 |
my @tempCol; |
720 |
11 Jun 08 |
jari |
190 |
foreach my $line (@annos) { |
720 |
11 Jun 08 |
jari |
191 |
my @tmp=split("\t",$line); |
720 |
11 Jun 08 |
jari |
192 |
shift(@tmp); #removing assay id |
720 |
11 Jun 08 |
jari |
193 |
shift(@tmp); #removing assay name |
720 |
11 Jun 08 |
jari |
# only the annotations are left! |
720 |
11 Jun 08 |
jari |
195 |
push(@tempCol,$tmp[$annotation_index[$i]]); |
720 |
11 Jun 08 |
jari |
196 |
} |
720 |
11 Jun 08 |
jari |
197 |
$annoStat{$annoStat[$i]}=join("\t",@tempCol); |
720 |
11 Jun 08 |
jari |
198 |
my %saw; |
720 |
11 Jun 08 |
jari |
199 |
@saw{@tempCol}=(); |
720 |
11 Jun 08 |
jari |
200 |
$annoUnique{$annoStat[$i]}=join("\t",keys(%saw)); |
720 |
11 Jun 08 |
jari |
201 |
} |
720 |
11 Jun 08 |
jari |
202 |
open(myfile,">AnnotationStatistics.txt"); |
720 |
11 Jun 08 |
jari |
203 |
foreach my $annotype (keys(%annoUnique)){ |
720 |
11 Jun 08 |
jari |
#count how many instances of each unique annotation value there is for this annotation type |
720 |
11 Jun 08 |
jari |
205 |
my @uniqueV=split("\t",$annoUnique{$annotype}); |
720 |
11 Jun 08 |
jari |
206 |
my @values=split("\t",$annoStat{$annotype}); |
720 |
11 Jun 08 |
jari |
207 |
my %saw; |
720 |
11 Jun 08 |
jari |
208 |
print myfile "$annotype\tNbrAssays\n"; |
720 |
11 Jun 08 |
jari |
209 |
foreach my $uniqueValue (@uniqueV){ |
720 |
11 Jun 08 |
jari |
210 |
my $r=scalar(grep(/$uniqueValue/,@values)); |
720 |
11 Jun 08 |
jari |
211 |
print myfile "$uniqueValue\t$r\n"; |
720 |
11 Jun 08 |
jari |
212 |
} |
720 |
11 Jun 08 |
jari |
213 |
print myfile "\n"; |
720 |
11 Jun 08 |
jari |
214 |
} |
720 |
11 Jun 08 |
jari |
215 |
close(myfile); |
720 |
11 Jun 08 |
jari |
216 |
$doExport="FALSE"; |
720 |
11 Jun 08 |
jari |
217 |
}elsif($param_hash{'exportMode'} eq "annotationAll"){ |
720 |
11 Jun 08 |
jari |
#create an annotation file for all annotations |
720 |
11 Jun 08 |
jari |
219 |
|
720 |
11 Jun 08 |
jari |
220 |
my @base_annotations=split("\t",$base_hash{'AnnotationColumns'}); #all possible base annotations for this sample |
720 |
11 Jun 08 |
jari |
221 |
|
720 |
11 Jun 08 |
jari |
### Fixing the issue with blanks ( :( ) |
720 |
11 Jun 08 |
jari |
223 |
my $annos=$base_hash{'Assays'}; |
720 |
11 Jun 08 |
jari |
224 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
225 |
$annos =~s/\t\n/\tNA\n/g; |
720 |
11 Jun 08 |
jari |
226 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
227 |
my @annos=split("\n",$annos); #a row is a sample with its annotations |
720 |
11 Jun 08 |
jari |
228 |
|
720 |
11 Jun 08 |
jari |
229 |
open(myfile,">AnnotationAllFile.txt"); |
720 |
11 Jun 08 |
jari |
230 |
print myfile "Assay\t$base_hash{'AnnotationColumns'}\n"; |
720 |
11 Jun 08 |
jari |
231 |
foreach my $line (@annos){ |
720 |
11 Jun 08 |
jari |
232 |
my @tmp=split("\t",$line); |
720 |
11 Jun 08 |
jari |
233 |
shift(@tmp); #remove the BASE assay id nummer |
720 |
11 Jun 08 |
jari |
234 |
$line=join("\t",@tmp); |
720 |
11 Jun 08 |
jari |
235 |
print myfile "$line\n"; |
720 |
11 Jun 08 |
jari |
236 |
} |
720 |
11 Jun 08 |
jari |
237 |
close(myfile); |
720 |
11 Jun 08 |
jari |
238 |
$doExport="FALSE"; |
720 |
11 Jun 08 |
jari |
239 |
}elsif(($param_hash{'exportMode'} eq "annotationSelected") && ($param_hash{'annoForStat'} ne "NULL")){ |
720 |
11 Jun 08 |
jari |
#create an annotation file based on only selected annotations |
720 |
11 Jun 08 |
jari |
241 |
|
720 |
11 Jun 08 |
jari |
242 |
my @base_annotations=split("\t",$base_hash{'AnnotationColumns'}); #all possible base annotations for this sample |
720 |
11 Jun 08 |
jari |
243 |
|
720 |
11 Jun 08 |
jari |
### Fixing the issue with blanks ( :( ) |
720 |
11 Jun 08 |
jari |
245 |
my $annos=$base_hash{'Assays'}; |
720 |
11 Jun 08 |
jari |
246 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
247 |
$annos =~s/\t\n/\tNA\n/g; |
720 |
11 Jun 08 |
jari |
248 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
249 |
my @annos=split("\n",$annos); #a row is a sample with its annotations |
720 |
11 Jun 08 |
jari |
250 |
|
720 |
11 Jun 08 |
jari |
251 |
|
720 |
11 Jun 08 |
jari |
### Find the subset of annotations to get statistics for |
720 |
11 Jun 08 |
jari |
253 |
my @annotation_index; #tracks which column index to use to get the annotation values |
720 |
11 Jun 08 |
jari |
254 |
my @matched_annoStat; |
720 |
11 Jun 08 |
jari |
255 |
|
720 |
11 Jun 08 |
jari |
256 |
foreach my $anno (@annoStat){ |
720 |
11 Jun 08 |
jari |
257 |
if($anno =~/~/){ |
720 |
11 Jun 08 |
jari |
# pattern matching!! Get all annotations that match the specified pattern |
720 |
11 Jun 08 |
jari |
# e.g. ~Her130 = all Her130 annotations |
720 |
11 Jun 08 |
jari |
260 |
$anno=~ /~(.+)/; |
720 |
11 Jun 08 |
jari |
261 |
my $pattern=$1; |
720 |
11 Jun 08 |
jari |
262 |
for(my $i=0;$i<@base_annotations;$i++){ |
720 |
11 Jun 08 |
jari |
263 |
if($base_annotations[$i]=~/$pattern/){ |
720 |
11 Jun 08 |
jari |
264 |
push(@matched_annoStat,$base_annotations[$i]); |
720 |
11 Jun 08 |
jari |
265 |
push(@annotation_index,$i); |
720 |
11 Jun 08 |
jari |
266 |
} |
720 |
11 Jun 08 |
jari |
267 |
} |
720 |
11 Jun 08 |
jari |
268 |
}else{ |
720 |
11 Jun 08 |
jari |
269 |
for (my $i=0;$i<@base_annotations;$i++){ |
720 |
11 Jun 08 |
jari |
270 |
if(($anno eq $base_annotations[$i]) && ($anno ne "")){ #found the right annotation! |
720 |
11 Jun 08 |
jari |
271 |
push(@matched_annoStat,$anno); |
720 |
11 Jun 08 |
jari |
272 |
push(@annotation_index,$i); |
720 |
11 Jun 08 |
jari |
273 |
last; |
720 |
11 Jun 08 |
jari |
274 |
} |
720 |
11 Jun 08 |
jari |
275 |
} |
720 |
11 Jun 08 |
jari |
276 |
} |
720 |
11 Jun 08 |
jari |
277 |
} |
720 |
11 Jun 08 |
jari |
# @annoStat and @annotation_index are now matched! |
720 |
11 Jun 08 |
jari |
279 |
|
720 |
11 Jun 08 |
jari |
280 |
my $annoHeader=join("\t","Assay",@matched_annoStat); |
720 |
11 Jun 08 |
jari |
281 |
open(myfile,">AnnotationSelectedFile.txt"); |
720 |
11 Jun 08 |
jari |
282 |
print myfile "$annoHeader\n"; |
720 |
11 Jun 08 |
jari |
283 |
foreach my $line (@annos){ |
720 |
11 Jun 08 |
jari |
284 |
my @tmp=split("\t",$line); |
720 |
11 Jun 08 |
jari |
285 |
my $printline=$tmp[1]; #assay name |
720 |
11 Jun 08 |
jari |
286 |
for(my $i=0;$i<@matched_annoStat;$i++){ |
720 |
11 Jun 08 |
jari |
#print "$annotation_index[$i] $tmp[$annotation_index[$i]+2]\n"; |
720 |
11 Jun 08 |
jari |
288 |
$printline=join("\t",$printline,$tmp[$annotation_index[$i]+2]); |
720 |
11 Jun 08 |
jari |
289 |
} |
720 |
11 Jun 08 |
jari |
290 |
print myfile "$printline\n"; |
720 |
11 Jun 08 |
jari |
291 |
} |
720 |
11 Jun 08 |
jari |
292 |
close(myfile); |
720 |
11 Jun 08 |
jari |
293 |
$doExport="FALSE"; |
720 |
11 Jun 08 |
jari |
294 |
}elsif(($param_hash{'exportMode'} eq "annotationDisplaySelected") && ($param_hash{'annoForStat'} ne "NULL")){ |
720 |
11 Jun 08 |
jari |
#create an annotation display file based on only selected annotations |
720 |
11 Jun 08 |
jari |
296 |
|
720 |
11 Jun 08 |
jari |
297 |
my @base_annotations=split("\t",$base_hash{'AnnotationColumns'}); #all possible base annotations for this sample |
720 |
11 Jun 08 |
jari |
298 |
|
720 |
11 Jun 08 |
jari |
### Fixing the issue with blanks ( :( ) |
720 |
11 Jun 08 |
jari |
300 |
my $annos=$base_hash{'Assays'}; |
720 |
11 Jun 08 |
jari |
301 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
302 |
$annos =~s/\t\n/\tNA\n/g; |
720 |
11 Jun 08 |
jari |
303 |
$annos =~s/\t\t/\tNA\t/g; |
720 |
11 Jun 08 |
jari |
304 |
my @annos=split("\n",$annos); #a row is a sample with its annotations |
720 |
11 Jun 08 |
jari |
305 |
|
720 |
11 Jun 08 |
jari |
306 |
|
720 |
11 Jun 08 |
jari |
### Find the subset of annotations to get statistics for |
720 |
11 Jun 08 |
jari |
308 |
my @annotation_index; #tracks which column index to use to get the annotation values |
720 |
11 Jun 08 |
jari |
309 |
my @matched_annoStat; |
720 |
11 Jun 08 |
jari |
310 |
|
720 |
11 Jun 08 |
jari |
311 |
foreach my $anno (@annoStat){ |
720 |
11 Jun 08 |
jari |
312 |
if($anno =~/~/){ |
720 |
11 Jun 08 |
jari |
# pattern matching!! Get all annotations that match the specified pattern |
720 |
11 Jun 08 |
jari |
# e.g. ~Her130 = all Her130 annotations |
720 |
11 Jun 08 |
jari |
315 |
$anno=~ /~(.+)/; |
720 |
11 Jun 08 |
jari |
316 |
my $pattern=$1; |
720 |
11 Jun 08 |
jari |
317 |
for(my $i=0;$i<@base_annotations;$i++){ |
720 |
11 Jun 08 |
jari |
318 |
if($base_annotations[$i]=~/$pattern/){ |
720 |
11 Jun 08 |
jari |
319 |
push(@matched_annoStat,$base_annotations[$i]); |
720 |
11 Jun 08 |
jari |
320 |
push(@annotation_index,$i); |
720 |
11 Jun 08 |
jari |
321 |
} |
720 |
11 Jun 08 |
jari |
322 |
} |
720 |
11 Jun 08 |
jari |
323 |
}else{ |
720 |
11 Jun 08 |
jari |
324 |
for (my $i=0;$i<@base_annotations;$i++){ |
720 |
11 Jun 08 |
jari |
325 |
if(($anno eq $base_annotations[$i]) && ($anno ne "")){ #found the right annotation! |
720 |
11 Jun 08 |
jari |
326 |
push(@matched_annoStat,$anno); |
720 |
11 Jun 08 |
jari |
327 |
push(@annotation_index,$i); |
720 |
11 Jun 08 |
jari |
328 |
last; |
720 |
11 Jun 08 |
jari |
329 |
} |
720 |
11 Jun 08 |
jari |
330 |
} |
720 |
11 Jun 08 |
jari |
331 |
} |
720 |
11 Jun 08 |
jari |
332 |
} |
720 |
11 Jun 08 |
jari |
# @annoStat and @annotation_index are now matched! |
720 |
11 Jun 08 |
jari |
# i.e matched_annoStat contains all annotation types to extract data from, |
720 |
11 Jun 08 |
jari |
# and annotation_index contains which columns to use respectively. |
720 |
11 Jun 08 |
jari |
336 |
|
720 |
11 Jun 08 |
jari |
# go through each matched_annoStat entry, find the unique annotation values (k) |
720 |
11 Jun 08 |
jari |
# and for each unique value, create a column with value 1 for match on the unique value |
720 |
11 Jun 08 |
jari |
#for a given sample, 0 for no match. Iterate this over all matched annotation types. |
720 |
11 Jun 08 |
jari |
# blank samples have the value NA in @annos. They retain NA in the export, however NA is not |
720 |
11 Jun 08 |
jari |
# allowed as a column type! |
720 |
11 Jun 08 |
jari |
342 |
|
720 |
11 Jun 08 |
jari |
343 |
my @annotationDisplayMatrix; #N rows = N samples |
720 |
11 Jun 08 |
jari |
344 |
my @annotationDisplayHeader; |
720 |
11 Jun 08 |
jari |
345 |
push(@annotationDisplayHeader,"AssayName"); |
720 |
11 Jun 08 |
jari |
346 |
|
720 |
11 Jun 08 |
jari |
#Initiate the Display matrix with sample names |
720 |
11 Jun 08 |
jari |
348 |
for(my $j=0;$j<@annos;$j++){ |
720 |
11 Jun 08 |
jari |
349 |
my @tmp=split("\t",$annos[$j]); |
720 |
11 Jun 08 |
jari |
350 |
push(@annotationDisplayMatrix,$tmp[1]); #initiate with sample name! |
720 |
11 Jun 08 |
jari |
351 |
} |
720 |
11 Jun 08 |
jari |
352 |
|
720 |
11 Jun 08 |
jari |
353 |
for(my $i=0;$i<@matched_annoStat;$i++){ |
720 |
11 Jun 08 |
jari |
354 |
my %uniqueValues; |
720 |
11 Jun 08 |
jari |
355 |
my @uniqueValueColumns; |
720 |
11 Jun 08 |
jari |
356 |
|
720 |
11 Jun 08 |
jari |
#find unique annotation values |
720 |
11 Jun 08 |
jari |
358 |
for(my $j=0;$j<@annos;$j++){ |
720 |
11 Jun 08 |
jari |
359 |
my @tmp=split("\t",$annos[$j]); |
720 |
11 Jun 08 |
jari |
360 |
if(($tmp[$annotation_index[$i]+2] eq "NA") || ($tmp[$annotation_index[$i]+2] eq "")){ |
720 |
11 Jun 08 |
jari |
#not allowed as a unique entry |
720 |
11 Jun 08 |
jari |
362 |
}else{ |
720 |
11 Jun 08 |
jari |
363 |
$uniqueValues{$tmp[$annotation_index[$i]+2]}=""; |
720 |
11 Jun 08 |
jari |
364 |
} |
720 |
11 Jun 08 |
jari |
365 |
} |
720 |
11 Jun 08 |
jari |
366 |
my @uniqueValueColumn=sort(keys(%uniqueValues)); |
720 |
11 Jun 08 |
jari |
367 |
|
720 |
11 Jun 08 |
jari |
368 |
|
720 |
11 Jun 08 |
jari |
#foreach of the unique annotation values, make a column, 1 for present, 0 for absent |
720 |
11 Jun 08 |
jari |
# for each sample |
720 |
11 Jun 08 |
jari |
371 |
foreach my $uniqueValue (@uniqueValueColumn){ |
720 |
11 Jun 08 |
jari |
#my $ttt<-$uniqueValue; |
720 |
11 Jun 08 |
jari |
#$ttt =~s/\s/\_/g; |
720 |
11 Jun 08 |
jari |
374 |
push(@annotationDisplayHeader,$uniqueValue); #push to the header |
720 |
11 Jun 08 |
jari |
375 |
for(my $j=0;$j<@annos;$j++){ |
720 |
11 Jun 08 |
jari |
#for each sample |
720 |
11 Jun 08 |
jari |
377 |
my @tmp=split("\t",$annos[$j]); |
720 |
11 Jun 08 |
jari |
378 |
|
720 |
11 Jun 08 |
jari |
379 |
if($tmp[$annotation_index[$i]+2] eq $uniqueValue){ |
720 |
11 Jun 08 |
jari |
# push in a 1 |
720 |
11 Jun 08 |
jari |
381 |
$annotationDisplayMatrix[$j]=$annotationDisplayMatrix[$j]."\t".1; |
720 |
11 Jun 08 |
jari |
382 |
}else{ |
720 |
11 Jun 08 |
jari |
#push in a 0 |
720 |
11 Jun 08 |
jari |
384 |
$annotationDisplayMatrix[$j]=$annotationDisplayMatrix[$j]."\t".0; |
720 |
11 Jun 08 |
jari |
385 |
} |
720 |
11 Jun 08 |
jari |
386 |
} |
720 |
11 Jun 08 |
jari |
387 |
} |
720 |
11 Jun 08 |
jari |
388 |
} |
720 |
11 Jun 08 |
jari |
389 |
|
720 |
11 Jun 08 |
jari |
390 |
my $annoHeader= join("\t",@annotationDisplayHeader); |
720 |
11 Jun 08 |
jari |
391 |
|
720 |
11 Jun 08 |
jari |
392 |
open(myfile,">AnnotationDisplaySelectedFile.txt"); |
720 |
11 Jun 08 |
jari |
393 |
print myfile "$annoHeader\n"; |
720 |
11 Jun 08 |
jari |
394 |
foreach my $row (@annotationDisplayMatrix){ |
720 |
11 Jun 08 |
jari |
395 |
print myfile "$row\n"; |
720 |
11 Jun 08 |
jari |
396 |
|
720 |
11 Jun 08 |
jari |
397 |
} |
720 |
11 Jun 08 |
jari |
398 |
close(myfile); |
720 |
11 Jun 08 |
jari |
399 |
$doExport="FALSE"; |
720 |
11 Jun 08 |
jari |
400 |
}#end annotationDisplay |
720 |
11 Jun 08 |
jari |
401 |
|
720 |
11 Jun 08 |
jari |
402 |
|
720 |
11 Jun 08 |
jari |
403 |
if($doExport eq "TRUE"){ |
720 |
11 Jun 08 |
jari |
#### Determine how many common columns to add to get to assaydata |
720 |
11 Jun 08 |
jari |
405 |
my @cc = split("\t",$base_hash{'Columns'}); |
720 |
11 Jun 08 |
jari |
406 |
|
720 |
11 Jun 08 |
jari |
#### Make header and calculate nbr of data columns in total |
720 |
11 Jun 08 |
jari |
408 |
my $header=join("\t",@cc,@assaynames); |
720 |
11 Jun 08 |
jari |
409 |
my $nbrCols=scalar(@cc)+scalar(@assaynames); |
720 |
11 Jun 08 |
jari |
410 |
|
720 |
11 Jun 08 |
jari |
411 |
|
720 |
11 Jun 08 |
jari |
####### Parse the spot data one row at a time ####### |
720 |
11 Jun 08 |
jari |
# each row looks like: |
720 |
11 Jun 08 |
jari |
# [reporterId] [geneSymbol] [chromosome] [cytoBand] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
415 |
my $chromosomeIndex=2; |
720 |
11 Jun 08 |
jari |
416 |
my $geneIndex=1; |
720 |
11 Jun 08 |
jari |
417 |
my $repIdIndex=0; |
720 |
11 Jun 08 |
jari |
418 |
my $endPositionIndex=5; |
720 |
11 Jun 08 |
jari |
419 |
my @highlightToWrite=(); |
720 |
11 Jun 08 |
jari |
420 |
my @toSort=(); #vector for holding data to sort in format as above |
720 |
11 Jun 08 |
jari |
421 |
|
720 |
11 Jun 08 |
jari |
422 |
$/="\n"; #reset |
720 |
11 Jun 08 |
jari |
423 |
my $found = "FALSE"; |
720 |
11 Jun 08 |
jari |
424 |
|
720 |
11 Jun 08 |
jari |
425 |
|
720 |
11 Jun 08 |
jari |
## Create a file with the assay names for the stac_format.R script. Also modify the header for stac output |
720 |
11 Jun 08 |
jari |
## so that the header is sample1 .. sampleN instead of real assay names (column conflict in R for weird assay names |
720 |
11 Jun 08 |
jari |
428 |
if($param_hash{'exportMode'} eq "stac"){ |
720 |
11 Jun 08 |
jari |
429 |
$header=join("\t",@cc); |
720 |
11 Jun 08 |
jari |
430 |
my $index=0; |
720 |
11 Jun 08 |
jari |
431 |
open(myoutfile,">assays.txt"); |
720 |
11 Jun 08 |
jari |
432 |
foreach my $line (@assaynames){ |
720 |
11 Jun 08 |
jari |
433 |
$index++; |
720 |
11 Jun 08 |
jari |
434 |
print myoutfile "$line\n"; |
720 |
11 Jun 08 |
jari |
435 |
$header=$header."\t"."sample".$index; |
720 |
11 Jun 08 |
jari |
436 |
} |
720 |
11 Jun 08 |
jari |
437 |
close(myoutfile); |
720 |
11 Jun 08 |
jari |
438 |
} |
720 |
11 Jun 08 |
jari |
439 |
|
720 |
11 Jun 08 |
jari |
440 |
if(($param_hash{'exportMode'} eq "standard_lite") || ($param_hash{'exportMode'} eq "singleLite")){ |
720 |
11 Jun 08 |
jari |
#lite format is reporterId,chromosome,startPosition,endPosition |
720 |
11 Jun 08 |
jari |
442 |
$header=join("\t","reporterId","chromosome","startPosition","endPosition",@assaynames); |
720 |
11 Jun 08 |
jari |
443 |
} |
720 |
11 Jun 08 |
jari |
444 |
|
720 |
11 Jun 08 |
jari |
445 |
my $file="basematrix.txt"; |
720 |
11 Jun 08 |
jari |
446 |
if($param_hash{'filename'} ne "NULL"){ |
720 |
11 Jun 08 |
jari |
447 |
$file=$param_hash{'filename'}; |
720 |
11 Jun 08 |
jari |
448 |
} |
720 |
11 Jun 08 |
jari |
449 |
|
720 |
11 Jun 08 |
jari |
## open file for printing of data values |
720 |
11 Jun 08 |
jari |
451 |
if($param_hash{'exportMode'} ne "singleLite"){ |
720 |
11 Jun 08 |
jari |
452 |
open (myoutfile,">$file"); |
720 |
11 Jun 08 |
jari |
453 |
} |
720 |
11 Jun 08 |
jari |
454 |
|
720 |
11 Jun 08 |
jari |
455 |
if(($param_hash{'exportMode'} ne "agilent") || ($param_hash{'exportMode'} ne "bed") || ($param_hash{'exportMode'} ne "singleLite") ){ |
720 |
11 Jun 08 |
jari |
#print defined header if not Agilent mode, or BED or single lite |
720 |
11 Jun 08 |
jari |
457 |
print myoutfile "$header\n"; |
720 |
11 Jun 08 |
jari |
458 |
} |
720 |
11 Jun 08 |
jari |
459 |
|
720 |
11 Jun 08 |
jari |
460 |
while(<>){ |
720 |
11 Jun 08 |
jari |
461 |
chomp; |
720 |
11 Jun 08 |
jari |
462 |
if($_ =~ /section spots/){ #found the spot section |
720 |
11 Jun 08 |
jari |
463 |
$found="TRUE"; |
720 |
11 Jun 08 |
jari |
464 |
next; #skip to next line ? |
720 |
11 Jun 08 |
jari |
465 |
} |
720 |
11 Jun 08 |
jari |
466 |
if(($_ !~/columns/) || ($_ !~/channels 2/) || ($_ !~ /assayFields/) || ($_ !~/assays/) || ($_ !~/%/) && ($found eq "TRUE") ){ |
720 |
11 Jun 08 |
jari |
# now reading real data line |
720 |
11 Jun 08 |
jari |
468 |
if ($_ eq ""){ |
720 |
11 Jun 08 |
jari |
# do nada |
720 |
11 Jun 08 |
jari |
470 |
}else{ |
720 |
11 Jun 08 |
jari |
# [reporterId] [geneSymbol] [chromosome] [cytoBand] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
472 |
my @temp = split("\t",$_); #split each line |
720 |
11 Jun 08 |
jari |
473 |
if( ($temp[$endPositionIndex]>0) && ($temp[$chromosomeIndex]=~ /\w/) && ($temp[$chromosomeIndex]>0) ){ #reporterId must have a bp end and a chromosome not = "" |
720 |
11 Jun 08 |
jari |
474 |
foreach my $row (@temp){ #replace empty string with NaN .. |
720 |
11 Jun 08 |
jari |
475 |
if($row eq ""){ |
720 |
11 Jun 08 |
jari |
476 |
$row = "NaN"; |
720 |
11 Jun 08 |
jari |
477 |
} |
720 |
11 Jun 08 |
jari |
478 |
} |
720 |
11 Jun 08 |
jari |
479 |
if(scalar(@temp)<$nbrCols){ # To cope with dropped tabs.. |
720 |
11 Jun 08 |
jari |
480 |
while(scalar(@temp)<$nbrCols){ |
720 |
11 Jun 08 |
jari |
481 |
push(@temp,"NaN"); |
720 |
11 Jun 08 |
jari |
482 |
} |
720 |
11 Jun 08 |
jari |
483 |
} |
720 |
11 Jun 08 |
jari |
484 |
my $line; |
720 |
11 Jun 08 |
jari |
485 |
if(($param_hash{'exportMode'} eq "standard_lite") || ($param_hash{'exportMode'} eq "singleLite")){ |
720 |
11 Jun 08 |
jari |
486 |
splice(@temp, 1, 1); |
720 |
11 Jun 08 |
jari |
#new format of temp |
720 |
11 Jun 08 |
jari |
# [reporterId] [chromosome] [cytoBand] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
489 |
splice(@temp, 2, 1); |
720 |
11 Jun 08 |
jari |
#new format of temp |
720 |
11 Jun 08 |
jari |
# [reporterId] [chromosome] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
492 |
$line = join("\t",@temp); |
720 |
11 Jun 08 |
jari |
493 |
}elsif($param_hash{'exportMode'} eq "bed"){ |
720 |
11 Jun 08 |
jari |
#format: chr start stop reporterId |
720 |
11 Jun 08 |
jari |
495 |
my $bed_chrom="chr".$temp[$chromosomeIndex]; |
720 |
11 Jun 08 |
jari |
496 |
if($temp[$chromosomeIndex] eq "23"){ |
720 |
11 Jun 08 |
jari |
497 |
$bed_chrom="chrX"; |
720 |
11 Jun 08 |
jari |
498 |
} |
720 |
11 Jun 08 |
jari |
499 |
if($temp[$chromosomeIndex] eq "24"){ |
720 |
11 Jun 08 |
jari |
500 |
$bed_chrom="chrY"; |
720 |
11 Jun 08 |
jari |
501 |
} |
720 |
11 Jun 08 |
jari |
502 |
$line = join("\t",$bed_chrom,$temp[4],$temp[$endPositionIndex],$temp[$repIdIndex]); |
720 |
11 Jun 08 |
jari |
503 |
}else{ |
720 |
11 Jun 08 |
jari |
504 |
$line = join("\t",@temp); |
720 |
11 Jun 08 |
jari |
505 |
} |
720 |
11 Jun 08 |
jari |
506 |
|
720 |
11 Jun 08 |
jari |
507 |
if(($param_hash{'sort'} eq "TRUE") || ($param_hash{'exportMode'} eq "singleLite") || ($param_hash{'exportMode'} eq "agilent") || ($param_hash{'exportMode'} eq "stac") ){ |
720 |
11 Jun 08 |
jari |
508 |
push(@toSort,$line); |
720 |
11 Jun 08 |
jari |
509 |
}else{ |
720 |
11 Jun 08 |
jari |
510 |
print myoutfile "$line\n"; |
720 |
11 Jun 08 |
jari |
511 |
} |
720 |
11 Jun 08 |
jari |
512 |
} |
720 |
11 Jun 08 |
jari |
513 |
} |
720 |
11 Jun 08 |
jari |
514 |
}# end of if ( !~/columns etc.. |
720 |
11 Jun 08 |
jari |
515 |
} #end of while(<>) |
720 |
11 Jun 08 |
jari |
516 |
|
720 |
11 Jun 08 |
jari |
517 |
if($param_hash{'exportMode'} eq "stac"){ |
720 |
11 Jun 08 |
jari |
#to format the data correctly and easiest, use a small R script, stac_format.R |
720 |
11 Jun 08 |
jari |
519 |
open(myfile,"<$FindBin::Bin/stac_format.R"); |
720 |
11 Jun 08 |
jari |
520 |
open (myoutfile, ">stac_formatCommands.R"); |
720 |
11 Jun 08 |
jari |
521 |
while(<myfile>){ |
720 |
11 Jun 08 |
jari |
522 |
chomp; |
720 |
11 Jun 08 |
jari |
523 |
print myoutfile "$_\n"; |
720 |
11 Jun 08 |
jari |
524 |
} |
720 |
11 Jun 08 |
jari |
525 |
close (myfile); |
720 |
11 Jun 08 |
jari |
526 |
close (myoutfile); |
720 |
11 Jun 08 |
jari |
527 |
|
720 |
11 Jun 08 |
jari |
528 |
my $success = system("R --no-save < stac_formatCommands.R > r-debug.txt"); |
720 |
11 Jun 08 |
jari |
529 |
|
720 |
11 Jun 08 |
jari |
#catch the output |
720 |
11 Jun 08 |
jari |
531 |
|
720 |
11 Jun 08 |
jari |
532 |
} |
720 |
11 Jun 08 |
jari |
533 |
|
720 |
11 Jun 08 |
jari |
## Sort data and print it if selected |
720 |
11 Jun 08 |
jari |
535 |
if(($param_hash{'sort'} eq "TRUE") || ($param_hash{'exportMode'} eq "agilent") || ($param_hash{'exportMode'} eq "singleLite")){ |
720 |
11 Jun 08 |
jari |
#sort on chromosome and then on startPosition |
720 |
11 Jun 08 |
jari |
537 |
if($param_hash{'sort'} eq "TRUE"){ |
720 |
11 Jun 08 |
jari |
538 |
if(($param_hash{'exportMode'} eq "standard_lite") || ($param_hash{'exportMode'} eq "singleLite")){ |
720 |
11 Jun 08 |
jari |
# [reporterId] [chromosome] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
540 |
@toSort = |
720 |
11 Jun 08 |
jari |
541 |
map $_->[0] => |
720 |
11 Jun 08 |
jari |
542 |
sort { $a->[1] <=> $b->[1] |
720 |
11 Jun 08 |
jari |
543 |
|| |
720 |
11 Jun 08 |
jari |
544 |
$a->[2] <=> $b->[2] } |
720 |
11 Jun 08 |
jari |
545 |
map [ $_, (split /\t/)[1], (split /\t/)[2] ] |
720 |
11 Jun 08 |
jari |
#map [ $_, (split /\t/)[1,2] ] |
720 |
11 Jun 08 |
jari |
547 |
=> @toSort; |
720 |
11 Jun 08 |
jari |
548 |
}elsif($param_hash{'exportMode'} eq "bed"){ |
720 |
11 Jun 08 |
jari |
#format: [chromosome] [startPosition] [stopPosition] [reporterId] |
720 |
11 Jun 08 |
jari |
550 |
@toSort = |
720 |
11 Jun 08 |
jari |
551 |
map $_->[0] => |
720 |
11 Jun 08 |
jari |
552 |
sort { $a->[1] <=> $b->[1] |
720 |
11 Jun 08 |
jari |
553 |
|| |
720 |
11 Jun 08 |
jari |
554 |
$a->[2] <=> $b->[2] } |
720 |
11 Jun 08 |
jari |
555 |
map [ $_, (split /\t/)[0], (split /\t/)[1] ] |
720 |
11 Jun 08 |
jari |
556 |
=> @toSort; |
720 |
11 Jun 08 |
jari |
557 |
}else{ |
720 |
11 Jun 08 |
jari |
#standard format |
720 |
11 Jun 08 |
jari |
# [reporterId] [geneSymbol] [chromosome] [cytoBand] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
560 |
|
720 |
11 Jun 08 |
jari |
561 |
@toSort = |
720 |
11 Jun 08 |
jari |
562 |
map $_->[0] => |
720 |
11 Jun 08 |
jari |
563 |
sort { $a->[1] <=> $b->[1] || |
720 |
11 Jun 08 |
jari |
564 |
$a->[2] <=> $b->[2] } |
720 |
11 Jun 08 |
jari |
565 |
map [ $_, (split /\t/)[2], (split /\t/)[4] ] |
720 |
11 Jun 08 |
jari |
566 |
=> @toSort; |
720 |
11 Jun 08 |
jari |
567 |
} |
720 |
11 Jun 08 |
jari |
568 |
}#end if to sort data |
720 |
11 Jun 08 |
jari |
569 |
if($param_hash{'exportMode'} eq "singleLite"){ |
720 |
11 Jun 08 |
jari |
# split the matrix into separate files... |
720 |
11 Jun 08 |
jari |
# [reporterId] [chromosome] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
572 |
|
720 |
11 Jun 08 |
jari |
#split header |
720 |
11 Jun 08 |
jari |
574 |
my @tmpHeader=split("\t",$header); |
720 |
11 Jun 08 |
jari |
575 |
my $commonHeader=join("\t",shift(@tmpHeader),shift(@tmpHeader),shift(@tmpHeader),shift(@tmpHeader)); |
720 |
11 Jun 08 |
jari |
576 |
my $ccN=4; |
720 |
11 Jun 08 |
jari |
577 |
for(my $i=0;$i<@tmpHeader;$i++){ |
720 |
11 Jun 08 |
jari |
#foreach assay create individual files, through iteration.. VERY SLOW! |
720 |
11 Jun 08 |
jari |
579 |
my $fileN=$tmpHeader[$i]."_".$file; |
720 |
11 Jun 08 |
jari |
580 |
open(mySingleFileHandle,">$fileN"); |
720 |
11 Jun 08 |
jari |
581 |
print mySingleFileHandle "$commonHeader\t$tmpHeader[$i]\n"; |
720 |
11 Jun 08 |
jari |
582 |
for(my $j=0;$j<@toSort;$j++){ |
720 |
11 Jun 08 |
jari |
583 |
my @ttt= split("\t",$toSort[$j]); |
720 |
11 Jun 08 |
jari |
584 |
my $lineN=join("\t",$ttt[0],$ttt[1],$ttt[2],$ttt[3],$ttt[$i+$ccN]); |
720 |
11 Jun 08 |
jari |
585 |
print mySingleFileHandle "$lineN\n"; |
720 |
11 Jun 08 |
jari |
586 |
} |
720 |
11 Jun 08 |
jari |
587 |
close(mySingleFileHandle); |
720 |
11 Jun 08 |
jari |
588 |
} |
720 |
11 Jun 08 |
jari |
589 |
}elsif($param_hash{'exportMode'} eq "agilent"){ |
720 |
11 Jun 08 |
jari |
#create the agilent format and print |
720 |
11 Jun 08 |
jari |
591 |
my $headerA=join("\t","FeatureNum","Chr","Start","Stop","SystematicName","Gene Name","Description",@assaynames); |
720 |
11 Jun 08 |
jari |
592 |
my $featureNum=0; |
720 |
11 Jun 08 |
jari |
593 |
print myoutfile "AMADID 1234567\n"; #check if to use random number? |
720 |
11 Jun 08 |
jari |
594 |
print myoutfile "$headerA\n"; |
720 |
11 Jun 08 |
jari |
595 |
|
720 |
11 Jun 08 |
jari |
# Format of toSort: |
720 |
11 Jun 08 |
jari |
# [reporterId] [geneSymbol] [chromosome] [cytoBand] [startPosition] [endPosition] [assayData = M] |
720 |
11 Jun 08 |
jari |
598 |
foreach my $line (@toSort){ |
720 |
11 Jun 08 |
jari |
599 |
$featureNum++; |
720 |
11 Jun 08 |
jari |
600 |
my @tmp=split("\t",$line); |
720 |
11 Jun 08 |
jari |
601 |
if($tmp[2] eq "23"){ |
720 |
11 Jun 08 |
jari |
602 |
$tmp[2]="X"; |
720 |
11 Jun 08 |
jari |
603 |
}elsif($tmp[2] eq "24"){ |
720 |
11 Jun 08 |
jari |
604 |
$tmp[2]="Y"; |
720 |
11 Jun 08 |
jari |
605 |
} |
720 |
11 Jun 08 |
jari |
#my $agilentLine=join("\t",$featureNum,$tmp[2],$tmp[4],$tmp[5],$tmp[0],$tmp[1],""); #if it works with bundled gene symbols.. |
720 |
11 Jun 08 |
jari |
607 |
my $agilentLine=join("\t",$featureNum,$tmp[2],$tmp[4],$tmp[5],$tmp[0],"",""); |
720 |
11 Jun 08 |
jari |
608 |
|
720 |
11 Jun 08 |
jari |
609 |
splice(@tmp,0,6); #only log2data left |
720 |
11 Jun 08 |
jari |
610 |
$agilentLine=join("\t",$agilentLine,@tmp); |
720 |
11 Jun 08 |
jari |
611 |
print myoutfile "$agilentLine\n"; |
720 |
11 Jun 08 |
jari |
612 |
} |
720 |
11 Jun 08 |
jari |
613 |
}else{ |
720 |
11 Jun 08 |
jari |
#just print! |
720 |
11 Jun 08 |
jari |
615 |
foreach my $line (@toSort){ |
720 |
11 Jun 08 |
jari |
616 |
print myoutfile "$line\n"; |
720 |
11 Jun 08 |
jari |
617 |
} |
720 |
11 Jun 08 |
jari |
618 |
} |
720 |
11 Jun 08 |
jari |
619 |
} |
720 |
11 Jun 08 |
jari |
620 |
if($param_hash{'exportMode'} ne "singleLite"){ |
720 |
11 Jun 08 |
jari |
621 |
close(myoutfile); |
720 |
11 Jun 08 |
jari |
622 |
} |
720 |
11 Jun 08 |
jari |
623 |
}#end doExport |
720 |
11 Jun 08 |
jari |
624 |
|