2 |
26 Feb 07 |
jari |
1 |
/* |
2 |
26 Feb 07 |
jari |
Copyright @ 1999-2003, The Institute for Genomic Research (TIGR). |
2 |
26 Feb 07 |
jari |
All rights reserved. |
2 |
26 Feb 07 |
jari |
4 |
*/ |
2 |
26 Feb 07 |
jari |
5 |
/* |
2 |
26 Feb 07 |
jari |
* $RCSfile: HypergeometricProbability.java,v $ |
2 |
26 Feb 07 |
jari |
* $Revision: 1.3 $ |
2 |
26 Feb 07 |
jari |
* $Date: 2005/03/10 15:46:49 $ |
2 |
26 Feb 07 |
jari |
* $Author: braistedj $ |
2 |
26 Feb 07 |
jari |
* $State: Exp $ |
2 |
26 Feb 07 |
jari |
11 |
*/ |
2 |
26 Feb 07 |
jari |
12 |
|
2 |
26 Feb 07 |
jari |
13 |
/* |
2 |
26 Feb 07 |
jari |
* Catherine Loader of Bell Laboratories originally wrote |
2 |
26 Feb 07 |
jari |
* this fast and accurate estimation of Gaussian hyper-geometric |
2 |
26 Feb 07 |
jari |
* probability in C: |
2 |
26 Feb 07 |
jari |
* (http://cm.bell-labs.com/cm/ms/departments/sia/catherine/dbinom/). |
2 |
26 Feb 07 |
jari |
18 |
* |
2 |
26 Feb 07 |
jari |
* It has been incorporated into the DAVID(1) and EASE(2) software |
2 |
26 Feb 07 |
jari |
* packages and converted to open-source java code by the Laboratory |
2 |
26 Feb 07 |
jari |
* of Immunopathogenesis and Bioinformatics, Science Applications |
2 |
26 Feb 07 |
jari |
* International Corporation-Frederick, Inc. For questions, comments, |
2 |
26 Feb 07 |
jari |
* and acknowledgements please contact: |
2 |
26 Feb 07 |
jari |
24 |
|
2 |
26 Feb 07 |
jari |
* Richard A. Lempicki, PhD |
2 |
26 Feb 07 |
jari |
* Senior Scientist |
2 |
26 Feb 07 |
jari |
* SAIC-Frederick, Inc., Clinical Services Program |
2 |
26 Feb 07 |
jari |
* Head, Laboratory of Immunopathogenesis and Bioinformatics |
2 |
26 Feb 07 |
jari |
* P.O. Box B |
2 |
26 Feb 07 |
jari |
* Frederick, MD 21702-1201 |
2 |
26 Feb 07 |
jari |
* T: (301) 846-5093 |
2 |
26 Feb 07 |
jari |
* F: (301) 846-6762 |
2 |
26 Feb 07 |
jari |
* E: rlempicki@niaid.nih.gov |
2 |
26 Feb 07 |
jari |
34 |
|
2 |
26 Feb 07 |
jari |
* Acknowledgements: |
2 |
26 Feb 07 |
jari |
36 |
|
2 |
26 Feb 07 |
jari |
* 1. Dennis G Jr., Sherman BT., Hosack DA., Lane CH., Lempicki RA. |
2 |
26 Feb 07 |
jari |
* DAVID: Database for Annotation, Visualization, and Integrated Discovery. |
2 |
26 Feb 07 |
jari |
* Genome Biology 4(9):R60. |
2 |
26 Feb 07 |
jari |
40 |
* |
2 |
26 Feb 07 |
jari |
* 2. Hosack DA., Dennis G Jr., Sherman BT., Lane HC., and Lempicki RA. |
2 |
26 Feb 07 |
jari |
* Identifying biological themes within lists of genes with EASE. |
2 |
26 Feb 07 |
jari |
* Genome Biology 4(10):R70. |
2 |
26 Feb 07 |
jari |
44 |
*/ |
2 |
26 Feb 07 |
jari |
45 |
|
2 |
26 Feb 07 |
jari |
46 |
/* |
2 |
26 Feb 07 |
jari |
* Developed by NIAID LTB Lab. |
2 |
26 Feb 07 |
jari |
48 |
*/ |
2 |
26 Feb 07 |
jari |
49 |
|
2 |
26 Feb 07 |
jari |
50 |
package org.tigr.microarray.mev.cluster.algorithm.impl.ease; |
2 |
26 Feb 07 |
jari |
51 |
|
2 |
26 Feb 07 |
jari |
52 |
public class HypergeometricProbability { |
2 |
26 Feb 07 |
jari |
53 |
|
2 |
26 Feb 07 |
jari |
/** Pi Squared pre-calculated */ |
2 |
26 Feb 07 |
jari |
55 |
public final double P12 = 6.283185307179586476925286; |
2 |
26 Feb 07 |
jari |
/** First saddle point for correcting error of Stirling's approximation*/ |
2 |
26 Feb 07 |
jari |
57 |
public final double S0 = 0.083333333333333333333; |
2 |
26 Feb 07 |
jari |
/** First saddle point for correcting error of Stirling's approximation*/ |
2 |
26 Feb 07 |
jari |
59 |
public final double S1 = 0.00277777777777777777778; |
2 |
26 Feb 07 |
jari |
/** First saddle point for correcting error of Stirling's approximation*/ |
2 |
26 Feb 07 |
jari |
61 |
public final double S2 = 0.00079365079365079365079365; |
2 |
26 Feb 07 |
jari |
/** First saddle point for correcting error of Stirling's approximation*/ |
2 |
26 Feb 07 |
jari |
63 |
public final double S3 = 0.000595238095238095238095238; |
2 |
26 Feb 07 |
jari |
/** First saddle point for correcting error of Stirling's approximation*/ |
2 |
26 Feb 07 |
jari |
65 |
public final double S4 = 0.0008417508417508417508417508; |
2 |
26 Feb 07 |
jari |
66 |
|
2 |
26 Feb 07 |
jari |
/** Array storing errors of Stirling's approximation of n! */ |
2 |
26 Feb 07 |
jari |
68 |
public static double[] sfe = new double[16]; |
2 |
26 Feb 07 |
jari |
69 |
|
2 |
26 Feb 07 |
jari |
70 |
public HypergeometricProbability() { |
2 |
26 Feb 07 |
jari |
71 |
|
2 |
26 Feb 07 |
jari |
72 |
sfe[0] = 0.0; |
2 |
26 Feb 07 |
jari |
73 |
sfe[1] = 0.081061466795327258219670264; |
2 |
26 Feb 07 |
jari |
74 |
sfe[2] = 0.041340695955409294093822081; |
2 |
26 Feb 07 |
jari |
75 |
sfe[3] = 0.0276779256849983391487892927; |
2 |
26 Feb 07 |
jari |
76 |
sfe[4] = 0.020790672103765093111522771; |
2 |
26 Feb 07 |
jari |
77 |
sfe[5] = 0.0166446911898211921631948653; |
2 |
26 Feb 07 |
jari |
78 |
sfe[6] = 0.013876128823070747998745727; |
2 |
26 Feb 07 |
jari |
79 |
sfe[7] = 0.0118967099458917700950557241; |
2 |
26 Feb 07 |
jari |
80 |
sfe[8] = 0.010411265261972096497478567; |
2 |
26 Feb 07 |
jari |
81 |
sfe[9] = 0.0092554621827127329177286366; |
2 |
26 Feb 07 |
jari |
82 |
sfe[10] = 0.008330563433362871256469318; |
2 |
26 Feb 07 |
jari |
83 |
sfe[11] = 0.0075736754879518407949720242; |
2 |
26 Feb 07 |
jari |
84 |
sfe[12] = 0.006942840107209529865664152; |
2 |
26 Feb 07 |
jari |
85 |
sfe[13] = 0.0064089941880042070684396310; |
2 |
26 Feb 07 |
jari |
86 |
sfe[14] = 0.005951370112758847735624416; |
2 |
26 Feb 07 |
jari |
87 |
sfe[15] = 0.0055547335519628013710386899; |
2 |
26 Feb 07 |
jari |
88 |
} |
2 |
26 Feb 07 |
jari |
89 |
|
2 |
26 Feb 07 |
jari |
90 |
|
2 |
26 Feb 07 |
jari |
/** returns error of Stirling's approximation of n! */ |
2 |
26 Feb 07 |
jari |
92 |
public double stirlerr(int n){ |
2 |
26 Feb 07 |
jari |
93 |
double nn; |
2 |
26 Feb 07 |
jari |
94 |
if (n<16) return(sfe[(int)n]); |
2 |
26 Feb 07 |
jari |
95 |
nn = (double)n; |
2 |
26 Feb 07 |
jari |
96 |
nn = nn*nn; |
2 |
26 Feb 07 |
jari |
97 |
if (n>500) return((S0-S1/nn)/n); |
2 |
26 Feb 07 |
jari |
98 |
if (n>80) return((S0-(S1-S2/nn)/nn)/n); |
2 |
26 Feb 07 |
jari |
99 |
if (n>35) return((S0-(S1-(S2-S3/nn)/nn)/nn)/n); |
2 |
26 Feb 07 |
jari |
100 |
return((S0-(S1-(S2-(S3-S4/nn)/nn)/nn)/nn)/n); |
2 |
26 Feb 07 |
jari |
101 |
} |
2 |
26 Feb 07 |
jari |
102 |
|
2 |
26 Feb 07 |
jari |
/** returns a factor of the binomial distribution */ |
2 |
26 Feb 07 |
jari |
104 |
public double bd0(int x, double np){ |
2 |
26 Feb 07 |
jari |
105 |
double ej, s, s1, v; |
2 |
26 Feb 07 |
jari |
106 |
int j; |
2 |
26 Feb 07 |
jari |
107 |
|
2 |
26 Feb 07 |
jari |
108 |
if(Math.abs(x-np) < 0.1*(x+np)){ |
2 |
26 Feb 07 |
jari |
109 |
s = (x-np)*(x-np)/(x+np); |
2 |
26 Feb 07 |
jari |
110 |
v = (x-np)/(x+np); |
2 |
26 Feb 07 |
jari |
111 |
ej = 2*x*v; |
2 |
26 Feb 07 |
jari |
112 |
v = v*v; |
2 |
26 Feb 07 |
jari |
113 |
|
2 |
26 Feb 07 |
jari |
114 |
for (j=1; ;++j){ |
2 |
26 Feb 07 |
jari |
115 |
ej *= v; |
2 |
26 Feb 07 |
jari |
116 |
s1 = s+ej/((j<<1)+1); |
2 |
26 Feb 07 |
jari |
117 |
if (s1==s) return(s1); |
2 |
26 Feb 07 |
jari |
118 |
s = s1; |
2 |
26 Feb 07 |
jari |
119 |
} |
2 |
26 Feb 07 |
jari |
120 |
} |
2 |
26 Feb 07 |
jari |
121 |
|
2 |
26 Feb 07 |
jari |
122 |
return(x*Math.log(x/np)+np-x); |
2 |
26 Feb 07 |
jari |
123 |
} |
2 |
26 Feb 07 |
jari |
124 |
|
2 |
26 Feb 07 |
jari |
/** returns a discrete value from the binomial distribution */ |
2 |
26 Feb 07 |
jari |
126 |
public double dbinom(int x, int n, double p){ |
2 |
26 Feb 07 |
jari |
127 |
|
2 |
26 Feb 07 |
jari |
128 |
double lc; |
2 |
26 Feb 07 |
jari |
129 |
|
2 |
26 Feb 07 |
jari |
130 |
if (p==0.0) return( (x==0) ? 1.0 : 0.0); |
2 |
26 Feb 07 |
jari |
131 |
if (p==1.0) return( (x==n) ? 1.0 : 0.0); |
2 |
26 Feb 07 |
jari |
132 |
if (x==0) return(Math.exp(n*Math.log(1-p))); |
2 |
26 Feb 07 |
jari |
133 |
if (x==n) return(Math.exp(n*Math.log(p))); |
2 |
26 Feb 07 |
jari |
134 |
if ((x<0) | (x>n)) return(0.0); |
2 |
26 Feb 07 |
jari |
135 |
|
2 |
26 Feb 07 |
jari |
136 |
lc = stirlerr(n) - stirlerr(x) - stirlerr(n-x) - bd0(x,n*p) - bd0(n-x,n*(1.0-p)); |
2 |
26 Feb 07 |
jari |
137 |
|
2 |
26 Feb 07 |
jari |
138 |
|
2 |
26 Feb 07 |
jari |
139 |
return(Math.exp(lc)*Math.sqrt(n/(P12*x*(n-x)))); |
2 |
26 Feb 07 |
jari |
140 |
} |
2 |
26 Feb 07 |
jari |
141 |
|
2 |
26 Feb 07 |
jari |
/** returns the binomial approximation of a discrete value from the hypergeometric distribution*/ |
2 |
26 Feb 07 |
jari |
143 |
public double dhyperg(int x, int t, int m, int n){ |
2 |
26 Feb 07 |
jari |
144 |
double p; |
2 |
26 Feb 07 |
jari |
145 |
p = ((double)m)/((double)n); |
2 |
26 Feb 07 |
jari |
146 |
|
2 |
26 Feb 07 |
jari |
147 |
return( dbinom(x,t,p) * dbinom(m-x,n-t,p) / dbinom(m,n,p) ); |
2 |
26 Feb 07 |
jari |
148 |
} |
2 |
26 Feb 07 |
jari |
149 |
|
2 |
26 Feb 07 |
jari |
/** Returns an exact hypergeometric p for the contingency matrix values |
2 |
26 Feb 07 |
jari |
* p = [(a1+a2)!*(a3+a4)!*(a1+a3)!*(a2+a4)!]/(n!a1!a2!a3!a4!) |
2 |
26 Feb 07 |
jari |
152 |
*/ |
2 |
26 Feb 07 |
jari |
153 |
public double pExactForMatrix(int a1, int a2, int a3, int a4){ |
2 |
26 Feb 07 |
jari |
154 |
return dhyperg(a1, a1+a3, a1+a2 , a1+a2+a3+a4); |
2 |
26 Feb 07 |
jari |
155 |
} |
2 |
26 Feb 07 |
jari |
156 |
|
2 |
26 Feb 07 |
jari |
157 |
|
2 |
26 Feb 07 |
jari |
/** returns the one-tailed hypergeometric probability of over-representation */ |
2 |
26 Feb 07 |
jari |
159 |
public double SumHGP(int POPTOT, int POPHITS, int SAMPTOT, int SAMPHITS){ |
2 |
26 Feb 07 |
jari |
160 |
double P; |
2 |
26 Feb 07 |
jari |
161 |
|
2 |
26 Feb 07 |
jari |
162 |
if ((SAMPHITS>SAMPTOT) || (SAMPHITS>POPHITS) || (SAMPTOT>POPTOT) || (POPHITS>POPTOT)){ |
2 |
26 Feb 07 |
jari |
163 |
|
2 |
26 Feb 07 |
jari |
164 |
return 0; |
2 |
26 Feb 07 |
jari |
165 |
} |
2 |
26 Feb 07 |
jari |
166 |
else if (SAMPHITS<1){return 1; } |
2 |
26 Feb 07 |
jari |
167 |
else if (POPHITS<SAMPTOT){ |
2 |
26 Feb 07 |
jari |
168 |
int R; |
2 |
26 Feb 07 |
jari |
169 |
P=0; |
2 |
26 Feb 07 |
jari |
170 |
for (R=SAMPHITS; R<(POPHITS+1); R++){ |
2 |
26 Feb 07 |
jari |
171 |
P+=dhyperg(R, SAMPTOT, POPHITS, POPTOT); |
2 |
26 Feb 07 |
jari |
172 |
} |
2 |
26 Feb 07 |
jari |
173 |
return P; |
2 |
26 Feb 07 |
jari |
174 |
}else{ |
2 |
26 Feb 07 |
jari |
175 |
int R; |
2 |
26 Feb 07 |
jari |
176 |
P=0; |
2 |
26 Feb 07 |
jari |
177 |
for (R=SAMPHITS; R<(SAMPTOT+1); R++){ |
2 |
26 Feb 07 |
jari |
178 |
P+=dhyperg(R, SAMPTOT, POPHITS, POPTOT); |
2 |
26 Feb 07 |
jari |
179 |
} |
2 |
26 Feb 07 |
jari |
180 |
return P; |
2 |
26 Feb 07 |
jari |
181 |
} |
2 |
26 Feb 07 |
jari |
182 |
} |
2 |
26 Feb 07 |
jari |
183 |
|
2 |
26 Feb 07 |
jari |
184 |
public static void main(String [] args){ |
2 |
26 Feb 07 |
jari |
185 |
HypergeometricProbability hgp = new HypergeometricProbability(); |
2 |
26 Feb 07 |
jari |
186 |
System.out.println("p = "+hgp.SumHGP(325, 19, 37, 5)); |
2 |
26 Feb 07 |
jari |
187 |
} |
2 |
26 Feb 07 |
jari |
188 |
|
2 |
26 Feb 07 |
jari |
189 |
} |