1 package baseCode.math;
2
3 import cern.colt.list.DoubleArrayList;
4 import cern.colt.matrix.DoubleMatrix2D;
5 import cern.colt.matrix.impl.SparseDoubleMatrix2D;
6 import cern.jet.math.Arithmetic;
7 import cern.jet.stat.Probability;
8
9 /***
10 * Statistical evaluation and transformation tools for correlations.
11 * <p>
12 * Copyright (c) 2004
13 * </p>
14 * <p>
15 * Institution:: Columbia University
16 * </p>
17 *
18 * @author Paul Pavlidis
19 * @version $Id: CorrelationStats.java,v 1.11 2005/01/05 17:59:19 pavlidis Exp $
20 */
21 public class CorrelationStats {
22
23 private static DoubleMatrix2D correlationPvalLookup;
24 private static final double BINSIZE = 0.005;
25
26
27 private static final double STEPSIZE = BINSIZE * 2;
28
29 private static final int MAXCOUNT = 1000;
30 private static final double PVALCHOP = 8.0;
31
32
33
34
35
36 static {
37 int numbins = ( int ) Math.ceil( 1.0 / BINSIZE );
38 correlationPvalLookup = new SparseDoubleMatrix2D( numbins, MAXCOUNT + 1 );
39 }
40
41 /***
42 * @param correl Pearson correlation.
43 * @param count Number of items used to calculate the correlation. NOT the degrees of freedom.
44 * @return double
45 */
46 public static double pvalue( double correl, int count ) {
47
48 double acorrel = Math.abs( correl );
49
50 if ( acorrel == 1.0 ) {
51 return 0.0;
52 }
53
54 if ( acorrel == 0.0 ) {
55 return 1.0;
56 }
57
58 int dof = count - 2;
59
60 if ( dof <= 0 ) {
61 return 1.0;
62 }
63
64 int bin = ( int ) Math.ceil( acorrel / BINSIZE );
65 if ( count <= MAXCOUNT
66 && correlationPvalLookup.getQuick( bin, dof ) != 0.0 ) {
67 return correlationPvalLookup.getQuick( bin, dof );
68 }
69 double t = correlationTstat( acorrel, dof );
70 double p = Probability.studentT( dof, -t );
71 if ( count < MAXCOUNT ) {
72 correlationPvalLookup.setQuick( bin, dof, p );
73 }
74 return p;
75
76 }
77
78 /***
79 * @param correl double
80 * @return int
81 */
82 public static int correlAsByte( double correl ) {
83 if ( correl == -1.0 ) {
84 return 0;
85 }
86
87 return ( int ) ( Math.ceil( ( correl + 1.0 ) * 128 ) - 1 );
88 }
89
90 /***
91 * Reverse the Fisher z-transform of correlations.
92 *
93 * @param r
94 * @return
95 */
96 public static double unFisherTransform( double r ) {
97 return Math.exp( 2.0 * r - 1.0 ) / Math.exp( 2.0 * r + 1.0 );
98 }
99
100 /***
101 * Compute the Fisher z transform of the Pearson correlation.
102 *
103 * @param r Correlation coefficient.
104 * @return Fisher transform of the Correlation.
105 */
106 public static double fisherTransform( double r ) {
107 if ( !isValidPearsonCorrelation( r ) ) {
108 throw new IllegalArgumentException( "Invalid correlation " + r );
109 }
110
111 return 0.5 * Math.log( ( 1.0 + r ) / ( 1.0 - r ) );
112 }
113
114 /***
115 * Fisher-transform a list of correlations.
116 *
117 * @param e
118 * @return
119 */
120 public static DoubleArrayList fisherTransform( DoubleArrayList e ) {
121 DoubleArrayList r = new DoubleArrayList( e.size() );
122 for ( int i = 0; i < e.size(); i++ ) {
123 r.add( CorrelationStats.fisherTransform( e.getQuick( i ) ) );
124 }
125 return r;
126 }
127
128 /***
129 * Conver a correlation p value into a value between 0 and 255 inclusive. This is done by taking the log, multiplying
130 * it by a fixed value (currently 8). This means that pvalues less than 10^-32 are rounded to 10^-32.
131 *
132 * @param correl double
133 * @param count int
134 * @return int
135 */
136 public static int pvalueAsByte( double correl, int count ) {
137 int p = -( int ) Math.floor( PVALCHOP
138 * Arithmetic.log10( pvalue( correl, count ) ) );
139
140 if ( p < 0 ) {
141 return 0;
142 } else if ( p > 255 ) {
143 return 255;
144 }
145 return p;
146 }
147
148 /***
149 * @param pvalByte int
150 * @return double
151 */
152 public static double byteToPvalue( int pvalByte ) {
153 return Math.pow( 10.0, -( double ) pvalByte / PVALCHOP );
154 }
155
156 /***
157 * @param correlByte int
158 * @return double
159 */
160 public static double byteToCorrel( int correlByte ) {
161 return correlByte / 128.0 - 1.0;
162 }
163
164 /***
165 * Compute the t-statistic associated with a Pearson correlation.
166 *
167 * @param correl Pearson correlation
168 * @param dof Degrees of freedom (n - 2)
169 * @return double
170 */
171 public static double correlationTstat( double correl, int dof ) {
172 return correl / Math.sqrt( ( 1.0 - correl * correl ) / dof );
173 }
174
175 /***
176 * Statistical comparison of two correlations. Assumes data are bivariate normal. Null hypothesis is that the two
177 * correlations are equal. See Zar (Biostatistics)
178 *
179 * @param correl1 First correlation
180 * @param n1 Number of values used to compute correl1
181 * @param correl2 Second correlation
182 * @param n2 Number of values used to compute correl2
183 * @return double p value.
184 */
185 public static double compare( double correl1, int n1, double correl2, int n2 ) {
186
187 double Z;
188 double sigma;
189 double p;
190
191 sigma = Math.sqrt( ( 1 / ( ( double ) n1 - 3 ) )
192 + ( 1 / ( ( double ) n2 - 3 ) ) );
193
194 Z = Math.abs( correl1 - correl2 ) / sigma;
195
196 p = Probability.normal( -Z );
197
198 if ( p > 0.5 ) {
199 return 1.0 - p;
200 }
201 return p;
202 }
203
204 /***
205 * Find the approximate correlation required to meet a particular pvalue. This works by simple gradient descent.
206 *
207 * @param pval double
208 * @param count int
209 * @return double
210 */
211 public static double correlationForPvalue( double pval, int count ) {
212 double stop = pval / 100.0;
213 double err = 1.0;
214 double corrguess = 1.0;
215 double step = STEPSIZE;
216 double preverr = 0.0;
217 int maxiter = 1000;
218 int iter = 0;
219 while ( Math.abs( err ) > stop && step >= BINSIZE ) {
220 double guess = pvalue( corrguess, count );
221 if ( guess > pval ) {
222 corrguess += step;
223 } else {
224 corrguess -= step;
225 }
226
227 if ( preverr * err < 0 ) {
228
229 step /= 2;
230 }
231
232 preverr = err;
233 err = pval - guess;
234 iter++;
235
236 if ( iter > maxiter ) {
237 throw new IllegalStateException( "Too many iterations" );
238 }
239 }
240 return ( corrguess );
241 }
242
243 /***
244 * Test if a value is a reasonable Pearson correlation (in the range -1 to 1; values outside of this
245 * range are acceptable within a small roundoff.
246 * @param r
247 * @return
248 */
249 public static boolean isValidPearsonCorrelation( double r ) {
250 return ( r + Constants.SMALL >= -1.0 && r - Constants.SMALL <= 1.0 );
251 }
252
253 }