View Javadoc

1   package baseCode.math;
2   
3   import cern.colt.list.DoubleArrayList;
4   import cern.colt.matrix.DoubleMatrix2D;
5   import cern.colt.matrix.impl.SparseDoubleMatrix2D;
6   import cern.jet.math.Arithmetic;
7   import cern.jet.stat.Probability;
8   
9   /***
10   * Statistical evaluation and transformation tools for correlations.
11   * <p>
12   * Copyright (c) 2004
13   * </p>
14   * <p>
15   * Institution: Columbia University
16   * </p>
17   * 
18   * @author Paul Pavlidis
19   * @version $Id: CorrelationStats.java,v 1.11 2005/01/05 17:59:19 pavlidis Exp $
20   */
21  public class CorrelationStats {
22  
23     private static DoubleMatrix2D correlationPvalLookup;
24     private static final double BINSIZE = 0.005; // resolution of correlation.
25     // Differences smaller than this
26     // are considered meaningless.
27     private static final double STEPSIZE = BINSIZE * 2; // this MUST be more than
28     // the binsize.
29     private static final int MAXCOUNT = 1000; // maximum number of things.
30     private static final double PVALCHOP = 8.0; // value by which log(pvalues)
31     // are scaled before storing as
32     // bytes. Values less than
33     // 10^e-256/PVALCHOP are
34     // 'clipped'.
35  
36     static {
37        int numbins = ( int ) Math.ceil( 1.0 / BINSIZE );
38        correlationPvalLookup = new SparseDoubleMatrix2D( numbins, MAXCOUNT + 1 );
39     }
40  
41     /***
42      * @param correl Pearson correlation.
43      * @param count Number of items used to calculate the correlation. NOT the degrees of freedom.
44      * @return double
45      */
46     public static double pvalue( double correl, int count ) {
47  
48        double acorrel = Math.abs( correl );
49  
50        if ( acorrel == 1.0 ) {
51           return 0.0;
52        }
53  
54        if ( acorrel == 0.0 ) {
55           return 1.0;
56        }
57  
58        int dof = count - 2;
59  
60        if ( dof <= 0 ) {
61           return 1.0;
62        }
63  
64        int bin = ( int ) Math.ceil( acorrel / BINSIZE );
65        if ( count <= MAXCOUNT
66              && correlationPvalLookup.getQuick( bin, dof ) != 0.0 ) {
67           return correlationPvalLookup.getQuick( bin, dof );
68        }
69        double t = correlationTstat( acorrel, dof );
70        double p = Probability.studentT( dof, -t );
71        if ( count < MAXCOUNT ) {
72           correlationPvalLookup.setQuick( bin, dof, p );
73        }
74        return p;
75  
76     }
77  
78     /***
79      * @param correl double
80      * @return int
81      */
82     public static int correlAsByte( double correl ) {
83        if ( correl == -1.0 ) {
84           return 0;
85        }
86  
87        return ( int ) ( Math.ceil( ( correl + 1.0 ) * 128 ) - 1 );
88     }
89  
90     /***
91      * Reverse the Fisher z-transform of correlations.
92      * 
93      * @param r
94      * @return
95      */
96     public static double unFisherTransform( double r ) {
97        return Math.exp( 2.0 * r - 1.0 ) / Math.exp( 2.0 * r + 1.0 );
98     }
99  
100    /***
101     * Compute the Fisher z transform of the Pearson correlation.
102     * 
103     * @param r Correlation coefficient.
104     * @return Fisher transform of the Correlation.
105     */
106    public static double fisherTransform( double r ) {
107       if ( !isValidPearsonCorrelation( r ) ) {
108          throw new IllegalArgumentException( "Invalid correlation " + r );
109       }
110       
111       return 0.5 * Math.log( ( 1.0 + r ) / ( 1.0 - r ) );
112    }
113 
114    /***
115     * Fisher-transform a list of correlations.
116     * 
117     * @param e
118     * @return
119     */
120    public static DoubleArrayList fisherTransform( DoubleArrayList e ) {
121       DoubleArrayList r = new DoubleArrayList( e.size() );
122       for ( int i = 0; i < e.size(); i++ ) {
123          r.add( CorrelationStats.fisherTransform( e.getQuick( i ) ) );
124       }
125       return r;
126    }
127 
128    /***
129     * Conver a correlation p value into a value between 0 and 255 inclusive. This is done by taking the log, multiplying
130     * it by a fixed value (currently 8). This means that pvalues less than 10^-32 are rounded to 10^-32.
131     * 
132     * @param correl double
133     * @param count int
134     * @return int
135     */
136    public static int pvalueAsByte( double correl, int count ) {
137       int p = -( int ) Math.floor( PVALCHOP
138             * Arithmetic.log10( pvalue( correl, count ) ) );
139 
140       if ( p < 0 ) {
141          return 0;
142       } else if ( p > 255 ) {
143          return 255;
144       }
145       return p;
146    }
147 
148    /***
149     * @param pvalByte int
150     * @return double
151     */
152    public static double byteToPvalue( int pvalByte ) {
153       return Math.pow( 10.0, -( double ) pvalByte / PVALCHOP );
154    }
155 
156    /***
157     * @param correlByte int
158     * @return double
159     */
160    public static double byteToCorrel( int correlByte ) {
161       return correlByte / 128.0 - 1.0;
162    }
163 
164    /***
165     * Compute the t-statistic associated with a Pearson correlation.
166     * 
167     * @param correl Pearson correlation
168     * @param dof Degrees of freedom (n - 2)
169     * @return double
170     */
171    public static double correlationTstat( double correl, int dof ) {
172       return correl / Math.sqrt( ( 1.0 - correl * correl ) / dof );
173    }
174 
175    /***
176     * Statistical comparison of two correlations. Assumes data are bivariate normal. Null hypothesis is that the two
177     * correlations are equal. See Zar (Biostatistics)
178     * 
179     * @param correl1 First correlation
180     * @param n1 Number of values used to compute correl1
181     * @param correl2 Second correlation
182     * @param n2 Number of values used to compute correl2
183     * @return double p value.
184     */
185    public static double compare( double correl1, int n1, double correl2, int n2 ) {
186 
187       double Z;
188       double sigma;
189       double p;
190 
191       sigma = Math.sqrt( ( 1 / ( ( double ) n1 - 3 ) )
192             + ( 1 / ( ( double ) n2 - 3 ) ) );
193 
194       Z = Math.abs( correl1 - correl2 ) / sigma;
195 
196       p = Probability.normal( -Z ); // upper tail.
197 
198       if ( p > 0.5 ) {
199          return 1.0 - p;
200       }
201       return p;
202    }
203 
204    /***
205     * Find the approximate correlation required to meet a particular pvalue. This works by simple gradient descent.
206     * 
207     * @param pval double
208     * @param count int
209     * @return double
210     */
211    public static double correlationForPvalue( double pval, int count ) {
212       double stop = pval / 100.0;
213       double err = 1.0;
214       double corrguess = 1.0;
215       double step = STEPSIZE;
216       double preverr = 0.0;
217       int maxiter = 1000;
218       int iter = 0;
219       while ( Math.abs( err ) > stop && step >= BINSIZE ) {
220          double guess = pvalue( corrguess, count );
221          if ( guess > pval ) {
222             corrguess += step;
223          } else {
224             corrguess -= step;
225          }
226 
227          if ( preverr * err < 0 ) { // opposite signs. Means we missed. Make
228             // step smaller and keep going.
229             step /= 2;
230          }
231 
232          preverr = err;
233          err = pval - guess;
234          iter++;
235 
236          if ( iter > maxiter ) {
237             throw new IllegalStateException( "Too many iterations" );
238          }
239       }
240       return ( corrguess );
241    }
242 
243    /***
244     * Test if a value is a reasonable Pearson correlation (in the range -1 to 1; values outside of this
245     * range are acceptable within a small roundoff.
246     * @param r
247     * @return
248     */
249    public static boolean isValidPearsonCorrelation( double r ) {
250       return ( r + Constants.SMALL >= -1.0 && r - Constants.SMALL <= 1.0 );
251    }
252 
253 }