|
|||||||||||||||||||
| 30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover | |||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| CorrelationStats.java | 10% | 13.2% | 25% | 13.6% |
|
||||||||||||||
| 1 |
package baseCode.math;
|
|
| 2 |
|
|
| 3 |
import cern.colt.list.DoubleArrayList;
|
|
| 4 |
import cern.colt.matrix.DoubleMatrix2D;
|
|
| 5 |
import cern.colt.matrix.impl.SparseDoubleMatrix2D;
|
|
| 6 |
import cern.jet.math.Arithmetic;
|
|
| 7 |
import cern.jet.stat.Probability;
|
|
| 8 |
|
|
| 9 |
/**
|
|
| 10 |
* Statistical evaluation and transformation tools for correlations.
|
|
| 11 |
* <p>
|
|
| 12 |
* Copyright (c) 2004
|
|
| 13 |
* </p>
|
|
| 14 |
* <p>
|
|
| 15 |
 * Institution: Columbia University
|
|
| 16 |
* </p>
|
|
| 17 |
*
|
|
| 18 |
* @author Paul Pavlidis
|
|
| 19 |
* @version $Id: CorrelationStats.java,v 1.11 2005/01/05 17:59:19 pavlidis Exp $
|
|
| 20 |
*/
|
|
| 21 |
public class CorrelationStats { |
|
| 22 |
|
|
| 23 |
private static DoubleMatrix2D correlationPvalLookup; |
|
| 24 |
private static final double BINSIZE = 0.005; // resolution of correlation. |
|
| 25 |
// Differences smaller than this
|
|
| 26 |
// are considered meaningless.
|
|
| 27 |
private static final double STEPSIZE = BINSIZE * 2; // this MUST be more than |
|
| 28 |
// the binsize.
|
|
| 29 |
private static final int MAXCOUNT = 1000; // maximum number of things. |
|
| 30 |
private static final double PVALCHOP = 8.0; // value by which log(pvalues) |
|
| 31 |
// are scaled before storing as
|
|
| 32 |
// bytes. Values less than
|
|
| 33 |
// 10^e-256/PVALCHOP are
|
|
| 34 |
// 'clipped'.
|
|
| 35 |
|
|
| 36 |
static {
|
|
| 37 | 1 |
int numbins = ( int ) Math.ceil( 1.0 / BINSIZE ); |
| 38 | 1 |
correlationPvalLookup = new SparseDoubleMatrix2D( numbins, MAXCOUNT + 1 );
|
| 39 |
} |
|
| 40 |
|
|
| 41 |
/**
|
|
| 42 |
* @param correl Pearson correlation.
|
|
| 43 |
* @param count Number of items used to calculate the correlation. NOT the degrees of freedom.
|
|
| 44 |
* @return double
|
|
| 45 |
*/
|
|
| 46 | 0 |
public static double pvalue( double correl, int count ) { |
| 47 |
|
|
| 48 | 0 |
double acorrel = Math.abs( correl );
|
| 49 |
|
|
| 50 | 0 |
if ( acorrel == 1.0 ) {
|
| 51 | 0 |
return 0.0;
|
| 52 |
} |
|
| 53 |
|
|
| 54 | 0 |
if ( acorrel == 0.0 ) {
|
| 55 | 0 |
return 1.0;
|
| 56 |
} |
|
| 57 |
|
|
| 58 | 0 |
int dof = count - 2;
|
| 59 |
|
|
| 60 | 0 |
if ( dof <= 0 ) {
|
| 61 | 0 |
return 1.0;
|
| 62 |
} |
|
| 63 |
|
|
| 64 | 0 |
int bin = ( int ) Math.ceil( acorrel / BINSIZE ); |
| 65 | 0 |
if ( count <= MAXCOUNT
|
| 66 |
&& correlationPvalLookup.getQuick( bin, dof ) != 0.0 ) {
|
|
| 67 | 0 |
return correlationPvalLookup.getQuick( bin, dof );
|
| 68 |
} |
|
| 69 | 0 |
double t = correlationTstat( acorrel, dof );
|
| 70 | 0 |
double p = Probability.studentT( dof, -t );
|
| 71 | 0 |
if ( count < MAXCOUNT ) {
|
| 72 | 0 |
correlationPvalLookup.setQuick( bin, dof, p ); |
| 73 |
} |
|
| 74 | 0 |
return p;
|
| 75 |
|
|
| 76 |
} |
|
| 77 |
|
|
| 78 |
/**
|
|
| 79 |
* @param correl double
|
|
| 80 |
* @return int
|
|
| 81 |
*/
|
|
| 82 | 0 |
public static int correlAsByte( double correl ) { |
| 83 | 0 |
if ( correl == -1.0 ) {
|
| 84 | 0 |
return 0;
|
| 85 |
} |
|
| 86 |
|
|
| 87 | 0 |
return ( int ) ( Math.ceil( ( correl + 1.0 ) * 128 ) - 1 ); |
| 88 |
} |
|
| 89 |
|
|
| 90 |
/**
|
|
| 91 |
* Reverse the Fisher z-transform of correlations.
|
|
| 92 |
*
|
|
| 93 |
* @param r
|
|
| 94 |
* @return
|
|
| 95 |
*/
|
|
| 96 | 0 |
public static double unFisherTransform( double r ) { |
| 97 | 0 |
return Math.exp( 2.0 * r - 1.0 ) / Math.exp( 2.0 * r + 1.0 );
|
| 98 |
} |
|
| 99 |
|
|
| 100 |
/**
|
|
| 101 |
* Compute the Fisher z transform of the Pearson correlation.
|
|
| 102 |
*
|
|
| 103 |
* @param r Correlation coefficient.
|
|
| 104 |
* @return Fisher transform of the Correlation.
|
|
| 105 |
*/
|
|
| 106 | 100 |
public static double fisherTransform( double r ) { |
| 107 | 100 |
if ( !isValidPearsonCorrelation( r ) ) {
|
| 108 | 0 |
throw new IllegalArgumentException( "Invalid correlation " + r ); |
| 109 |
} |
|
| 110 |
|
|
| 111 | 100 |
return 0.5 * Math.log( ( 1.0 + r ) / ( 1.0 - r ) );
|
| 112 |
} |
|
| 113 |
|
|
| 114 |
/**
|
|
| 115 |
* Fisher-transform a list of correlations.
|
|
| 116 |
*
|
|
| 117 |
* @param e
|
|
| 118 |
* @return
|
|
| 119 |
*/
|
|
| 120 | 5 |
public static DoubleArrayList fisherTransform( DoubleArrayList e ) { |
| 121 | 5 |
DoubleArrayList r = new DoubleArrayList( e.size() );
|
| 122 | 5 |
for ( int i = 0; i < e.size(); i++ ) { |
| 123 | 100 |
r.add( CorrelationStats.fisherTransform( e.getQuick( i ) ) ); |
| 124 |
} |
|
| 125 | 5 |
return r;
|
| 126 |
} |
|
| 127 |
|
|
| 128 |
/**
|
|
| 129 |
* Conver a correlation p value into a value between 0 and 255 inclusive. This is done by taking the log, multiplying
|
|
| 130 |
* it by a fixed value (currently 8). This means that pvalues less than 10^-32 are rounded to 10^-32.
|
|
| 131 |
*
|
|
| 132 |
* @param correl double
|
|
| 133 |
* @param count int
|
|
| 134 |
* @return int
|
|
| 135 |
*/
|
|
| 136 | 0 |
public static int pvalueAsByte( double correl, int count ) { |
| 137 | 0 |
int p = -( int ) Math.floor( PVALCHOP |
| 138 |
* Arithmetic.log10( pvalue( correl, count ) ) ); |
|
| 139 |
|
|
| 140 | 0 |
if ( p < 0 ) {
|
| 141 | 0 |
return 0;
|
| 142 | 0 |
} else if ( p > 255 ) { |
| 143 | 0 |
return 255;
|
| 144 |
} |
|
| 145 | 0 |
return p;
|
| 146 |
} |
|
| 147 |
|
|
| 148 |
/**
|
|
| 149 |
* @param pvalByte int
|
|
| 150 |
* @return double
|
|
| 151 |
*/
|
|
| 152 | 0 |
public static double byteToPvalue( int pvalByte ) { |
| 153 | 0 |
return Math.pow( 10.0, -( double ) pvalByte / PVALCHOP ); |
| 154 |
} |
|
| 155 |
|
|
| 156 |
/**
|
|
| 157 |
* @param correlByte int
|
|
| 158 |
* @return double
|
|
| 159 |
*/
|
|
| 160 | 0 |
public static double byteToCorrel( int correlByte ) { |
| 161 | 0 |
return correlByte / 128.0 - 1.0;
|
| 162 |
} |
|
| 163 |
|
|
| 164 |
/**
|
|
| 165 |
* Compute the t-statistic associated with a Pearson correlation.
|
|
| 166 |
*
|
|
| 167 |
* @param correl Pearson correlation
|
|
| 168 |
* @param dof Degrees of freedom (n - 2)
|
|
| 169 |
* @return double
|
|
| 170 |
*/
|
|
| 171 | 0 |
public static double correlationTstat( double correl, int dof ) { |
| 172 | 0 |
return correl / Math.sqrt( ( 1.0 - correl * correl ) / dof );
|
| 173 |
} |
|
| 174 |
|
|
| 175 |
/**
|
|
| 176 |
* Statistical comparison of two correlations. Assumes data are bivariate normal. Null hypothesis is that the two
|
|
| 177 |
* correlations are equal. See Zar (Biostatistics)
|
|
| 178 |
*
|
|
| 179 |
* @param correl1 First correlation
|
|
| 180 |
* @param n1 Number of values used to compute correl1
|
|
| 181 |
* @param correl2 Second correlation
|
|
| 182 |
* @param n2 Number of values used to compute correl2
|
|
| 183 |
* @return double p value.
|
|
| 184 |
*/
|
|
| 185 | 0 |
public static double compare( double correl1, int n1, double correl2, int n2 ) { |
| 186 |
|
|
| 187 | 0 |
double Z;
|
| 188 | 0 |
double sigma;
|
| 189 | 0 |
double p;
|
| 190 |
|
|
| 191 | 0 |
sigma = Math.sqrt( ( 1 / ( ( double ) n1 - 3 ) )
|
| 192 |
+ ( 1 / ( ( double ) n2 - 3 ) ) );
|
|
| 193 |
|
|
| 194 | 0 |
Z = Math.abs( correl1 - correl2 ) / sigma; |
| 195 |
|
|
| 196 | 0 |
p = Probability.normal( -Z ); // upper tail.
|
| 197 |
|
|
| 198 | 0 |
if ( p > 0.5 ) {
|
| 199 | 0 |
return 1.0 - p;
|
| 200 |
} |
|
| 201 | 0 |
return p;
|
| 202 |
} |
|
| 203 |
|
|
| 204 |
/**
|
|
| 205 |
* Find the approximate correlation required to meet a particular pvalue. This works by simple gradient descent.
|
|
| 206 |
*
|
|
| 207 |
* @param pval double
|
|
| 208 |
* @param count int
|
|
| 209 |
* @return double
|
|
| 210 |
*/
|
|
| 211 | 0 |
public static double correlationForPvalue( double pval, int count ) { |
| 212 | 0 |
double stop = pval / 100.0;
|
| 213 | 0 |
double err = 1.0;
|
| 214 | 0 |
double corrguess = 1.0;
|
| 215 | 0 |
double step = STEPSIZE;
|
| 216 | 0 |
double preverr = 0.0;
|
| 217 | 0 |
int maxiter = 1000;
|
| 218 | 0 |
int iter = 0;
|
| 219 | 0 |
while ( Math.abs( err ) > stop && step >= BINSIZE ) {
|
| 220 | 0 |
double guess = pvalue( corrguess, count );
|
| 221 | 0 |
if ( guess > pval ) {
|
| 222 | 0 |
corrguess += step; |
| 223 |
} else {
|
|
| 224 | 0 |
corrguess -= step; |
| 225 |
} |
|
| 226 |
|
|
| 227 | 0 |
if ( preverr * err < 0 ) { // opposite signs. Means we missed. Make |
| 228 |
// step smaller and keep going.
|
|
| 229 | 0 |
step /= 2; |
| 230 |
} |
|
| 231 |
|
|
| 232 | 0 |
preverr = err; |
| 233 | 0 |
err = pval - guess; |
| 234 | 0 |
iter++; |
| 235 |
|
|
| 236 | 0 |
if ( iter > maxiter ) {
|
| 237 | 0 |
throw new IllegalStateException( "Too many iterations" ); |
| 238 |
} |
|
| 239 |
} |
|
| 240 | 0 |
return ( corrguess );
|
| 241 |
} |
|
| 242 |
|
|
| 243 |
/**
|
|
| 244 |
* Test if a value is a reasonable Pearson correlation (in the range -1 to 1; values outside of this
|
|
| 245 |
* range are acceptable within a small roundoff.
|
|
| 246 |
* @param r
|
|
| 247 |
* @return
|
|
| 248 |
*/
|
|
| 249 | 180 |
public static boolean isValidPearsonCorrelation( double r ) { |
| 250 | 180 |
return ( r + Constants.SMALL >= -1.0 && r - Constants.SMALL <= 1.0 );
|
| 251 |
} |
|
| 252 |
|
|
| 253 |
} |
|
||||||||||