View Javadoc

1   package baseCode.math;
2   
3   import cern.colt.list.DoubleArrayList;
4   import cern.jet.stat.Descriptive;
5   
6   /***
7    * Miscellaneous functions used for statistical analysis. Some are optimized or specialized versions of methods that can
8    * be found elsewhere.
9    * 
10   * @see <a href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/math/package-summary.html">cern.jet.math
11   *      </a>
12   * @see <a href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/package-summary.html">cern.jet.stat
13   *      </a>
14   *      <p>
15   *      Copyright (c) 2004
16   *      </p>
17   *      <p>
18   *      Columbia University
19   *      </p>
20   * @author Paul Pavlidis
21   * @version $Id: Stats.java,v 1.10 2004/07/27 03:18:58 pavlidis Exp $
22   */
23  public class Stats {
24  
25     private Stats() { /* block instantiation */
26     };
27  
28     /***
29      * Test whether a value is a valid fractional or probability value.
30      * 
31      * @param value
32      * @return true if the value is in the interval 0 to 1.
33      */
34     public static boolean isValidFraction( double value ) {
35        if ( value > 1.0 || value < 0.0 ) {
36           return false;
37        }
38        return true;
39     }
40  
41     /***
42      * Compute the coefficient of variation of an array (standard deviation / mean)
43      * 
44      * @param data DoubleArrayList
45      * @return the cv
46      * @todo offer a regularized version of this function.
47      */
48     public static double cv( DoubleArrayList data ) {
49        double mean = DescriptiveWithMissing.mean( data );
50        return mean
51              / Math.sqrt( DescriptiveWithMissing.sampleVariance( data, mean ) );
52     }
53  
54     /***
55      * Convert an array into a cumulative array. Summing is from the left hand side. Use this to make CDFs where the
56      * concern is the left tail.
57      * 
58      * @param x DoubleArrayList
59      * @return cern.colt.list.DoubleArrayList
60      */
61     public static DoubleArrayList cumulate( DoubleArrayList x ) {
62        if ( x.size() == 0 ) {
63           return new DoubleArrayList( 0 );
64        }
65  
66        DoubleArrayList r = new DoubleArrayList();
67  
68        double sum = 0.0;
69        for ( int i = 0; i < x.size(); i++ ) {
70           sum += x.get( i );
71           r.add( sum );
72        }
73        return r;
74     }
75  
76     /***
77      * Convert an array into a cumulative array. Summing is from the right hand side. This is useful for creating
78      * upper-tail cumulative density histograms from count histograms, where the upper tail is expected to have very
79      * small numbers that could be lost to rounding.
80      * 
81      * @param x the array of data to be cumulated.
82      * @return cern.colt.list.DoubleArrayList
83      */
84     public static DoubleArrayList cumulateRight( DoubleArrayList x ) {
85        if ( x.size() == 0 ) {
86           return new DoubleArrayList( 0 );
87        }
88  
89        DoubleArrayList r = new DoubleArrayList( new double[x.size()] );
90  
91        double sum = 0.0;
92        for ( int i = x.size() - 1; i >= 0; i-- ) {
93           sum += x.get( i );
94           r.set( i, sum );
95        }
96        return r;
97     }
98  
99     /***
100     * Convert an array into a cumulative density function (CDF). This assumes that the input contains counts
101     * representing the distribution in question.
102     * 
103     * @param x The input of counts (i.e. a histogram).
104     * @return DoubleArrayList the CDF.
105     */
106    public static DoubleArrayList cdf( DoubleArrayList x ) {
107       return cumulateRight( normalize( x ) );
108    }
109 
110    /***
111     * Divide the elements of an array by a given factor.
112     * 
113     * @param x Input array.
114     * @param normfactor double
115     * @return Normalized array.
116     */
117    public static DoubleArrayList normalize( DoubleArrayList x, double normfactor ) {
118       if ( x.size() == 0 ) {
119          return new DoubleArrayList( 0 );
120       }
121 
122       DoubleArrayList r = new DoubleArrayList();
123 
124       for ( int i = 0; i < x.size(); i++ ) {
125          r.add( x.get( i ) / normfactor );
126       }
127       return r;
128 
129    }
130 
131    /***
132     * Adjust the elements of an array so they total to 1.0.
133     * 
134     * @param x Input array.
135     * @return Normalized array.
136     */
137    public static DoubleArrayList normalize( DoubleArrayList x ) {
138       return normalize( x, Descriptive.sum( x ) );
139    }
140 
141    /***
142     * calculate the mean of the values above (NOT greater or equal to) a particular index rank of an array. Quantile
143     * must be a value from 0 to 100.
144     * 
145     * @see DescriptiveWithMissing#meanAboveQuantile
146     * @param index the rank of the value we wish to average above.
147     * @param array Array for which we want to get the quantile.
148     * @param effectiveSize The size of the array, not including NaNs.
149     * @return double
150     */
151    public static double meanAboveQuantile( int index, double[] array,
152          int effectiveSize ) {
153 
154       double[] temp = new double[effectiveSize];
155       double median;
156       double returnvalue = 0.0;
157       int k = 0;
158 
159       temp = array;
160       median = quantile( index, array, effectiveSize );
161 
162       for ( int i = 0; i < effectiveSize; i++ ) {
163          if ( temp[i] > median ) {
164             returnvalue += temp[i];
165             k++;
166          }
167       }
168       return ( returnvalue / k );
169    }
170 
171    /***
172     * Compute the range of an array.
173     * 
174     * @param data DoubleArrayList
175     * @return double
176     */
177    public static double range( DoubleArrayList data ) {
178       return Descriptive.max( data ) - Descriptive.min( data );
179    }
180 
181    /***
182     * Given a double array, calculate the quantile requested. Note that no interpolation is done.
183     * 
184     * @see DescriptiveWithMissing#quantile
185     * @param index - the rank of the value we wish to get. Thus if we have 200 items in the array, and want the median,
186     *        we should enter 100.
187     * @param values double[] - array of data we want quantile of
188     * @param effectiveSize int the effective size of the array
189     * @return double the value at the requested quantile
190     */
191    public static double quantile( int index, double[] values, int effectiveSize ) {
192       double pivot = -1.0;
193       if ( index == 0 ) {
194          double ans = values[0];
195          for ( int i = 1; i < effectiveSize; i++ ) {
196             if ( ans > values[i] ) {
197                ans = values[i];
198             }
199          }
200          return ans;
201       }
202 
203       double[] temp = new double[effectiveSize];
204 
205       for ( int i = 0; i < effectiveSize; i++ ) {
206          temp[i] = values[i];
207       }
208 
209       pivot = temp[0];
210 
211       double[] smaller = new double[effectiveSize];
212       double[] bigger = new double[effectiveSize];
213       int itrSm = 0;
214       int itrBg = 0;
215       for ( int i = 1; i < effectiveSize; i++ ) {
216          if ( temp[i] <= pivot ) {
217             smaller[itrSm] = temp[i];
218             itrSm++;
219          } else if ( temp[i] > pivot ) {
220             bigger[itrBg] = temp[i];
221             itrBg++;
222          }
223       }
224       if ( itrSm > index ) { // quantile must be in the 'smaller' array
225          return quantile( index, smaller, itrSm );
226       } else if ( itrSm < index ) { // quantile is in the 'bigger' array
227          return quantile( index - itrSm - 1, bigger, itrBg );
228       } else {
229          return pivot;
230       }
231 
232    }
233 
234 }