1 package baseCode.math;
2
3 import cern.colt.list.DoubleArrayList;
4 import cern.jet.stat.Descriptive;
5
6 /***
7 * Miscellaneous functions used for statistical analysis. Some are optimized or specialized versions of methods that can
8 * be found elsewhere.
9 *
10 * @see <a href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/math/package-summary.html">cern.jet.math
11 * </a>
12 * @see <a href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/package-summary.html">cern.jet.stat
13 * </a>
14 * <p>
15 * Copyright (c) 2004
16 * </p>
17 * <p>
18 * Columbia University
19 * </p>
20 * @author Paul Pavlidis
21 * @version $Id: Stats.java,v 1.10 2004/07/27 03:18:58 pavlidis Exp $
22 */
23 public class Stats {
24
25 private Stats() {
26 };
27
28 /***
29 * Test whether a value is a valid fractional or probability value.
30 *
31 * @param value
32 * @return true if the value is in the interval 0 to 1.
33 */
34 public static boolean isValidFraction( double value ) {
35 if ( value > 1.0 || value < 0.0 ) {
36 return false;
37 }
38 return true;
39 }
40
41 /***
42 * Compute the coefficient of variation of an array (standard deviation / mean)
43 *
44 * @param data DoubleArrayList
45 * @return the cv
46 * @todo offer a regularized version of this function.
47 */
48 public static double cv( DoubleArrayList data ) {
49 double mean = DescriptiveWithMissing.mean( data );
50 return mean
51 / Math.sqrt( DescriptiveWithMissing.sampleVariance( data, mean ) );
52 }
53
54 /***
55 * Convert an array into a cumulative array. Summing is from the left hand side. Use this to make CDFs where the
56 * concern is the left tail.
57 *
58 * @param x DoubleArrayList
59 * @return cern.colt.list.DoubleArrayList
60 */
61 public static DoubleArrayList cumulate( DoubleArrayList x ) {
62 if ( x.size() == 0 ) {
63 return new DoubleArrayList( 0 );
64 }
65
66 DoubleArrayList r = new DoubleArrayList();
67
68 double sum = 0.0;
69 for ( int i = 0; i < x.size(); i++ ) {
70 sum += x.get( i );
71 r.add( sum );
72 }
73 return r;
74 }
75
76 /***
77 * Convert an array into a cumulative array. Summing is from the right hand side. This is useful for creating
78 * upper-tail cumulative density histograms from count histograms, where the upper tail is expected to have very
79 * small numbers that could be lost to rounding.
80 *
81 * @param x the array of data to be cumulated.
82 * @return cern.colt.list.DoubleArrayList
83 */
84 public static DoubleArrayList cumulateRight( DoubleArrayList x ) {
85 if ( x.size() == 0 ) {
86 return new DoubleArrayList( 0 );
87 }
88
89 DoubleArrayList r = new DoubleArrayList( new double[x.size()] );
90
91 double sum = 0.0;
92 for ( int i = x.size() - 1; i >= 0; i-- ) {
93 sum += x.get( i );
94 r.set( i, sum );
95 }
96 return r;
97 }
98
99 /***
100 * Convert an array into a cumulative density function (CDF). This assumes that the input contains counts
101 * representing the distribution in question.
102 *
103 * @param x The input of counts (i.e. a histogram).
104 * @return DoubleArrayList the CDF.
105 */
106 public static DoubleArrayList cdf( DoubleArrayList x ) {
107 return cumulateRight( normalize( x ) );
108 }
109
110 /***
111 * Divide the elements of an array by a given factor.
112 *
113 * @param x Input array.
114 * @param normfactor double
115 * @return Normalized array.
116 */
117 public static DoubleArrayList normalize( DoubleArrayList x, double normfactor ) {
118 if ( x.size() == 0 ) {
119 return new DoubleArrayList( 0 );
120 }
121
122 DoubleArrayList r = new DoubleArrayList();
123
124 for ( int i = 0; i < x.size(); i++ ) {
125 r.add( x.get( i ) / normfactor );
126 }
127 return r;
128
129 }
130
131 /***
132 * Adjust the elements of an array so they total to 1.0.
133 *
134 * @param x Input array.
135 * @return Normalized array.
136 */
137 public static DoubleArrayList normalize( DoubleArrayList x ) {
138 return normalize( x, Descriptive.sum( x ) );
139 }
140
141 /***
142 * calculate the mean of the values above (NOT greater or equal to) a particular index rank of an array. Quantile
143 * must be a value from 0 to 100.
144 *
145 * @see DescriptiveWithMissing#meanAboveQuantile
146 * @param index the rank of the value we wish to average above.
147 * @param array Array for which we want to get the quantile.
148 * @param effectiveSize The size of the array, not including NaNs.
149 * @return double
150 */
151 public static double meanAboveQuantile( int index, double[] array,
152 int effectiveSize ) {
153
154 double[] temp = new double[effectiveSize];
155 double median;
156 double returnvalue = 0.0;
157 int k = 0;
158
159 temp = array;
160 median = quantile( index, array, effectiveSize );
161
162 for ( int i = 0; i < effectiveSize; i++ ) {
163 if ( temp[i] > median ) {
164 returnvalue += temp[i];
165 k++;
166 }
167 }
168 return ( returnvalue / k );
169 }
170
171 /***
172 * Compute the range of an array.
173 *
174 * @param data DoubleArrayList
175 * @return double
176 */
177 public static double range( DoubleArrayList data ) {
178 return Descriptive.max( data ) - Descriptive.min( data );
179 }
180
181 /***
182 * Given a double array, calculate the quantile requested. Note that no interpolation is done.
183 *
184 * @see DescriptiveWithMissing#quantile
185 * @param index - the rank of the value we wish to get. Thus if we have 200 items in the array, and want the median,
186 * we should enter 100.
187 * @param values double[] - array of data we want quantile of
188 * @param effectiveSize int the effective size of the array
189 * @return double the value at the requested quantile
190 */
191 public static double quantile( int index, double[] values, int effectiveSize ) {
192 double pivot = -1.0;
193 if ( index == 0 ) {
194 double ans = values[0];
195 for ( int i = 1; i < effectiveSize; i++ ) {
196 if ( ans > values[i] ) {
197 ans = values[i];
198 }
199 }
200 return ans;
201 }
202
203 double[] temp = new double[effectiveSize];
204
205 for ( int i = 0; i < effectiveSize; i++ ) {
206 temp[i] = values[i];
207 }
208
209 pivot = temp[0];
210
211 double[] smaller = new double[effectiveSize];
212 double[] bigger = new double[effectiveSize];
213 int itrSm = 0;
214 int itrBg = 0;
215 for ( int i = 1; i < effectiveSize; i++ ) {
216 if ( temp[i] <= pivot ) {
217 smaller[itrSm] = temp[i];
218 itrSm++;
219 } else if ( temp[i] > pivot ) {
220 bigger[itrBg] = temp[i];
221 itrBg++;
222 }
223 }
224 if ( itrSm > index ) {
225 return quantile( index, smaller, itrSm );
226 } else if ( itrSm < index ) {
227 return quantile( index - itrSm - 1, bigger, itrBg );
228 } else {
229 return pivot;
230 }
231
232 }
233
234 }