1 package baseCode.math;
2
3 import cern.jet.stat.Gamma;
4
5 /***
6 * Assorted special functions, primarily concerning probability distributions. For cumBinomial use
7 * cern.jet.stat.Probability.binomial.
8 * <p>
9 * Mostly ported from the R source tree (dhyper.c etc.), much due to Catherine Loader.
10 * <p>
11 *
12 * @see <a href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/Gamma.html">cern.jet.stat.gamma </a>
13 * @see <a
14 * href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/math/Arithmetic.html">cern.jet.math.arithmetic
15 * </a>
16 * <p>
17 * Copyright (c) 2004 Columbia University
18 * @author Paul Pavlidis
19 * @version $Id: SpecFunc.java,v 1.7 2004/12/24 23:16:09 pavlidis Exp $
20 */
21 public class SpecFunc {
22
23 /***
24 * Ported from R phyper.c
25 * <p>
26 * Sample of n balls from NR red and NB black ones; x are red
27 * <p>
28 *
29 * @param x - number of reds retrieved == successes
30 * @param NR - number of reds in the urn. == positives
31 * @param NB - number of blacks in the urn == negatives
32 * @param n - the total number of objects drawn == successes + failures
33 * @param lowerTail
34 * @return cumulative hypergeometric distribution.
35 */
36 public static double phyper( int x, int NR, int NB, int n, boolean lowerTail ) {
37
38 double d, pd;
39
40 if ( NR < 0 || NB < 0 || n < 0 || n > NR + NB ) {
41 throw new IllegalArgumentException();
42 }
43
44 if ( x * ( NR + NB ) > n * NR ) {
45
46 int oldNB = NB;
47 NB = NR;
48 NR = oldNB;
49 x = n - x - 1;
50 lowerTail = !lowerTail;
51 }
52
53 if ( x < 0 ) return 0.0;
54
55 d = dhyper( x, NR, NB, n );
56 pd = pdhyper( x, NR, NB, n );
57
58 return lowerTail ? d * pd : 1.0 - ( d * pd );
59 }
60
61 /***
62 * Ported from R (Catherine Loader)
63 * <p>
64 * DESCRIPTION
65 * <p>
66 * Given a sequence of r successes and b failures, we sample n (\le b+r) items without replacement. The
67 * hypergeometric probability is the probability of x successes:
68 *
69 * <pre>
70 *
71 *
72 *
73 *
74 *
75 *
76 *
77 *
78 *
79 *
80 * choose(r, x) * choose(b, n-x)
81 * p(x; r,b,n) = ----------------------------- =
82 * choose(r+b, n)
83 *
84 * dbinom(x,r,p) * dbinom(n-x,b,p)
85 * = --------------------------------
86 * dbinom(n,r+b,p)
87 *
88 *
89 *
90 *
91 *
92 *
93 *
94 *
95 *
96 *
97 * </pre>
98 *
99 * for any p. For numerical stability, we take p=n/(r+b); with this choice, the denominator is not exponentially
100 * small.
101 */
102 public static double dhyper( int x, int r, int b, int n ) {
103 double p, q, p1, p2, p3;
104
105 if ( r < 0 || b < 0 || n < 0 || n > r + b )
106 throw new IllegalArgumentException();
107
108 if ( x < 0 ) return 0.0;
109
110 if ( n < x || r < x || n - x > b ) return 0;
111 if ( n == 0 ) return ( ( x == 0 ) ? 1 : 0 );
112
113 p = ( ( double ) n ) / ( ( double ) ( r + b ) );
114 q = ( ( double ) ( r + b - n ) ) / ( ( double ) ( r + b ) );
115
116 p1 = dbinom_raw( x, r, p, q );
117 p2 = dbinom_raw( n - x, b, p, q );
118 p3 = dbinom_raw( n, r + b, p, q );
119
120 return p1 * p2 / p3;
121 }
122
123 /***
124 * See dbinom_raw.
125 * <hr>
126 *
127 * @param x Number of successes
128 * @param n Number of trials
129 * @param p Probability of success
130 * @return
131 */
132 public static double dbinom( double x, double n, double p ) {
133
134 if ( p < 0 || p > 1 || n < 0 ) throw new IllegalArgumentException();
135
136 return dbinom_raw( x, n, p, 1 - p );
137 }
138
139 /***
140 * Ported from R phyper.c
141 * <p>
142 * Calculate
143 *
144 * <pre>
145 *
146 *
147 *
148 * phyper (x, NR, NB, n, TRUE, FALSE)
149 * [log] ----------------------------------
150 * dhyper (x, NR, NB, n, FALSE)
151 *
152 *
153 *
154 * </pre>
155 *
156 * without actually calling phyper. This assumes that
157 *
158 * <pre>
159 * x * ( NR + NB ) <= n * NR
160 * </pre>
161 *
162 * <hr>
163 *
164 * @param x - number of reds retrieved == successes
165 * @param NR - number of reds in the urn. == positives
166 * @param NB - number of blacks in the urn == negatives
167 * @param n - the total number of objects drawn == successes + failures
168 */
169 private static double pdhyper( int x, int NR, int NB, int n ) {
170 double sum = 0.0;
171 double term = 1.0;
172
173 while ( x > 0.0 && term >= Double.MIN_VALUE * sum ) {
174 term *= ( double ) x * ( NB - n + x ) / ( n + 1 - x ) / ( NR + 1 - x );
175 sum += term;
176 x--;
177 }
178
179 return 1.0 + sum;
180 }
181
182 /***
183 * Ported from R dbinom.c
184 * <p>
185 * Due to Catherine Loader, catherine@research.bell-labs.com.
186 * <p>
187 * To compute the binomial probability, call dbinom(x,n,p). This checks for argument validity, and calls
188 * dbinom_raw().
189 * <p>
190 * dbinom_raw() does the actual computation; note this is called by other functions in addition to dbinom()).
191 * <ol>
192 * <li>dbinom_raw() has both p and q arguments, when one may be represented more accurately than the other (in
193 * particular, in df()).
194 * <li>dbinom_raw() does NOT check that inputs x and n are integers. This should be done in the calling function,
195 * where necessary.
196 * <li>Also does not check for 0 <= p <= 1 and 0 <= q <= 1 or NaN's. Do this in the calling function.
197 * </ol>
198 * <hr>
199 *
200 * @param x Number of successes
201 * @param n Number of trials
202 * @param p Probability of success
203 * @param q 1 - p
204 */
205 private static double dbinom_raw( double x, double n, double p, double q ) {
206 double f, lc;
207
208 if ( p == 0 ) return ( ( x == 0 ) ? 1 : 0 );
209 if ( q == 0 ) return ( ( x == n ) ? 1 : 0 );
210
211 if ( x == 0 ) {
212 if ( n == 0 ) return 1;
213 lc = ( p < 0.1 ) ? -bd0( n, n * q ) - n * p : n * Math.log( q );
214 return ( Math.exp( lc ) );
215 }
216 if ( x == n ) {
217 lc = ( q < 0.1 ) ? -bd0( n, n * p ) - n * q : n * Math.log( p );
218 return ( Math.exp( lc ) );
219 }
220 if ( x < 0 || x > n ) return ( 0 );
221
222 lc = stirlerr( n ) - stirlerr( x ) - stirlerr( n - x ) - bd0( x, n * p )
223 - bd0( n - x, n * q );
224 f = ( 2 * Math.PI * x * ( n - x ) ) / n;
225
226 return Math.exp( lc ) / Math.sqrt( f );
227 }
228
229 /***
230 * Ported from stirlerr.c (Catherine Loader).
231 * <p>
232 * Note that this is the same functionality as colt's Arithemetic.stirlingCorrection. I am keeping this version for
233 * compatibility with R.
234 *
235 * <pre>
236 *
237 *
238 *
239 *
240 *
241 *
242 *
243 * stirlerr(n) = log(n!) - log( sqrt(2*pi*n)*(n/e)ˆn )
244 * = log Gamma(n+1) - 1/2 * [log(2*pi) + log(n)] - n*[log(n) - 1]
245 * = log Gamma(n+1) - (n + 1/2) * log(n) + n - log(2*pi)/2
246 *
247 *
248 *
249 *
250 *
251 *
252 *
253 * </pre>
254 */
255 private static double stirlerr( double n ) {
256
257 double S0 = 0.083333333333333333333;
258 double S1 = 0.00277777777777777777778;
259 double S2 = 0.00079365079365079365079365;
260 double S3 = 0.000595238095238095238095238;
261 double S4 = 0.0008417508417508417508417508;
262
263
264
265
266 double[] sferr_halves = new double[] {
267 0.0,
268 0.1534264097200273452913848,
269 0.0810614667953272582196702,
270 0.0548141210519176538961390,
271 0.0413406959554092940938221,
272 0.03316287351993628748511048,
273 0.02767792568499833914878929,
274 0.02374616365629749597132920,
275 0.02079067210376509311152277,
276 0.01848845053267318523077934,
277 0.01664469118982119216319487,
278 0.01513497322191737887351255,
279 0.01387612882307074799874573,
280 0.01281046524292022692424986,
281 0.01189670994589177009505572,
282 0.01110455975820691732662991,
283 0.010411265261972096497478567,
284 0.009799416126158803298389475,
285 0.009255462182712732917728637,
286 0.008768700134139385462952823,
287 0.008330563433362871256469318,
288 0.007934114564314020547248100,
289 0.007573675487951840794972024,
290 0.007244554301320383179543912,
291 0.006942840107209529865664152,
292 0.006665247032707682442354394,
293 0.006408994188004207068439631,
294 0.006171712263039457647532867,
295 0.005951370112758847735624416,
296 0.005746216513010115682023589,
297 0.005554733551962801371038690
298
299 };
300
301 double nn;
302
303 if ( n <= 15.0 ) {
304 nn = n + n;
305 if ( nn == ( int ) nn ) return ( sferr_halves[( int ) nn] );
306 return ( Gamma.logGamma( n + 1. ) - ( n + 0.5 ) * Math.log( n ) + n - Constants.M_LN_SQRT_2PI );
307 }
308
309 nn = n * n;
310 if ( n > 500 ) return ( ( S0 - S1 / nn ) / n );
311 if ( n > 80 ) return ( ( S0 - ( S1 - S2 / nn ) / nn ) / n );
312 if ( n > 35 )
313 return ( ( S0 - ( S1 - ( S2 - S3 / nn ) / nn ) / nn ) / n );
314
315 return ( ( S0 - ( S1 - ( S2 - ( S3 - S4 / nn ) / nn ) / nn ) / nn ) / n );
316 }
317
318 /***
319 * Ported from bd0.c in R source.
320 * <p>
321 * Evaluates the "deviance part"
322 *
323 * <pre>
324 *
325 *
326 *
327 * bd0(x,M) := M * D0(x/M) = M*[ x/M * log(x/M) + 1 - (x/M) ] =
328 * = x * log(x/M) + M - x
329 * where M = E[X] = n*p (or = lambda), for x, M > 0
330 *
331 *
332 *
333 * <p>
334 *
335 *
336 *
337 * in a manner that should be stable (with small relative error)
338 * for all x and np. In particular for x/np close to 1, direct
339 * evaluation fails, and evaluation is based on the Taylor series
340 * of log((1+v)/(1-v)) with v = (x-np)/(x+np).
341 *
342 *
343 *
344 * <hr>
345 *
346 *
347 * @param x
348 * @param np
349 * @return
350 *
351 */
352 private static double bd0( double x, double np ) {
353 double ej, s, s1, v;
354 int j;
355
356 if ( Math.abs( x - np ) < 0.1 * ( x + np ) ) {
357 v = ( x - np ) / ( x + np );
358 s = ( x - np ) * v;
359 ej = 2 * x * v;
360 v = v * v;
361 for ( j = 1;; j++ ) {
362 ej *= v;
363 s1 = s + ej / ( ( j << 1 ) + 1 );
364 if ( s1 == s )
365 return ( s1 );
366 s = s1;
367 }
368 }
369
370 return ( x * Math.log( x / np ) + np - x );
371 }
372
373
374
375
376
377 }