View Javadoc

1   package baseCode.math;
2   
3   import cern.jet.stat.Gamma;
4   
5   /***
6    * Assorted special functions, primarily concerning probability distributions. For cumBinomial use
7    * cern.jet.stat.Probability.binomial.
8    * <p>
9    * Mostly ported from the R source tree (dhyper.c etc.), much due to Catherine Loader.
10   * <p>
11   * 
12   * @see <a href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/Gamma.html">cern.jet.stat.gamma </a>
13   * @see <a
14   *      href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/math/Arithmetic.html">cern.jet.math.arithmetic
15   *      </a>
16   *      <p>
17   *      Copyright (c) 2004 Columbia University
18   * @author Paul Pavlidis
19   * @version $Id: SpecFunc.java,v 1.7 2004/12/24 23:16:09 pavlidis Exp $
20   */
21  public class SpecFunc {
22  
23     /***
24      * Ported from R phyper.c
25      * <p>
26      * Sample of n balls from NR red and NB black ones; x are red
27      * <p>
28      * 
29      * @param x - number of reds retrieved == successes
30      * @param NR - number of reds in the urn. == positives
31      * @param NB - number of blacks in the urn == negatives
32      * @param n - the total number of objects drawn == successes + failures
33      * @param lowerTail
34      * @return cumulative hypergeometric distribution.
35      */
36     public static double phyper( int x, int NR, int NB, int n, boolean lowerTail ) {
37  
38        double d, pd;
39  
40        if ( NR < 0 || NB < 0 || n < 0 || n > NR + NB ) {
41           throw new IllegalArgumentException();
42        }
43  
44        if ( x * ( NR + NB ) > n * NR ) {
45           /* Swap tails. */
46           int oldNB = NB;
47           NB = NR;
48           NR = oldNB;
49           x = n - x - 1;
50           lowerTail = !lowerTail;
51        }
52  
53        if ( x < 0 ) return 0.0;
54  
55        d = dhyper( x, NR, NB, n );
56        pd = pdhyper( x, NR, NB, n );
57  
58        return lowerTail ? d * pd : 1.0 - ( d * pd );
59     }
60  
61     /***
62      * Ported from R (Catherine Loader)
63      * <p>
64      * DESCRIPTION
65      * <p>
66      * Given a sequence of r successes and b failures, we sample n (\le b+r) items without replacement. The
67      * hypergeometric probability is the probability of x successes:
68      * 
69      * <pre>
70      * 
71      *  
72      *   
73      *    
74      *     
75      *      
76      *       
77      *        
78      *         
79      *          
80      *                     choose(r, x) * choose(b, n-x)
81      *           p(x; r,b,n) =  -----------------------------  =
82      *                        choose(r+b, n)
83      *          
84      *                    dbinom(x,r,p) * dbinom(n-x,b,p)
85      *                  = --------------------------------
86      *                        dbinom(n,r+b,p)
87      *          
88      *          
89      *         
90      *        
91      *       
92      *      
93      *     
94      *    
95      *   
96      *  
97      * </pre>
98      * 
99      * for any p. For numerical stability, we take p=n/(r+b); with this choice, the denominator is not exponentially
100     * small.
101     */
102    public static double dhyper( int x, int r, int b, int n ) {
103       double p, q, p1, p2, p3;
104 
105       if ( r < 0 || b < 0 || n < 0 || n > r + b )
106             throw new IllegalArgumentException();
107 
108       if ( x < 0 ) return 0.0;
109 
110       if ( n < x || r < x || n - x > b ) return 0;
111       if ( n == 0 ) return ( ( x == 0 ) ? 1 : 0 );
112 
113       p = ( ( double ) n ) / ( ( double ) ( r + b ) );
114       q = ( ( double ) ( r + b - n ) ) / ( ( double ) ( r + b ) );
115 
116       p1 = dbinom_raw( x, r, p, q );
117       p2 = dbinom_raw( n - x, b, p, q );
118       p3 = dbinom_raw( n, r + b, p, q );
119 
120       return p1 * p2 / p3;
121    }
122 
123    /***
124     * See dbinom_raw.
125     * <hr>
126     * 
127     * @param x Number of successes
128     * @param n Number of trials
129     * @param p Probability of success
130     * @return
131     */
132    public static double dbinom( double x, double n, double p ) {
133 
134       if ( p < 0 || p > 1 || n < 0 ) throw new IllegalArgumentException();
135 
136       return dbinom_raw( x, n, p, 1 - p );
137    }
138 
139    /***
140     * Ported from R phyper.c
141     * <p>
142     * Calculate
143     * 
144     * <pre>
145     * 
146     *  
147     *         
148     *                  phyper (x, NR, NB, n, TRUE, FALSE)
149     *                [log]  ----------------------------------
150     *                     dhyper (x, NR, NB, n, FALSE)
151     *    
152     *   
153     *  
154     * </pre>
155     * 
156     * without actually calling phyper. This assumes that
157     * 
158     * <pre>
159     * x * ( NR + NB ) &lt;= n * NR
160     * </pre>
161     * 
162     * <hr>
163     * 
164     * @param x - number of reds retrieved == successes
165     * @param NR - number of reds in the urn. == positives
166     * @param NB - number of blacks in the urn == negatives
167     * @param n - the total number of objects drawn == successes + failures
168     */
169    private static double pdhyper( int x, int NR, int NB, int n ) {
170       double sum = 0.0;
171       double term = 1.0;
172 
173       while ( x > 0.0 && term >= Double.MIN_VALUE * sum ) {
174          term *= ( double ) x * ( NB - n + x ) / ( n + 1 - x ) / ( NR + 1 - x );
175          sum += term;
176          x--;
177       }
178 
179       return 1.0 + sum;
180    }
181 
182    /***
183     * Ported from R dbinom.c
184     * <p>
185     * Due to Catherine Loader, catherine@research.bell-labs.com.
186     * <p>
187     * To compute the binomial probability, call dbinom(x,n,p). This checks for argument validity, and calls
188     * dbinom_raw().
189     * <p>
190     * dbinom_raw() does the actual computation; note this is called by other functions in addition to dbinom()).
191     * <ol>
192     * <li>dbinom_raw() has both p and q arguments, when one may be represented more accurately than the other (in
193     * particular, in df()).
194     * <li>dbinom_raw() does NOT check that inputs x and n are integers. This should be done in the calling function,
195     * where necessary.
196     * <li>Also does not check for 0 <= p <= 1 and 0 <= q <= 1 or NaN's. Do this in the calling function.
197     * </ol>
198     * <hr>
199     * 
200     * @param x Number of successes
201     * @param n Number of trials
202     * @param p Probability of success
203     * @param q 1 - p
204     */
205    private static double dbinom_raw( double x, double n, double p, double q ) {
206       double f, lc;
207 
208       if ( p == 0 ) return ( ( x == 0 ) ? 1 : 0 );
209       if ( q == 0 ) return ( ( x == n ) ? 1 : 0 );
210 
211       if ( x == 0 ) {
212          if ( n == 0 ) return 1;
213          lc = ( p < 0.1 ) ? -bd0( n, n * q ) - n * p : n * Math.log( q );
214          return ( Math.exp( lc ) );
215       }
216       if ( x == n ) {
217          lc = ( q < 0.1 ) ? -bd0( n, n * p ) - n * q : n * Math.log( p );
218          return ( Math.exp( lc ) );
219       }
220       if ( x < 0 || x > n ) return ( 0 );
221 
222       lc = stirlerr( n ) - stirlerr( x ) - stirlerr( n - x ) - bd0( x, n * p )
223             - bd0( n - x, n * q );
224       f = ( 2 * Math.PI * x * ( n - x ) ) / n;
225 
226       return Math.exp( lc ) / Math.sqrt( f );
227    }
228 
229    /***
230     * Ported from stirlerr.c (Catherine Loader).
231     * <p>
232     * Note that this is the same functionality as colt's Arithemetic.stirlingCorrection. I am keeping this version for
233     * compatibility with R.
234     * 
235     * <pre>
236     * 
237     *  
238     *   
239     *    
240     *     
241     *      
242     *       
243     *         stirlerr(n) = log(n!) - log( sqrt(2*pi*n)*(n/e)&circ;n )
244     *                    = log Gamma(n+1) - 1/2 * [log(2*pi) + log(n)] - n*[log(n) - 1]
245     *                    = log Gamma(n+1) - (n + 1/2) * log(n) + n - log(2*pi)/2
246     *       
247     *       
248     *      
249     *     
250     *    
251     *   
252     *  
253     * </pre>
254     */
255    private static double stirlerr( double n ) {
256 
257       double S0 = 0.083333333333333333333; /* 1/12 */
258       double S1 = 0.00277777777777777777778; /* 1/360 */
259       double S2 = 0.00079365079365079365079365; /* 1/1260 */
260       double S3 = 0.000595238095238095238095238;/* 1/1680 */
261       double S4 = 0.0008417508417508417508417508;/* 1/1188 */
262 
263       /*
264        * error for 0, 0.5, 1.0, 1.5, ..., 14.5, 15.0.
265        */
266       double[] sferr_halves = new double[] {
267             0.0, /* n=0 - wrong, place holder only */
268             0.1534264097200273452913848, /* 0.5 */
269             0.0810614667953272582196702, /* 1.0 */
270             0.0548141210519176538961390, /* 1.5 */
271             0.0413406959554092940938221, /* 2.0 */
272             0.03316287351993628748511048, /* 2.5 */
273             0.02767792568499833914878929, /* 3.0 */
274             0.02374616365629749597132920, /* 3.5 */
275             0.02079067210376509311152277, /* 4.0 */
276             0.01848845053267318523077934, /* 4.5 */
277             0.01664469118982119216319487, /* 5.0 */
278             0.01513497322191737887351255, /* 5.5 */
279             0.01387612882307074799874573, /* 6.0 */
280             0.01281046524292022692424986, /* 6.5 */
281             0.01189670994589177009505572, /* 7.0 */
282             0.01110455975820691732662991, /* 7.5 */
283             0.010411265261972096497478567, /* 8.0 */
284             0.009799416126158803298389475, /* 8.5 */
285             0.009255462182712732917728637, /* 9.0 */
286             0.008768700134139385462952823, /* 9.5 */
287             0.008330563433362871256469318, /* 10.0 */
288             0.007934114564314020547248100, /* 10.5 */
289             0.007573675487951840794972024, /* 11.0 */
290             0.007244554301320383179543912, /* 11.5 */
291             0.006942840107209529865664152, /* 12.0 */
292             0.006665247032707682442354394, /* 12.5 */
293             0.006408994188004207068439631, /* 13.0 */
294             0.006171712263039457647532867, /* 13.5 */
295             0.005951370112758847735624416, /* 14.0 */
296             0.005746216513010115682023589, /* 14.5 */
297             0.005554733551962801371038690
298       /* 15.0 */
299       };
300 
301       double nn;
302 
303       if ( n <= 15.0 ) {
304          nn = n + n;
305          if ( nn == ( int ) nn ) return ( sferr_halves[( int ) nn] );
306          return ( Gamma.logGamma( n + 1. ) - ( n + 0.5 ) * Math.log( n ) + n - Constants.M_LN_SQRT_2PI );
307       }
308 
309       nn = n * n;
310       if ( n > 500 ) return ( ( S0 - S1 / nn ) / n );
311       if ( n > 80 ) return ( ( S0 - ( S1 - S2 / nn ) / nn ) / n );
312       if ( n > 35 )
313             return ( ( S0 - ( S1 - ( S2 - S3 / nn ) / nn ) / nn ) / n );
314       /* 15 < n <= 35 : */
315       return ( ( S0 - ( S1 - ( S2 - ( S3 - S4 / nn ) / nn ) / nn ) / nn ) / n );
316    }
317 
318    /***
319     * Ported from bd0.c in R source.
320     * <p>
321     * Evaluates the "deviance part"
322     * 
323     * <pre>
324     * 
325     *  
326     *   
327     *    bd0(x,M) :=  M * D0(x/M) = M*[ x/M * log(x/M) + 1 - (x/M) ] =
328     *         =  x * log(x/M) + M - x
329     *    where M = E[X] = n*p (or = lambda), for     x, M &gt; 0
330     *   
331     *   
332     *  
333     * <p>
334     * 
335     *  
336     *   
337     *    in a manner that should be stable (with small relative error)
338     *    for all x and np. In particular for x/np close to 1, direct
339     *    evaluation fails, and evaluation is based on the Taylor series
340     *    of log((1+v)/(1-v)) with v = (x-np)/(x+np).
341     *    
342     *   
343     *  
344     * <hr>
345     * 
346     *  
347     *   @param x
348     *   @param np
349     *   @return
350     * 
351     */
352    private static double bd0( double x, double np ) {
353       double ej, s, s1, v;
354       int j;
355 
356       if ( Math.abs( x - np ) < 0.1 * ( x + np ) ) {
357          v = ( x - np ) / ( x + np );
358          s = ( x - np ) * v;/* s using v -- change by MM */
359          ej = 2 * x * v;
360          v = v * v;
361          for ( j = 1;; j++ ) { /* Taylor series */
362             ej *= v;
363             s1 = s + ej / ( ( j << 1 ) + 1 );
364             if ( s1 == s ) /* last term was effectively 0 */
365             return ( s1 );
366             s = s1;
367          }
368       }
369       /* else: | x - np | is not too small */
370       return ( x * Math.log( x / np ) + np - x );
371    }
372 
373  
374 
375   
376 
377 }