View Javadoc

1   package baseCode.math;
2   
3   import java.util.Set;
4   
5   import cern.jet.stat.Probability;
6   
7   /***
8    * Functions for calculating receiver operating characteristic (ROC) statistics.
9    * <p>
10   * Copyright (c) 2004 Columbia University
11   * 
12   * @author Paul Pavlidis
13   * @version $Id: ROC.java,v 1.13 2004/08/16 00:21:40 pavlidis Exp $
14   */
15  public class ROC {
16  
17     /***
18      * Calculate area under ROC. The input is the total number of items in the data, and the ranks of the positives in
19      * the current ranking. LOW ranks are considered better. (e.g., rank 0 is the 'best')
20      * 
21      * @param totalSize int
22      * @param ranks Map
23      * @return AROC
24      */
25     public static double aroc( int totalSize, Set ranks ) {
26        return ROC.aroc( totalSize, ranks, -1 );
27     }
28  
29     /***
30      * Calculate area under ROC, up to a given number of False positives. The input is the total number of items in the
31      * data, and the ranks of the positives in the current ranking. LOW ranks are considered better. (e.g., rank 0 is the
32      * 'best')
33      * 
34      * @param totalSize int
35      * @param ranks Map
36      * @param maxFP - the maximum number of false positives to see before stopping. Set to 50 to get the Gribskov roc50.
37      *        If maxFP <= 0, it is ignored.
38      * @return AROC
39      */
40     public static double aroc( int totalSize, Set ranks, int maxFP ) {
41        int numPosSeen = 0;
42        int numNegSeen = 0;
43        int targetSize = ranks.size();
44        
45        if ( targetSize == 0 ) {
46           return 0.0;
47        }
48        
49        if (totalSize <= 0) {
50           throw new IllegalArgumentException("Total size must be positive. ( received " + totalSize + ")");
51        }
52        
53        double result = 0.0;
54        for ( int i = 0; i < totalSize; i++ ) {
55           if ( ranks.contains( new Integer( i ) ) ) { // if the ith item in the ranked list is a positive.
56              numPosSeen++;
57           //   System.err.print(i + "+ " );
58           } else { 
59           //   System.err.print(i +  "- " );
60              result += numPosSeen;
61              numNegSeen++;
62              if ( maxFP > 0 && numNegSeen >= maxFP ) {
63                 break;
64              }
65           }
66  
67           //         if ( numPosSeen == targetSize ) { // we've seen all the positives, we can stop.
68           //            result += numPosSeen * ( totalSize - i - 1 );
69           //            break;
70           //        }
71        }
72  //      System.err.println( numNegSeen + " negs, " + numPosSeen
73  //            + " pos seen out of " + targetSize + " positives" );
74  
75        if (numPosSeen == 0 ) return 0.0;
76        
77        if ( maxFP > 0 ) {
78           return result / ( targetSize * numNegSeen );
79        }
80        return result / ( numPosSeen * ( totalSize - targetSize ) );
81  
82     }
83  
84     /***
85      * For an AROC value, calculates a p value based on approximation for calculating the stanadard deviation. Highly
86      * approximate!
87      * 
88      * @param numpos How many positives are in the data.
89      * @param aroc The AROC
90      * @return The p value.
91      */
92     public static double rocpval( int numpos, double aroc ) {
93        double stdev = Math.exp( -0.5 * ( Math.log( numpos ) + 1 ) );
94        double z = ( aroc - 0.5 ) / stdev;
95  
96        /* We are only interested in the upper tails. */
97        if ( z < 0.0 ) {
98           z = 0.0;
99        }
100       return 1.0 - Probability.normal( z );
101    }
102 
103 }