1 package baseCode.math;
2
3 import java.util.Set;
4
5 import cern.jet.stat.Probability;
6
7 /***
8 * Functions for calculating Receiver operator characteristics.
9 * <p>
10 * Copyright (c) 2004 Columbia University
11 *
12 * @author Paul Pavlidis
13 * @version $Id: ROC.java,v 1.13 2004/08/16 00:21:40 pavlidis Exp $
14 */
15 public class ROC {
16
17 /***
18 * Calculate area under ROC. The input is the total number of items in the data, and the ranks of the positives in
19 * the current ranking. LOW ranks are considered better. (e.g., rank 0 is the 'best')
20 *
21 * @param totalSize int
22 * @param ranks Map
23 * @return AROC
24 */
25 public static double aroc( int totalSize, Set ranks ) {
26 return ROC.aroc( totalSize, ranks, -1 );
27 }
28
29 /***
30 * Calculate area under ROC, up to a given number of False positives. The input is the total number of items in the
31 * data, and the ranks of the positives in the current ranking. LOW ranks are considered better. (e.g., rank 0 is the
32 * 'best')
33 *
34 * @param totalSize int
35 * @param ranks Map
36 * @param maxFP - the maximum number of false positives to see before stopping. Set to 50 to get the Gribskov roc50.
37 * If maxFP <= 0, it is ignored.
38 * @return AROC
39 */
40 public static double aroc( int totalSize, Set ranks, int maxFP ) {
41 int numPosSeen = 0;
42 int numNegSeen = 0;
43 int targetSize = ranks.size();
44
45 if ( targetSize == 0 ) {
46 return 0.0;
47 }
48
49 if (totalSize <= 0) {
50 throw new IllegalArgumentException("Total size must be positive. ( received " + totalSize + ")");
51 }
52
53 double result = 0.0;
54 for ( int i = 0; i < totalSize; i++ ) {
55 if ( ranks.contains( new Integer( i ) ) ) {
56 numPosSeen++;
57
58 } else {
59
60 result += numPosSeen;
61 numNegSeen++;
62 if ( maxFP > 0 && numNegSeen >= maxFP ) {
63 break;
64 }
65 }
66
67
68
69
70
71 }
72
73
74
75 if (numPosSeen == 0 ) return 0.0;
76
77 if ( maxFP > 0 ) {
78 return result / ( targetSize * numNegSeen );
79 }
80 return result / ( numPosSeen * ( totalSize - targetSize ) );
81
82 }
83
84 /***
85 * For an AROC value, calculates a p value based on approximation for calculating the stanadard deviation. Highly
86 * approximate!
87 *
88 * @param numpos How many positives are in the data.
89 * @param aroc The AROC
90 * @return The p value.
91 */
92 public static double rocpval( int numpos, double aroc ) {
93 double stdev = Math.exp( -0.5 * ( Math.log( numpos ) + 1 ) );
94 double z = ( aroc - 0.5 ) / stdev;
95
96
97 if ( z < 0.0 ) {
98 z = 0.0;
99 }
100 return 1.0 - Probability.normal( z );
101 }
102
103 }