View Javadoc

1   package baseCode.math;
2   
3   import cern.colt.list.DoubleArrayList;
4   import cern.colt.list.IntArrayList;
5   
6   /***
7    * Alternative distance and similarity metrics for vectors.
8    * <p>
9    * Copyright (c) 2004
10   * </p>
11   * <p>
12   * Institution: Columbia University
13   * </p>
14   * 
15   * @author Paul Pavlidis
16   * @version $Id: Distance.java,v 1.7 2004/08/14 20:38:35 pavlidis Exp $
17   */
18  public class Distance {
19  
20     /***
21      * Calculate the Manhattan distance between two vectors.
22      * 
23      * @param x DoubleArrayList
24      * @param y DoubleArrayList
25      * @return Manhattan distance between x and y
26      */
27     public double manhattanDistance( DoubleArrayList x, DoubleArrayList y ) {
28        int j;
29        double sum = 0.0;
30        int numused = 0;
31  
32        if ( x.size() != y.size() ) {
33           throw new ArithmeticException();
34        }
35  
36        int length = x.size();
37        for ( j = 0; j < length; j++ ) {
38           if ( !Double.isNaN( x.elements()[j] )
39                 && !Double.isNaN( y.elements()[j] ) ) {
40              sum += Math.abs( x.elements()[j] - y.elements()[j] );
41              numused++;
42           }
43        }
44        return sum;
45     }
46  
47     /***
48      * Calculate the Euclidean distance between two vectors.
49      * 
50      * @param x DoubleArrayList
51      * @param y DoubleArrayList
52      * @return Euclidean distance between x and y
53      */
54     public double euclDistance( DoubleArrayList x, DoubleArrayList y ) {
55        int j;
56        double sum;
57        int numused;
58        sum = 0.0;
59        numused = 0;
60  
61        if ( x.size() != y.size() ) {
62           throw new ArithmeticException();
63        }
64  
65        int length = x.size();
66  
67        for ( j = 0; j < length; j++ ) {
68           if ( !Double.isNaN( x.elements()[j] )
69                 && !Double.isNaN( y.elements()[j] ) ) {
70              sum += Math.pow( ( x.elements()[j] - y.elements()[j] ), 2 );
71              numused++;
72           }
73        }
74        if ( sum == 0.0 ) {
75           return 0.0;
76        }
77        return Math.sqrt( sum );
78     }
79  
80     /***
81      * Spearman Rank Correlation. This does the rank transformation of the data.
82      * 
83      * @param x DoubleArrayList
84      * @param y DoubleArrayList
85      * @return Spearman's rank correlation between x and y.
86      */
87     public static double spearmanRankCorrelation( DoubleArrayList x,
88           DoubleArrayList y ) {
89        double sum = 0.0;
90  
91        if ( x.size() != y.size() ) {
92           throw new ArithmeticException();
93        }
94  
95        IntArrayList rx = Rank.rankTransform( x );
96        IntArrayList ry = Rank.rankTransform( y );
97  
98        for ( int j = 0; j < x.size(); j++ ) {
99           sum += ( rx.elements()[j] - ry.elements()[j]
100                * ( rx.elements()[j] - ry.elements()[j] ) );
101       }
102 
103       return 1.0 - 6.0 * sum / ( Math.pow( x.size(), 3 ) - x.size() );
104    }
105 
106    /***
107     * Highly optimized implementation of the Pearson correlation. The inputs must be standardized - mean zero, variance
108     * one, without any missing values.
109     * 
110     * @param xe A standardized vector
111     * @param ye A standardized vector
112     * @return Pearson correlation coefficient.
113     */
114    public static double correlationOfStandardized( double[] xe, double[] ye ) {
115       double sxy = 0.0;
116       for ( int i = 0, n = xe.length; i < n; i++ ) {
117          double xj = xe[i];
118          double yj = ye[i];
119          sxy += xj * yj;
120       }
121 
122       return sxy / xe.length;
123    }
124 
125    /***
126     * Like correlationofNormedFast, but takes DoubleArrayLists as inputs, handles missing values correctly, and does
127     * more error checking. Assumes the data has been converted to z scores already.
128     * 
129     * @param x A standardized vector
130     * @param y A standardized vector
131     * @return The Pearson correlation between x and y.
132     */
133    public static double correlationOfStandardized( DoubleArrayList x,
134          DoubleArrayList y ) {
135 
136       if ( x.size() != y.size() ) {
137          throw new IllegalArgumentException( "Array lengths must be the same" );
138       }
139 
140       double[] xe = x.elements();
141       double[] ye = y.elements();
142       double sxy = 0.0;
143       int length = 0;
144       for ( int i = 0, n = x.size(); i < n; i++ ) {
145          double xj = xe[i];
146          double yj = ye[i];
147 
148          if ( Double.isNaN( xj ) || Double.isNaN( yj ) ) {
149             continue;
150          }
151 
152          sxy += xj * yj;
153          length++;
154       }
155 
156       if ( length == 0 ) {
157          return -2.0; // flag of illegal value.
158       }
159       return sxy / length;
160    }
161 }