1 package baseCode.math;
2
3 import cern.colt.list.DoubleArrayList;
4 import cern.colt.list.IntArrayList;
5
6 /***
7 * Alternative distance and similarity metrics for vectors.
8 * <p>
9 * Copyright (c) 2004
10 * </p>
11 * <p>
12 * Institution:: Columbia University
13 * </p>
14 *
15 * @author Paul Pavlidis
16 * @version $Id: Distance.java,v 1.7 2004/08/14 20:38:35 pavlidis Exp $
17 */
18 public class Distance {
19
20 /***
21 * Calculate the Manhattan distance between two vectors.
22 *
23 * @param x DoubleArrayList
24 * @param y DoubleArrayList
25 * @return Manhattan distance between x and y
26 */
27 public double manhattanDistance( DoubleArrayList x, DoubleArrayList y ) {
28 int j;
29 double sum = 0.0;
30 int numused = 0;
31
32 if ( x.size() != y.size() ) {
33 throw new ArithmeticException();
34 }
35
36 int length = x.size();
37 for ( j = 0; j < length; j++ ) {
38 if ( !Double.isNaN( x.elements()[j] )
39 && !Double.isNaN( y.elements()[j] ) ) {
40 sum += Math.abs( x.elements()[j] - y.elements()[j] );
41 numused++;
42 }
43 }
44 return sum;
45 }
46
47 /***
48 * Calculate the Euclidean distance between two vectors.
49 *
50 * @param x DoubleArrayList
51 * @param y DoubleArrayList
52 * @return Euclidean distance between x and y
53 */
54 public double euclDistance( DoubleArrayList x, DoubleArrayList y ) {
55 int j;
56 double sum;
57 int numused;
58 sum = 0.0;
59 numused = 0;
60
61 if ( x.size() != y.size() ) {
62 throw new ArithmeticException();
63 }
64
65 int length = x.size();
66
67 for ( j = 0; j < length; j++ ) {
68 if ( !Double.isNaN( x.elements()[j] )
69 && !Double.isNaN( y.elements()[j] ) ) {
70 sum += Math.pow( ( x.elements()[j] - y.elements()[j] ), 2 );
71 numused++;
72 }
73 }
74 if ( sum == 0.0 ) {
75 return 0.0;
76 }
77 return Math.sqrt( sum );
78 }
79
80 /***
81 * Spearman Rank Correlation. This does the rank transformation of the data.
82 *
83 * @param x DoubleArrayList
84 * @param y DoubleArrayList
85 * @return Spearman's rank correlation between x and y.
86 */
87 public static double spearmanRankCorrelation( DoubleArrayList x,
88 DoubleArrayList y ) {
89 double sum = 0.0;
90
91 if ( x.size() != y.size() ) {
92 throw new ArithmeticException();
93 }
94
95 IntArrayList rx = Rank.rankTransform( x );
96 IntArrayList ry = Rank.rankTransform( y );
97
98 for ( int j = 0; j < x.size(); j++ ) {
99 sum += ( rx.elements()[j] - ry.elements()[j]
100 * ( rx.elements()[j] - ry.elements()[j] ) );
101 }
102
103 return 1.0 - 6.0 * sum / ( Math.pow( x.size(), 3 ) - x.size() );
104 }
105
106 /***
107 * Highly optimized implementation of the Pearson correlation. The inputs must be standardized - mean zero, variance
108 * one, without any missing values.
109 *
110 * @param xe A standardized vector
111 * @param ye A standardized vector
112 * @return Pearson correlation coefficient.
113 */
114 public static double correlationOfStandardized( double[] xe, double[] ye ) {
115 double sxy = 0.0;
116 for ( int i = 0, n = xe.length; i < n; i++ ) {
117 double xj = xe[i];
118 double yj = ye[i];
119 sxy += xj * yj;
120 }
121
122 return sxy / xe.length;
123 }
124
125 /***
126 * Like correlationofNormedFast, but takes DoubleArrayLists as inputs, handles missing values correctly, and does
127 * more error checking. Assumes the data has been converted to z scores already.
128 *
129 * @param x A standardized vector
130 * @param y A standardized vector
131 * @return The Pearson correlation between x and y.
132 */
133 public static double correlationOfStandardized( DoubleArrayList x,
134 DoubleArrayList y ) {
135
136 if ( x.size() != y.size() ) {
137 throw new IllegalArgumentException( "Array lengths must be the same" );
138 }
139
140 double[] xe = x.elements();
141 double[] ye = y.elements();
142 double sxy = 0.0;
143 int length = 0;
144 for ( int i = 0, n = x.size(); i < n; i++ ) {
145 double xj = xe[i];
146 double yj = ye[i];
147
148 if ( Double.isNaN( xj ) || Double.isNaN( yj ) ) {
149 continue;
150 }
151
152 sxy += xj * yj;
153 length++;
154 }
155
156 if ( length == 0 ) {
157 return -2.0;
158 }
159 return sxy / length;
160 }
161 }