View Javadoc

1   package baseCode.math;
2   
3   import cern.colt.list.DoubleArrayList;
4   import cern.jet.stat.Descriptive;
5   
6   ;
7   
8   /***
9    * Mathematical functions for statistics that allow missing values without scotching the calculations.
10   * <p>
11   * Be careful because some methods from cern.jet.stat.Descriptive have not been overridden and will yield a
12   * UnsupportedOperationException if used.
13   * </p>
14   * <p>
15   * Some functions that come with DoubleArrayLists will not work in an entirely compatible way with missing values. For
16   * example, size() reports the total number of elements, including missing values. To get a count of non-missing
17   * values, use this.sizeWithoutMissingValues(). The right one to use may vary.
18   * </p>
19   * <p>
20   * Not all methods need to be overridden. However, all methods that take a "size" parameter should be passed the results
21   * of sizeWithoutMissingValues(data), instead of data.size().
22   * </p>
23   * <p>
24   * Copyright © 2004 Columbia University
25   * <p>
26   * Based in part on code from the colt package: Copyright © 1999 CERN - European Organization for Nuclear Research.
27   * 
28   * @see <a
29   *      href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/Descriptive.html">cern.jet.stat.Descriptive
30   *      </a>
31   * @author Paul Pavlidis
32   * @version $Id: DescriptiveWithMissing.java,v 1.18 2005/01/05 02:01:02 pavlidis Exp $
33   */
34  public class DescriptiveWithMissing extends cern.jet.stat.Descriptive {
35  
36     private DescriptiveWithMissing() {
37     }
38  
39     /***
40      * <b>Not supported. </b>
41      * 
42      * @param data DoubleArrayList
43      * @param lag int
44      * @param mean double
45      * @param variance double
46      * @return double
47      */
48     public static double autoCorrelation( DoubleArrayList data, int lag,
49           double mean, double variance ) {
50        throw new UnsupportedOperationException(
51              "autoCorrelation not supported with missing values" );
52     }
53  
54     /***
55      * Returns the correlation of two data sequences. That is
56      * <tt>covariance(data1,data2)/(standardDev1*standardDev2)</tt>. Missing values are ignored. This method is
57      * overridden to stop users from using the method in the superclass when missing values are present. The problem is
58      * that the standard deviation cannot be computed without knowning which values are not missing in both vectors to be
59      * compared. Thus the standardDev parameters are thrown away by this method.
60      * 
61      * @param data1 DoubleArrayList
62      * @param standardDev1 double - not used
63      * @param data2 DoubleArrayList
64      * @param standardDev2 double - not used
65      * @return double
66      */
67     public static double correlation( DoubleArrayList data1,
68           double standardDev1, DoubleArrayList data2, double standardDev2 ) {
69        return correlation( data1, data2 );
70     }
71  
72     /***
73      * Calculate the pearson correlation of two arrays. Missing values (NaNs) are ignored.
74      * 
75      * @param x DoubleArrayList
76      * @param y DoubleArrayList
77      * @return double
78      */
79     public static double correlation( DoubleArrayList x, DoubleArrayList y ) {
80        int j;
81        double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
82        int numused;
83        syy = 0.0;
84        sxy = 0.0;
85        sxx = 0.0;
86        sx = 0.0;
87        sy = 0.0;
88        numused = 0;
89        if ( x.size() != y.size() ) {
90           throw new ArithmeticException("Unequal vector sizes: " + x.size() + " != " + y.size());
91        }
92  
93        double[] xel = x.elements();
94        double[] yel = y.elements();
95  
96        int length = x.size();
97        for ( j = 0; j < length; j++ ) {
98           xj = xel[j];
99           yj = yel[j];
100 
101          if ( !Double.isNaN( xj ) && !Double.isNaN( yj ) ) {
102             sx += xj;
103             sy += yj;
104             sxy += xj * yj;
105             sxx += xj * xj;
106             syy += yj * yj;
107             numused++;
108          }
109       }
110 
111       if ( numused > 0 ) {
112          ay = sy / numused;
113          ax = sx / numused;
114          return ( sxy - sx * ay )
115                / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
116       }
117       return -2.0; // signifies that it could not be calculated.
118 
119    }
120 
121    /***
122     * Returns the SAMPLE covariance of two data sequences. Pairs of values are only considered if both are not NaN. If there
123     * are no non-missing pairs, the covariance is zero.
124     * 
125     * @param data1 the first vector
126     * @param data2 the second vector
127     * @return double
128     */
129    public static double covariance( DoubleArrayList data1, DoubleArrayList data2 ) {
130       int size = data1.size();
131       if ( size != data2.size() || size == 0 ) {
132          throw new IllegalArgumentException();
133       }
134       double[] elements1 = data1.elements();
135       double[] elements2 = data2.elements();
136 
137       /* initialize sumx and sumy to the first non-NaN pair of values */
138 
139       int i = 0;
140       double sumx = 0.0, sumy = 0.0, Sxy = 0.0;
141       while ( i < size ) {
142          sumx = elements1[i];
143          sumy = elements2[i];
144          if ( !Double.isNaN( elements1[i] ) && !Double.isNaN( elements2[i] ) ) {
145             break;
146          }
147          i++;
148       }
149       i++;
150       int usedPairs = 1;
151       for ( ; i < size; ++i ) {
152          double x = elements1[i];
153          double y = elements2[i];
154          if ( Double.isNaN( x ) || Double.isNaN( y ) ) {
155             continue;
156          }
157 
158          sumx += x;
159          Sxy += ( x - sumx / ( usedPairs + 1 ) ) * ( y - sumy / usedPairs );
160          sumy += y;
161          usedPairs++;
162       }
163       return Sxy / (usedPairs - 1);
164    }
165 
166    /***
167     * Durbin-Watson computation. This measures the serial correlation in a data series.
168     * 
169     * @param data DoubleArrayList
170     * @return double
171     * @todo this will still break in some situations where there are missing values
172     */
173    public static double durbinWatson( DoubleArrayList data ) {
174       int size = data.size();
175       if ( size < 2 ) {
176          throw new IllegalArgumentException(
177                "data sequence must contain at least two values." );
178       }
179 
180       double[] elements = data.elements();
181       double run = 0;
182       double run_sq = 0;
183       int firstNotNaN = 0;
184       while ( firstNotNaN < size ) {
185          run_sq = elements[firstNotNaN] * elements[firstNotNaN];
186 
187          if ( !Double.isNaN( elements[firstNotNaN] ) ) {
188             break;
189          }
190 
191          firstNotNaN++;
192       }
193 
194       if ( firstNotNaN > 0 && size - firstNotNaN < 2 ) {
195          throw new IllegalArgumentException(
196                "data sequence must contain at least two non-missing values." );
197 
198       }
199 
200       for ( int i = firstNotNaN + 1; i < size; i++ ) {
201          int gap = 1;
202          while ( Double.isNaN( elements[i] ) ) {
203             gap++;
204             i++;
205             continue;
206          }
207          double x = elements[i] - elements[i - gap];
208          /***  */
209          run += x * x;
210          run_sq += elements[i] * elements[i];
211       }
212 
213       return run / run_sq;
214    }
215 
216    /***
217     * Returns the geometric mean of a data sequence. Missing values are ignored. Note that for a geometric mean to be
218     * meaningful, the minimum of the data sequence must not be less or equal to zero. <br>
219     * The geometric mean is given by <tt>pow( Product( data[i] ),
220     * 1/data.size())</tt>. This method tries to avoid
221     * overflows at the expense of an equivalent but somewhat slow definition: <tt>geo = Math.exp( Sum(
222     * Log(data[i]) ) / data.size())</tt>.
223     * 
224     * @param data DoubleArrayList
225     * @return double
226     */
227    public static double geometricMean( DoubleArrayList data ) {
228       return geometricMean( sizeWithoutMissingValues( data ), sumOfLogarithms(
229             data, 0, data.size() - 1 ) );
230    }
231 
232    /***
233     * <b>Not supported. </b>
234     * 
235     * @param data DoubleArrayList
236     * @param from int
237     * @param to int
238     * @param inOut double[]
239     */
240    public static void incrementalUpdate( DoubleArrayList data, int from,
241          int to, double[] inOut ) {
242       throw new UnsupportedOperationException(
243             "incrementalUpdate not supported with missing values" );
244    }
245 
246    /***
247     * <b>Not supported. </b>
248     * 
249     * @param data DoubleArrayList
250     * @param from int
251     * @param to int
252     * @param fromSumIndex int
253     * @param toSumIndex int
254     * @param sumOfPowers double[]
255     */
256    public static void incrementalUpdateSumsOfPowers( DoubleArrayList data,
257          int from, int to, int fromSumIndex, int toSumIndex,
258          double[] sumOfPowers ) {
259       throw new UnsupportedOperationException(
260             "incrementalUpdateSumsOfPowers not supported with missing values" );
261    }
262 
263    /***
264     * <b>Not supported. </b>
265     * 
266     * @param data DoubleArrayList
267     * @param weights DoubleArrayList
268     * @param from int
269     * @param to int
270     * @param inOut double[]
271     */
272    public static void incrementalWeightedUpdate( DoubleArrayList data,
273          DoubleArrayList weights, int from, int to, double[] inOut ) {
274       throw new UnsupportedOperationException(
275             "incrementalWeightedUpdate not supported with missing values" );
276    }
277 
278    /***
279     * Returns the kurtosis (aka excess) of a data sequence.
280     * 
281     * @param moment4 the fourth central moment, which is <tt>moment(data,4,mean)</tt>.
282     * @param standardDeviation the standardDeviation.
283     * @return double
284     */
285    public static double kurtosis( double moment4, double standardDeviation ) {
286       return -3
287             + moment4
288             / ( standardDeviation * standardDeviation * standardDeviation * standardDeviation );
289    }
290 
291    /***
292     * Returns the kurtosis (aka excess) of a data sequence, which is <tt>-3 +
293     * moment(data,4,mean) / standardDeviation<sup>4</sup></tt>.
294     * 
295     * @param data DoubleArrayList
296     * @param mean double
297     * @param standardDeviation double
298     * @return double
299     */
300    public static double kurtosis( DoubleArrayList data, double mean,
301          double standardDeviation ) {
302       return kurtosis( moment( data, 4, mean ), standardDeviation );
303    }
304 
305    /***
306     * <b>Not supported. </b>
307     * 
308     * @param data DoubleArrayList
309     * @param mean double
310     * @return double
311     */
312    public static double lag1( DoubleArrayList data, double mean ) {
313       throw new UnsupportedOperationException(
314             "lag1 not supported with missing values" );
315    }
316 
317    /***
318     * @param data Values to be analyzed.
319     * @return Mean of the values in x. Missing values are ignored in the analysis.
320     */
321    public static double mean( DoubleArrayList data ) {
322       return sum( data ) / sizeWithoutMissingValues( data );
323    }
324 
325    /***
326     * Special mean calculation where we use the effective size as an input.
327     * 
328     * @param x The data
329     * @param effectiveSize The effective size used for the mean calculation.
330     * @return double
331     */
332    public static double mean( DoubleArrayList x, int effectiveSize ) {
333 
334       int length = x.size();
335 
336       if ( 0 == effectiveSize ) {
337          return Double.NaN;
338       }
339 
340       double sum = 0.0;
341       int i, count;
342       count = 0;
343       double value;
344       double[] elements = x.elements();
345       for ( i = 0; i < length; i++ ) {
346          value = elements[i];
347          if ( Double.isNaN( value ) ) {
348             continue;
349          }
350          sum += value;
351          count++;
352       }
353       if ( 0.0 == count ) {
354          return Double.NaN;
355       }
356       return sum / effectiveSize;
357 
358    }
359 
360    /***
361     * Special mean calculation where we use the effective size as an input.
362     * 
363     * @param elements The data double array.
364     * @param effectiveSize The effective size used for the mean calculation.
365     * @return double
366     */
367    public static double mean( double[] elements, int effectiveSize ) {
368 
369       int length = elements.length;
370 
371       if ( 0 == effectiveSize ) {
372          return Double.NaN;
373       }
374 
375       double sum = 0.0;
376       int i, count;
377       count = 0;
378       double value;
379       for ( i = 0; i < length; i++ ) {
380          value = elements[i];
381          if ( Double.isNaN( value ) ) {
382             continue;
383          }
384          sum += value;
385          count++;
386       }
387       if ( 0.0 == count ) {
388          return Double.NaN;
389       }
390       return sum / effectiveSize;
391    }
392 
393    /***
394     * Calculate the mean of the values above a particular quantile of an array.
395     * 
396     * @param quantile A value from 0 to 100
397     * @param array Array for which we want to get the quantile.
398     * @return double
399     */
400    public static double meanAboveQuantile( int quantile, DoubleArrayList array ) {
401 
402       if ( quantile < 0 || quantile > 100 ) {
403          throw new IllegalArgumentException(
404                "Quantile must be between 0 and 100" );
405       }
406 
407       double returnvalue = 0.0;
408       int k = 0;
409 
410       double median = Descriptive.quantile( array, quantile );
411 
412       for ( int i = 0; i < array.size(); i++ ) {
413          if ( array.get( i ) >= median ) {
414             returnvalue += array.get( i );
415             k++;
416          }
417       }
418 
419       if ( k == 0 ) {
420          throw new ArithmeticException( "No values found above quantile" );
421       }
422 
423       return ( returnvalue / k );
424    }
425 
426    /***
427     * Returns the median of a sorted data sequence. Missing values are not considered.
428     * 
429     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
430     * @return double
431     */
432    public static double median( DoubleArrayList sortedData ) {
433       return quantile( sortedData, 0.5 );
434    }
435 
436    /***
437     * Returns the moment of <tt>k</tt> -th order with constant <tt>c</tt> of a data sequence, which is
438     * <tt>Sum( (data[i]-c)<sup>k</sup> ) /
439     * data.size()</tt>.
440     * 
441     * @param data DoubleArrayList
442     * @param k int
443     * @param c double
444     * @return double
445     */
446    public static double moment( DoubleArrayList data, int k, double c ) {
447       return sumOfPowerDeviations( data, k, c )
448             / sizeWithoutMissingValues( data );
449    }
450 
451    /***
452     * Returns the product of a data sequence, which is <tt>Prod( data[i] )</tt>. Missing values are ignored. In other
453     * words: <tt>data[0]*data[1]*...*data[data.size()-1]</tt>. Note that you may easily get numeric overflows.
454     * 
455     * @param data DoubleArrayList
456     * @return double
457     */
458    public static double product( DoubleArrayList data ) {
459       int size = data.size();
460       double[] elements = data.elements();
461 
462       double product = 1;
463       for ( int i = size; --i >= 0; ) {
464          if ( Double.isNaN( elements[i] ) ) {
465             continue;
466          }
467          product *= elements[i];
468 
469       }
470       return product;
471    }
472 
473    /***
474     * Returns the <tt>phi-</tt> quantile; that is, an element <tt>elem</tt> for which holds that <tt>phi</tt>
475     * percent of data elements are less than <tt>elem</tt>. Missing values are ignored. The quantile need not
476     * necessarily be contained in the data sequence, it can be a linear interpolation.
477     * 
478     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
479     * @param phi the percentage; must satisfy <tt>0 &lt;= phi &lt;= 1</tt>.
480     * @todo possibly implement so a copy is not made.
481     * @return double
482     */
483    public static double quantile( DoubleArrayList sortedData, double phi ) {
484       return Descriptive.quantile( removeMissing( sortedData ), phi );
485    }
486 
487    /***
488     * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
489     * interpolation if the element is not contained but lies in between two contained elements. Missing values are
490     * ignored.
491     * 
492     * @param sortedList the list to be searched (must be sorted ascending).
493     * @param element the element to search for.
494     * @return the percentage <tt>phi</tt> of elements <tt>&lt;= element</tt>(<tt>0.0 &lt;= phi &lt;= 1.0)</tt>.
495     */
496    public static double quantileInverse( DoubleArrayList sortedList,
497          double element ) {
498       return rankInterpolated( sortedList, element ) / sortedList.size();
499    }
500 
501    /***
502     * Returns the quantiles of the specified percentages. The quantiles need not necessarily be contained in the data
503     * sequence, it can be a linear interpolation.
504     * 
505     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
506     * @param percentages the percentages for which quantiles are to be computed. Each percentage must be in the interval
507     *        <tt>[0.0,1.0]</tt>.
508     * @return the quantiles.
509     */
510    public static DoubleArrayList quantiles( DoubleArrayList sortedData,
511          DoubleArrayList percentages ) {
512       int s = percentages.size();
513       DoubleArrayList quantiles = new DoubleArrayList( s );
514 
515       for ( int i = 0; i < s; i++ ) {
516          quantiles.add( quantile( sortedData, percentages.get( i ) ) );
517       }
518 
519       return quantiles;
520    }
521 
522    /***
523     * Returns the linearly interpolated number of elements in a list less or equal to a given element. Missing values
524     * are ignored. The rank is the number of elements <= element. Ranks are of the form
525     * <tt>{0, 1, 2,..., sortedList.size()}</tt>. If no element is <= element, then the rank is zero. If the element
526     * lies in between two contained elements, then linear interpolation is used and a non integer value is returned.
527     * 
528     * @param sortedList the list to be searched (must be sorted ascending).
529     * @param element the element to search for.
530     * @return the rank of the element.
531     * @todo possibly implement so a copy is not made.
532     */
533    public static double rankInterpolated( DoubleArrayList sortedList,
534          double element ) {
535       return Descriptive
536             .rankInterpolated( removeMissing( sortedList ), element );
537    }
538 
539    /***
540     * Returns the sample kurtosis (aka excess) of a data sequence.
541     * 
542     * @param data DoubleArrayList
543     * @param mean double
544     * @param sampleVariance double
545     * @return double
546     */
547    public static double sampleKurtosis( DoubleArrayList data, double mean,
548          double sampleVariance ) {
549       return sampleKurtosis( sizeWithoutMissingValues( data ), moment( data, 4,
550             mean ), sampleVariance );
551    }
552 
553    /***
554     * Returns the sample skew of a data sequence.
555     * 
556     * @param data DoubleArrayList
557     * @param mean double
558     * @param sampleVariance double
559     * @return double
560     */
561    public static double sampleSkew( DoubleArrayList data, double mean,
562          double sampleVariance ) {
563       return sampleSkew( sizeWithoutMissingValues( data ), moment( data, 3,
564             mean ), sampleVariance );
565    }
566 
567    /***
568     * Returns the skew of a data sequence, which is <tt>moment(data,3,mean) /
569     * standardDeviation<sup>3</sup></tt>.
570     * 
571     * @param data DoubleArrayList
572     * @param mean double
573     * @param standardDeviation double
574     * @return double
575     */
576    public static double skew( DoubleArrayList data, double mean,
577          double standardDeviation ) {
578       return skew( moment( data, 3, mean ), standardDeviation );
579    }
580 
581    /***
582     * Returns the sample standard deviation.
583     * <p>
584     * This is included for compatibility with the superclass, but does not implement the correction used there.
585     * 
586     * @see cern.jet.stat.Descriptive#sampleStandardDeviation(int, double)
587     * @param size the number of elements of the data sequence.
588     * @param sampleVariance the <b>sample variance </b>.
589     */
590    public static double sampleStandardDeviation( int size, double sampleVariance ) {
591       return Math.sqrt( sampleVariance );
592    }
593 
594    /***
595     * Returns the sample variance of a data sequence. That is <tt>Sum (
596     * (data[i]-mean)^2 ) / (data.size()-1)</tt>.
597     * 
598     * @param data DoubleArrayList
599     * @param mean double
600     * @return double
601     */
602    public static double sampleVariance( DoubleArrayList data, double mean ) {
603       double[] elements = data.elements();
604       int effsize = sizeWithoutMissingValues( data );
605       int size = data.size();
606       double sum = 0;
607       // find the sum of the squares
608       for ( int i = size; --i >= 0; ) {
609          if ( Double.isNaN( elements[i] ) ) {
610             continue;
611          }
612          double delta = elements[i] - mean;
613          sum += delta * delta;
614       }
615 
616       return sum / ( effsize - 1 );
617    }
618 
619    /***
620     * Modifies a data sequence to be standardized. Mising values are ignored. Changes each element <tt>data[i]</tt> as
621     * follows: <tt>data[i] = (data[i]-mean)/standardDeviation</tt>.
622     * 
623     * @param data DoubleArrayList
624     * @param mean mean of data
625     * @param standardDeviation stdev of data
626     */
627    public static void standardize( DoubleArrayList data, double mean,
628          double standardDeviation ) {
629       double[] elements = data.elements();
630       for ( int i = data.size(); --i >= 0; ) {
631          if ( Double.isNaN( elements[i] ) ) {
632             continue;
633          }
634          elements[i] = ( elements[i] - mean ) / standardDeviation;
635       }
636    }
637 
638    /***
639     * Standardize. Note that this does something slightly different than standardize in the superclass, because our
640     * sampleStandardDeviation does not use the correction of the superclass (which isn't really standard).
641     * 
642     * @param data DoubleArrayList
643     */
644    public static void standardize( DoubleArrayList data ) {
645       double mean = mean( data );
646       double stdev = Math.sqrt( sampleVariance( data, mean ) );
647       DescriptiveWithMissing.standardize( data, mean, stdev );
648    }
649 
650    /***
651     * Returns the sum of a data sequence. That is <tt>Sum( data[i] )</tt>.
652     * 
653     * @param data DoubleArrayList
654     * @return double
655     */
656    public static double sum( DoubleArrayList data ) {
657       return sumOfPowerDeviations( data, 1, 0.0 );
658    }
659 
660    /***
661     * Returns the sum of inversions of a data sequence, which is <tt>Sum( 1.0 /
662     * data[i])</tt>.
663     * 
664     * @param data the data sequence.
665     * @param from the index of the first data element (inclusive).
666     * @param to the index of the last data element (inclusive).
667     * @return double
668     */
669    public static double sumOfInversions( DoubleArrayList data, int from, int to ) {
670       return sumOfPowerDeviations( data, -1, 0.0, from, to );
671    }
672 
673    /***
674     * Returns the sum of logarithms of a data sequence, which is <tt>Sum(
675     * Log(data[i])</tt>. Missing values are
676     * ignored.
677     * 
678     * @param data the data sequence.
679     * @param from the index of the first data element (inclusive).
680     * @param to the index of the last data element (inclusive).
681     * @return double
682     */
683    public static double sumOfLogarithms( DoubleArrayList data, int from, int to ) {
684       double[] elements = data.elements();
685       double logsum = 0;
686       for ( int i = from - 1; ++i <= to; ) {
687          if ( Double.isNaN( elements[i] ) ) {
688             continue;
689          }
690          logsum += Math.log( elements[i] );
691       }
692       return logsum;
693    }
694 
695    /***
696     * Returns the sum of powers of a data sequence, which is <tt>Sum (
697     * data[i]<sup>k</sup> )</tt>.
698     * 
699     * @param data DoubleArrayList
700     * @param k int
701     * @return double
702     */
703    public static double sumOfPowers( DoubleArrayList data, int k ) {
704       return sumOfPowerDeviations( data, k, 0 );
705    }
706 
707    /***
708     * Returns the sum of squares of a data sequence. Skips missing values.
709     * 
710     * @param data DoubleArrayList
711     * @return double
712     */
713    public static double sumOfSquares( DoubleArrayList data ) {
714       return sumOfPowerDeviations( data, 2, 0.0 );
715    }
716 
717    /***
718     * Compute the sum of the squared deviations from the mean of a data sequence. Missing values are ignored.
719     * 
720     * @param data DoubleArrayList
721     * @return double
722     */
723    public static double sumOfSquaredDeviations( DoubleArrayList data ) {
724       return sumOfSquaredDeviations( sizeWithoutMissingValues( data ),
725             variance( sizeWithoutMissingValues( data ), sum( data ),
726                   sumOfSquares( data ) ) );
727    }
728 
729    /***
730     * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt>; optimized for common parameters like <tt>c == 0.0</tt>
731     * and/or <tt>k == -2 .. 4</tt>.
732     * 
733     * @param data DoubleArrayList
734     * @param k int
735     * @param c double
736     * @return double
737     */
738    public static double sumOfPowerDeviations( DoubleArrayList data, int k,
739          double c ) {
740       return sumOfPowerDeviations( data, k, c, 0, data.size() - 1 );
741    }
742 
743    /***
744     * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt> for all <tt>i = from ..
745     * to</tt>; optimized for common
746     * parameters like <tt>c == 0.0</tt> and/or <tt>k == -2 .. 5</tt>. Missing values are ignored.
747     * 
748     * @param data DoubleArrayList
749     * @param k int
750     * @param c double
751     * @param from int
752     * @param to int
753     * @return double
754     */
755    public static double sumOfPowerDeviations( final DoubleArrayList data,
756          final int k, final double c, final int from, final int to ) {
757       final double[] elements = data.elements();
758       double sum = 0;
759       double v;
760       int i;
761       switch ( k ) { // optimized for speed
762          case -2:
763             if ( c == 0.0 ) {
764                for ( i = from - 1; ++i <= to; ) {
765                   if ( Double.isNaN( elements[i] ) ) {
766                      continue;
767                   }
768                   v = elements[i];
769                   sum += 1 / ( v * v );
770                }
771             } else {
772                for ( i = from - 1; ++i <= to; ) {
773                   if ( Double.isNaN( elements[i] ) ) {
774                      continue;
775                   }
776                   v = elements[i] - c;
777                   sum += 1 / ( v * v );
778                }
779             }
780             break;
781          case -1:
782             if ( c == 0.0 ) {
783                for ( i = from - 1; ++i <= to; ) {
784                   if ( Double.isNaN( elements[i] ) ) {
785                      continue;
786                   }
787                   sum += 1 / ( elements[i] );
788                }
789             } else {
790                for ( i = from - 1; ++i <= to; ) {
791                   if ( Double.isNaN( elements[i] ) ) {
792                      continue;
793                   }
794                   sum += 1 / ( elements[i] - c );
795                }
796             }
797             break;
798          case 0:
799             sum += to - from + 1;
800             break;
801          case 1:
802             if ( c == 0.0 ) {
803                for ( i = from - 1; ++i <= to; ) {
804                   if ( Double.isNaN( elements[i] ) ) {
805                      continue;
806                   }
807                   sum += elements[i];
808                }
809             } else {
810                for ( i = from - 1; ++i <= to; ) {
811                   if ( Double.isNaN( elements[i] ) ) {
812                      continue;
813                   }
814                   sum += elements[i] - c;
815                }
816             }
817             break;
818          case 2:
819             if ( c == 0.0 ) {
820                for ( i = from - 1; ++i <= to; ) {
821                   if ( Double.isNaN( elements[i] ) ) {
822                      continue;
823                   }
824                   v = elements[i];
825                   sum += v * v;
826                }
827             } else {
828                for ( i = from - 1; ++i <= to; ) {
829                   if ( Double.isNaN( elements[i] ) ) {
830                      continue;
831                   }
832                   v = elements[i] - c;
833                   sum += v * v;
834                }
835             }
836             break;
837          case 3:
838             if ( c == 0.0 ) {
839                for ( i = from - 1; ++i <= to; ) {
840                   v = elements[i];
841                   sum += v * v * v;
842                }
843             } else {
844                for ( i = from - 1; ++i <= to; ) {
845                   if ( Double.isNaN( elements[i] ) ) {
846                      continue;
847                   }
848                   v = elements[i] - c;
849                   sum += v * v * v;
850                }
851             }
852             break;
853          case 4:
854             if ( c == 0.0 ) {
855                for ( i = from - 1; ++i <= to; ) {
856                   if ( Double.isNaN( elements[i] ) ) {
857                      continue;
858                   }
859                   v = elements[i];
860                   sum += v * v * v * v;
861                }
862             } else {
863                for ( i = from - 1; ++i <= to; ) {
864                   if ( Double.isNaN( elements[i] ) ) {
865                      continue;
866                   }
867                   v = elements[i] - c;
868                   sum += v * v * v * v;
869                }
870             }
871             break;
872          case 5:
873             if ( c == 0.0 ) {
874                for ( i = from - 1; ++i <= to; ) {
875                   if ( Double.isNaN( elements[i] ) ) {
876                      continue;
877                   }
878                   v = elements[i];
879                   sum += v * v * v * v * v;
880                }
881             } else {
882                for ( i = from - 1; ++i <= to; ) {
883                   if ( Double.isNaN( elements[i] ) ) {
884                      continue;
885                   }
886                   v = elements[i] - c;
887                   sum += v * v * v * v * v;
888                }
889             }
890             break;
891          default:
892             for ( i = from - 1; ++i <= to; ) {
893                if ( Double.isNaN( elements[i] ) ) {
894                   continue;
895                }
896                sum += Math.pow( elements[i] - c, k );
897             }
898             break;
899       }
900       return sum;
901    }
902 
903    /***
904     * Return the size of the list, ignoring missing values.
905     * 
906     * @param list DoubleArrayList
907     * @return int
908     */
909    public static int sizeWithoutMissingValues( DoubleArrayList list ) {
910 
911       int size = 0;
912       for ( int i = 0; i < list.size(); i++ ) {
913          if ( !Double.isNaN( list.get( i ) ) ) {
914             size++;
915          }
916       }
917       return size;
918    }
919 
920    /***
921     * Returns the trimmed mean of a sorted data sequence. Missing values are completely ignored.
922     * 
923     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
924     * @param mean the mean of the (full) sorted data sequence.
925     * @param left int the number of leading elements to trim.
926     * @param right int number of trailing elements to trim.
927     * @return double
928     */
929    public static double trimmedMean( DoubleArrayList sortedData, double mean,
930          int left, int right ) {
931       return Descriptive.trimmedMean( removeMissing( sortedData ), mean, left,
932             right );
933    }
934 
935    /***
936     * Provided for convenience!
937     * 
938     * @param data DoubleArrayList
939     * @return double
940     */
941    public static double variance( DoubleArrayList data ) {
942       return variance( sizeWithoutMissingValues( data ), sum( data ),
943             sumOfSquares( data ) );
944    }
945 
946    /***
947     * Returns the weighted mean of a data sequence. That is <tt> Sum (data[i] *
948     * weights[i]) / Sum ( weights[i] )</tt>.
949     * 
950     * @param data DoubleArrayList
951     * @param weights DoubleArrayList
952     * @return double
953     */
954    public static double weightedMean( DoubleArrayList data,
955          DoubleArrayList weights ) {
956       int size = data.size();
957       if ( size != weights.size() || size == 0 ) {
958          throw new IllegalArgumentException();
959       }
960 
961       double[] elements = data.elements();
962       double[] theWeights = weights.elements();
963       double sum = 0.0;
964       double weightsSum = 0.0;
965       for ( int i = size; --i >= 0; ) {
966          double w = theWeights[i];
967          if ( Double.isNaN( elements[i] ) ) {
968             continue;
969          }
970          sum += elements[i] * w;
971          weightsSum += w;
972       }
973 
974       return sum / weightsSum;
975    }
976 
977    /***
978     * <b>Not supported. </b>
979     * 
980     * @param sortedData DoubleArrayList
981     * @param mean double
982     * @param left int
983     * @param right int
984     * @return double
985     */
986    public static double winsorizedMean( DoubleArrayList sortedData,
987          double mean, int left, int right ) {
988       throw new UnsupportedOperationException(
989             "winsorizedMean not supported with missing values" );
990    }
991 
992    /* private methods */
993 
994    /***
995     * Convenience function for internal use. Makes a copy of the list that doesn't have the missing values.
996     * 
997     * @param data DoubleArrayList
998     * @return DoubleArrayList
999     */
1000    private static DoubleArrayList removeMissing( DoubleArrayList data ) {
1001       DoubleArrayList r = new DoubleArrayList( sizeWithoutMissingValues( data ) );
1002       double[] elements = data.elements();
1003       int size = data.size();
1004       for ( int i = 0; i < size; i++ ) {
1005          if ( Double.isNaN( elements[i] ) ) {
1006             continue;
1007          }
1008          r.add( elements[i] );
1009       }
1010       return r;
1011    }
1012 
1013 } // end of class