1 package baseCode.math;
2
3 import cern.colt.list.DoubleArrayList;
4 import cern.jet.stat.Descriptive;
5
6 ;
7
8 /***
9 * Mathematical functions for statistics that allow missing values without scotching the calculations.
10 * <p>
 * Be careful: some methods of cern.jet.stat.Descriptive are not supported when missing values may be present. They
 * are redefined here to throw an UnsupportedOperationException if used.
13 * </p>
14 * <p>
 * Some functions that come with DoubleArrayLists will not work in an entirely compatible way with missing values. For
 * example, size() reports the total number of elements, including missing values. To get a count of non-missing
 * values, use DescriptiveWithMissing.sizeWithoutMissingValues(data). The right one to use may vary.
18 * </p>
19 * <p>
20 * Not all methods need to be overridden. However, all methods that take a "size" parameter should be passed the results
21 * of sizeWithoutMissingValues(data), instead of data.size().
22 * </p>
23 * <p>
 * Copyright &copy; 2004 Columbia University
 * <p>
 * Based in part on code from the colt package: Copyright &copy; 1999 CERN - European Organization for Nuclear Research.
27 *
28 * @see <a
29 * href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/Descriptive.html">cern.jet.stat.Descriptive
30 * </a>
31 * @author Paul Pavlidis
32 * @version $Id: DescriptiveWithMissing.java,v 1.18 2005/01/05 02:01:02 pavlidis Exp $
33 */
34 public class DescriptiveWithMissing extends cern.jet.stat.Descriptive {
35
/**
 * Private constructor: every member visible in this class is static, so no instance is ever needed.
 */
private DescriptiveWithMissing() {
    // Intentionally empty.
}
38
39 /***
40 * <b>Not supported. </b>
41 *
42 * @param data DoubleArrayList
43 * @param lag int
44 * @param mean double
45 * @param variance double
46 * @return double
47 */
48 public static double autoCorrelation( DoubleArrayList data, int lag,
49 double mean, double variance ) {
50 throw new UnsupportedOperationException(
51 "autoCorrelation not supported with missing values" );
52 }
53
54 /***
55 * Returns the correlation of two data sequences. That is
56 * <tt>covariance(data1,data2)/(standardDev1*standardDev2)</tt>. Missing values are ignored. This method is
57 * overridden to stop users from using the method in the superclass when missing values are present. The problem is
58 * that the standard deviation cannot be computed without knowning which values are not missing in both vectors to be
59 * compared. Thus the standardDev parameters are thrown away by this method.
60 *
61 * @param data1 DoubleArrayList
62 * @param standardDev1 double - not used
63 * @param data2 DoubleArrayList
64 * @param standardDev2 double - not used
65 * @return double
66 */
67 public static double correlation( DoubleArrayList data1,
68 double standardDev1, DoubleArrayList data2, double standardDev2 ) {
69 return correlation( data1, data2 );
70 }
71
72 /***
73 * Calculate the pearson correlation of two arrays. Missing values (NaNs) are ignored.
74 *
75 * @param x DoubleArrayList
76 * @param y DoubleArrayList
77 * @return double
78 */
79 public static double correlation( DoubleArrayList x, DoubleArrayList y ) {
80 int j;
81 double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
82 int numused;
83 syy = 0.0;
84 sxy = 0.0;
85 sxx = 0.0;
86 sx = 0.0;
87 sy = 0.0;
88 numused = 0;
89 if ( x.size() != y.size() ) {
90 throw new ArithmeticException("Unequal vector sizes: " + x.size() + " != " + y.size());
91 }
92
93 double[] xel = x.elements();
94 double[] yel = y.elements();
95
96 int length = x.size();
97 for ( j = 0; j < length; j++ ) {
98 xj = xel[j];
99 yj = yel[j];
100
101 if ( !Double.isNaN( xj ) && !Double.isNaN( yj ) ) {
102 sx += xj;
103 sy += yj;
104 sxy += xj * yj;
105 sxx += xj * xj;
106 syy += yj * yj;
107 numused++;
108 }
109 }
110
111 if ( numused > 0 ) {
112 ay = sy / numused;
113 ax = sx / numused;
114 return ( sxy - sx * ay )
115 / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
116 }
117 return -2.0;
118
119 }
120
121 /***
122 * Returns the SAMPLE covariance of two data sequences. Pairs of values are only considered if both are not NaN. If there
123 * are no non-missing pairs, the covariance is zero.
124 *
125 * @param data1 the first vector
126 * @param data2 the second vector
127 * @return double
128 */
129 public static double covariance( DoubleArrayList data1, DoubleArrayList data2 ) {
130 int size = data1.size();
131 if ( size != data2.size() || size == 0 ) {
132 throw new IllegalArgumentException();
133 }
134 double[] elements1 = data1.elements();
135 double[] elements2 = data2.elements();
136
137
138
139 int i = 0;
140 double sumx = 0.0, sumy = 0.0, Sxy = 0.0;
141 while ( i < size ) {
142 sumx = elements1[i];
143 sumy = elements2[i];
144 if ( !Double.isNaN( elements1[i] ) && !Double.isNaN( elements2[i] ) ) {
145 break;
146 }
147 i++;
148 }
149 i++;
150 int usedPairs = 1;
151 for ( ; i < size; ++i ) {
152 double x = elements1[i];
153 double y = elements2[i];
154 if ( Double.isNaN( x ) || Double.isNaN( y ) ) {
155 continue;
156 }
157
158 sumx += x;
159 Sxy += ( x - sumx / ( usedPairs + 1 ) ) * ( y - sumy / usedPairs );
160 sumy += y;
161 usedPairs++;
162 }
163 return Sxy / (usedPairs - 1);
164 }
165
166 /***
167 * Durbin-Watson computation. This measures the serial correlation in a data series.
168 *
169 * @param data DoubleArrayList
170 * @return double
171 * @todo this will still break in some situations where there are missing values
172 */
173 public static double durbinWatson( DoubleArrayList data ) {
174 int size = data.size();
175 if ( size < 2 ) {
176 throw new IllegalArgumentException(
177 "data sequence must contain at least two values." );
178 }
179
180 double[] elements = data.elements();
181 double run = 0;
182 double run_sq = 0;
183 int firstNotNaN = 0;
184 while ( firstNotNaN < size ) {
185 run_sq = elements[firstNotNaN] * elements[firstNotNaN];
186
187 if ( !Double.isNaN( elements[firstNotNaN] ) ) {
188 break;
189 }
190
191 firstNotNaN++;
192 }
193
194 if ( firstNotNaN > 0 && size - firstNotNaN < 2 ) {
195 throw new IllegalArgumentException(
196 "data sequence must contain at least two non-missing values." );
197
198 }
199
200 for ( int i = firstNotNaN + 1; i < size; i++ ) {
201 int gap = 1;
202 while ( Double.isNaN( elements[i] ) ) {
203 gap++;
204 i++;
205 continue;
206 }
207 double x = elements[i] - elements[i - gap];
208 /*** */
209 run += x * x;
210 run_sq += elements[i] * elements[i];
211 }
212
213 return run / run_sq;
214 }
215
216 /***
217 * Returns the geometric mean of a data sequence. Missing values are ignored. Note that for a geometric mean to be
218 * meaningful, the minimum of the data sequence must not be less or equal to zero. <br>
219 * The geometric mean is given by <tt>pow( Product( data[i] ),
220 * 1/data.size())</tt>. This method tries to avoid
221 * overflows at the expense of an equivalent but somewhat slow definition: <tt>geo = Math.exp( Sum(
222 * Log(data[i]) ) / data.size())</tt>.
223 *
224 * @param data DoubleArrayList
225 * @return double
226 */
227 public static double geometricMean( DoubleArrayList data ) {
228 return geometricMean( sizeWithoutMissingValues( data ), sumOfLogarithms(
229 data, 0, data.size() - 1 ) );
230 }
231
232 /***
233 * <b>Not supported. </b>
234 *
235 * @param data DoubleArrayList
236 * @param from int
237 * @param to int
238 * @param inOut double[]
239 */
240 public static void incrementalUpdate( DoubleArrayList data, int from,
241 int to, double[] inOut ) {
242 throw new UnsupportedOperationException(
243 "incrementalUpdate not supported with missing values" );
244 }
245
246 /***
247 * <b>Not supported. </b>
248 *
249 * @param data DoubleArrayList
250 * @param from int
251 * @param to int
252 * @param fromSumIndex int
253 * @param toSumIndex int
254 * @param sumOfPowers double[]
255 */
256 public static void incrementalUpdateSumsOfPowers( DoubleArrayList data,
257 int from, int to, int fromSumIndex, int toSumIndex,
258 double[] sumOfPowers ) {
259 throw new UnsupportedOperationException(
260 "incrementalUpdateSumsOfPowers not supported with missing values" );
261 }
262
263 /***
264 * <b>Not supported. </b>
265 *
266 * @param data DoubleArrayList
267 * @param weights DoubleArrayList
268 * @param from int
269 * @param to int
270 * @param inOut double[]
271 */
272 public static void incrementalWeightedUpdate( DoubleArrayList data,
273 DoubleArrayList weights, int from, int to, double[] inOut ) {
274 throw new UnsupportedOperationException(
275 "incrementalWeightedUpdate not supported with missing values" );
276 }
277
278 /***
279 * Returns the kurtosis (aka excess) of a data sequence.
280 *
281 * @param moment4 the fourth central moment, which is <tt>moment(data,4,mean)</tt>.
282 * @param standardDeviation the standardDeviation.
283 * @return double
284 */
285 public static double kurtosis( double moment4, double standardDeviation ) {
286 return -3
287 + moment4
288 / ( standardDeviation * standardDeviation * standardDeviation * standardDeviation );
289 }
290
291 /***
292 * Returns the kurtosis (aka excess) of a data sequence, which is <tt>-3 +
293 * moment(data,4,mean) / standardDeviation<sup>4</sup></tt>.
294 *
295 * @param data DoubleArrayList
296 * @param mean double
297 * @param standardDeviation double
298 * @return double
299 */
300 public static double kurtosis( DoubleArrayList data, double mean,
301 double standardDeviation ) {
302 return kurtosis( moment( data, 4, mean ), standardDeviation );
303 }
304
305 /***
306 * <b>Not supported. </b>
307 *
308 * @param data DoubleArrayList
309 * @param mean double
310 * @return double
311 */
312 public static double lag1( DoubleArrayList data, double mean ) {
313 throw new UnsupportedOperationException(
314 "lag1 not supported with missing values" );
315 }
316
317 /***
318 * @param data Values to be analyzed.
319 * @return Mean of the values in x. Missing values are ignored in the analysis.
320 */
321 public static double mean( DoubleArrayList data ) {
322 return sum( data ) / sizeWithoutMissingValues( data );
323 }
324
325 /***
326 * Special mean calculation where we use the effective size as an input.
327 *
328 * @param x The data
329 * @param effectiveSize The effective size used for the mean calculation.
330 * @return double
331 */
332 public static double mean( DoubleArrayList x, int effectiveSize ) {
333
334 int length = x.size();
335
336 if ( 0 == effectiveSize ) {
337 return Double.NaN;
338 }
339
340 double sum = 0.0;
341 int i, count;
342 count = 0;
343 double value;
344 double[] elements = x.elements();
345 for ( i = 0; i < length; i++ ) {
346 value = elements[i];
347 if ( Double.isNaN( value ) ) {
348 continue;
349 }
350 sum += value;
351 count++;
352 }
353 if ( 0.0 == count ) {
354 return Double.NaN;
355 }
356 return sum / effectiveSize;
357
358 }
359
360 /***
361 * Special mean calculation where we use the effective size as an input.
362 *
363 * @param elements The data double array.
364 * @param effectiveSize The effective size used for the mean calculation.
365 * @return double
366 */
367 public static double mean( double[] elements, int effectiveSize ) {
368
369 int length = elements.length;
370
371 if ( 0 == effectiveSize ) {
372 return Double.NaN;
373 }
374
375 double sum = 0.0;
376 int i, count;
377 count = 0;
378 double value;
379 for ( i = 0; i < length; i++ ) {
380 value = elements[i];
381 if ( Double.isNaN( value ) ) {
382 continue;
383 }
384 sum += value;
385 count++;
386 }
387 if ( 0.0 == count ) {
388 return Double.NaN;
389 }
390 return sum / effectiveSize;
391 }
392
393 /***
394 * Calculate the mean of the values above a particular quantile of an array.
395 *
396 * @param quantile A value from 0 to 100
397 * @param array Array for which we want to get the quantile.
398 * @return double
399 */
400 public static double meanAboveQuantile( int quantile, DoubleArrayList array ) {
401
402 if ( quantile < 0 || quantile > 100 ) {
403 throw new IllegalArgumentException(
404 "Quantile must be between 0 and 100" );
405 }
406
407 double returnvalue = 0.0;
408 int k = 0;
409
410 double median = Descriptive.quantile( array, quantile );
411
412 for ( int i = 0; i < array.size(); i++ ) {
413 if ( array.get( i ) >= median ) {
414 returnvalue += array.get( i );
415 k++;
416 }
417 }
418
419 if ( k == 0 ) {
420 throw new ArithmeticException( "No values found above quantile" );
421 }
422
423 return ( returnvalue / k );
424 }
425
426 /***
427 * Returns the median of a sorted data sequence. Missing values are not considered.
428 *
429 * @param sortedData the data sequence; <b>must be sorted ascending </b>.
430 * @return double
431 */
432 public static double median( DoubleArrayList sortedData ) {
433 return quantile( sortedData, 0.5 );
434 }
435
436 /***
437 * Returns the moment of <tt>k</tt> -th order with constant <tt>c</tt> of a data sequence, which is
438 * <tt>Sum( (data[i]-c)<sup>k</sup> ) /
439 * data.size()</tt>.
440 *
441 * @param data DoubleArrayList
442 * @param k int
443 * @param c double
444 * @return double
445 */
446 public static double moment( DoubleArrayList data, int k, double c ) {
447 return sumOfPowerDeviations( data, k, c )
448 / sizeWithoutMissingValues( data );
449 }
450
451 /***
452 * Returns the product of a data sequence, which is <tt>Prod( data[i] )</tt>. Missing values are ignored. In other
453 * words: <tt>data[0]*data[1]*...*data[data.size()-1]</tt>. Note that you may easily get numeric overflows.
454 *
455 * @param data DoubleArrayList
456 * @return double
457 */
458 public static double product( DoubleArrayList data ) {
459 int size = data.size();
460 double[] elements = data.elements();
461
462 double product = 1;
463 for ( int i = size; --i >= 0; ) {
464 if ( Double.isNaN( elements[i] ) ) {
465 continue;
466 }
467 product *= elements[i];
468
469 }
470 return product;
471 }
472
473 /***
474 * Returns the <tt>phi-</tt> quantile; that is, an element <tt>elem</tt> for which holds that <tt>phi</tt>
475 * percent of data elements are less than <tt>elem</tt>. Missing values are ignored. The quantile need not
476 * necessarily be contained in the data sequence, it can be a linear interpolation.
477 *
478 * @param sortedData the data sequence; <b>must be sorted ascending </b>.
479 * @param phi the percentage; must satisfy <tt>0 <= phi <= 1</tt>.
480 * @todo possibly implement so a copy is not made.
481 * @return double
482 */
483 public static double quantile( DoubleArrayList sortedData, double phi ) {
484 return Descriptive.quantile( removeMissing( sortedData ), phi );
485 }
486
487 /***
488 * Returns how many percent of the elements contained in the receiver are <tt><= element</tt>. Does linear
489 * interpolation if the element is not contained but lies in between two contained elements. Missing values are
490 * ignored.
491 *
492 * @param sortedList the list to be searched (must be sorted ascending).
493 * @param element the element to search for.
494 * @return the percentage <tt>phi</tt> of elements <tt><= element</tt>(<tt>0.0 <= phi <= 1.0)</tt>.
495 */
496 public static double quantileInverse( DoubleArrayList sortedList,
497 double element ) {
498 return rankInterpolated( sortedList, element ) / sortedList.size();
499 }
500
501 /***
502 * Returns the quantiles of the specified percentages. The quantiles need not necessarily be contained in the data
503 * sequence, it can be a linear interpolation.
504 *
505 * @param sortedData the data sequence; <b>must be sorted ascending </b>.
506 * @param percentages the percentages for which quantiles are to be computed. Each percentage must be in the interval
507 * <tt>[0.0,1.0]</tt>.
508 * @return the quantiles.
509 */
510 public static DoubleArrayList quantiles( DoubleArrayList sortedData,
511 DoubleArrayList percentages ) {
512 int s = percentages.size();
513 DoubleArrayList quantiles = new DoubleArrayList( s );
514
515 for ( int i = 0; i < s; i++ ) {
516 quantiles.add( quantile( sortedData, percentages.get( i ) ) );
517 }
518
519 return quantiles;
520 }
521
522 /***
523 * Returns the linearly interpolated number of elements in a list less or equal to a given element. Missing values
524 * are ignored. The rank is the number of elements <= element. Ranks are of the form
525 * <tt>{0, 1, 2,..., sortedList.size()}</tt>. If no element is <= element, then the rank is zero. If the element
526 * lies in between two contained elements, then linear interpolation is used and a non integer value is returned.
527 *
528 * @param sortedList the list to be searched (must be sorted ascending).
529 * @param element the element to search for.
530 * @return the rank of the element.
531 * @todo possibly implement so a copy is not made.
532 */
533 public static double rankInterpolated( DoubleArrayList sortedList,
534 double element ) {
535 return Descriptive
536 .rankInterpolated( removeMissing( sortedList ), element );
537 }
538
539 /***
540 * Returns the sample kurtosis (aka excess) of a data sequence.
541 *
542 * @param data DoubleArrayList
543 * @param mean double
544 * @param sampleVariance double
545 * @return double
546 */
547 public static double sampleKurtosis( DoubleArrayList data, double mean,
548 double sampleVariance ) {
549 return sampleKurtosis( sizeWithoutMissingValues( data ), moment( data, 4,
550 mean ), sampleVariance );
551 }
552
553 /***
554 * Returns the sample skew of a data sequence.
555 *
556 * @param data DoubleArrayList
557 * @param mean double
558 * @param sampleVariance double
559 * @return double
560 */
561 public static double sampleSkew( DoubleArrayList data, double mean,
562 double sampleVariance ) {
563 return sampleSkew( sizeWithoutMissingValues( data ), moment( data, 3,
564 mean ), sampleVariance );
565 }
566
567 /***
568 * Returns the skew of a data sequence, which is <tt>moment(data,3,mean) /
569 * standardDeviation<sup>3</sup></tt>.
570 *
571 * @param data DoubleArrayList
572 * @param mean double
573 * @param standardDeviation double
574 * @return double
575 */
576 public static double skew( DoubleArrayList data, double mean,
577 double standardDeviation ) {
578 return skew( moment( data, 3, mean ), standardDeviation );
579 }
580
581 /***
582 * Returns the sample standard deviation.
583 * <p>
584 * This is included for compatibility with the superclass, but does not implement the correction used there.
585 *
586 * @see cern.jet.stat.Descriptive#sampleStandardDeviation(int, double)
587 * @param size the number of elements of the data sequence.
588 * @param sampleVariance the <b>sample variance </b>.
589 */
590 public static double sampleStandardDeviation( int size, double sampleVariance ) {
591 return Math.sqrt( sampleVariance );
592 }
593
594 /***
595 * Returns the sample variance of a data sequence. That is <tt>Sum (
596 * (data[i]-mean)^2 ) / (data.size()-1)</tt>.
597 *
598 * @param data DoubleArrayList
599 * @param mean double
600 * @return double
601 */
602 public static double sampleVariance( DoubleArrayList data, double mean ) {
603 double[] elements = data.elements();
604 int effsize = sizeWithoutMissingValues( data );
605 int size = data.size();
606 double sum = 0;
607
608 for ( int i = size; --i >= 0; ) {
609 if ( Double.isNaN( elements[i] ) ) {
610 continue;
611 }
612 double delta = elements[i] - mean;
613 sum += delta * delta;
614 }
615
616 return sum / ( effsize - 1 );
617 }
618
619 /***
620 * Modifies a data sequence to be standardized. Mising values are ignored. Changes each element <tt>data[i]</tt> as
621 * follows: <tt>data[i] = (data[i]-mean)/standardDeviation</tt>.
622 *
623 * @param data DoubleArrayList
624 * @param mean mean of data
625 * @param standardDeviation stdev of data
626 */
627 public static void standardize( DoubleArrayList data, double mean,
628 double standardDeviation ) {
629 double[] elements = data.elements();
630 for ( int i = data.size(); --i >= 0; ) {
631 if ( Double.isNaN( elements[i] ) ) {
632 continue;
633 }
634 elements[i] = ( elements[i] - mean ) / standardDeviation;
635 }
636 }
637
638 /***
639 * Standardize. Note that this does something slightly different than standardize in the superclass, because our
640 * sampleStandardDeviation does not use the correction of the superclass (which isn't really standard).
641 *
642 * @param data DoubleArrayList
643 */
644 public static void standardize( DoubleArrayList data ) {
645 double mean = mean( data );
646 double stdev = Math.sqrt( sampleVariance( data, mean ) );
647 DescriptiveWithMissing.standardize( data, mean, stdev );
648 }
649
650 /***
651 * Returns the sum of a data sequence. That is <tt>Sum( data[i] )</tt>.
652 *
653 * @param data DoubleArrayList
654 * @return double
655 */
656 public static double sum( DoubleArrayList data ) {
657 return sumOfPowerDeviations( data, 1, 0.0 );
658 }
659
660 /***
661 * Returns the sum of inversions of a data sequence, which is <tt>Sum( 1.0 /
662 * data[i])</tt>.
663 *
664 * @param data the data sequence.
665 * @param from the index of the first data element (inclusive).
666 * @param to the index of the last data element (inclusive).
667 * @return double
668 */
669 public static double sumOfInversions( DoubleArrayList data, int from, int to ) {
670 return sumOfPowerDeviations( data, -1, 0.0, from, to );
671 }
672
673 /***
674 * Returns the sum of logarithms of a data sequence, which is <tt>Sum(
675 * Log(data[i])</tt>. Missing values are
676 * ignored.
677 *
678 * @param data the data sequence.
679 * @param from the index of the first data element (inclusive).
680 * @param to the index of the last data element (inclusive).
681 * @return double
682 */
683 public static double sumOfLogarithms( DoubleArrayList data, int from, int to ) {
684 double[] elements = data.elements();
685 double logsum = 0;
686 for ( int i = from - 1; ++i <= to; ) {
687 if ( Double.isNaN( elements[i] ) ) {
688 continue;
689 }
690 logsum += Math.log( elements[i] );
691 }
692 return logsum;
693 }
694
695 /***
696 * Returns the sum of powers of a data sequence, which is <tt>Sum (
697 * data[i]<sup>k</sup> )</tt>.
698 *
699 * @param data DoubleArrayList
700 * @param k int
701 * @return double
702 */
703 public static double sumOfPowers( DoubleArrayList data, int k ) {
704 return sumOfPowerDeviations( data, k, 0 );
705 }
706
707 /***
708 * Returns the sum of squares of a data sequence. Skips missing values.
709 *
710 * @param data DoubleArrayList
711 * @return double
712 */
713 public static double sumOfSquares( DoubleArrayList data ) {
714 return sumOfPowerDeviations( data, 2, 0.0 );
715 }
716
717 /***
718 * Compute the sum of the squared deviations from the mean of a data sequence. Missing values are ignored.
719 *
720 * @param data DoubleArrayList
721 * @return double
722 */
723 public static double sumOfSquaredDeviations( DoubleArrayList data ) {
724 return sumOfSquaredDeviations( sizeWithoutMissingValues( data ),
725 variance( sizeWithoutMissingValues( data ), sum( data ),
726 sumOfSquares( data ) ) );
727 }
728
729 /***
730 * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt>; optimized for common parameters like <tt>c == 0.0</tt>
731 * and/or <tt>k == -2 .. 4</tt>.
732 *
733 * @param data DoubleArrayList
734 * @param k int
735 * @param c double
736 * @return double
737 */
738 public static double sumOfPowerDeviations( DoubleArrayList data, int k,
739 double c ) {
740 return sumOfPowerDeviations( data, k, c, 0, data.size() - 1 );
741 }
742
743 /***
744 * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt> for all <tt>i = from ..
745 * to</tt>; optimized for common
746 * parameters like <tt>c == 0.0</tt> and/or <tt>k == -2 .. 5</tt>. Missing values are ignored.
747 *
748 * @param data DoubleArrayList
749 * @param k int
750 * @param c double
751 * @param from int
752 * @param to int
753 * @return double
754 */
755 public static double sumOfPowerDeviations( final DoubleArrayList data,
756 final int k, final double c, final int from, final int to ) {
757 final double[] elements = data.elements();
758 double sum = 0;
759 double v;
760 int i;
761 switch ( k ) {
762 case -2:
763 if ( c == 0.0 ) {
764 for ( i = from - 1; ++i <= to; ) {
765 if ( Double.isNaN( elements[i] ) ) {
766 continue;
767 }
768 v = elements[i];
769 sum += 1 / ( v * v );
770 }
771 } else {
772 for ( i = from - 1; ++i <= to; ) {
773 if ( Double.isNaN( elements[i] ) ) {
774 continue;
775 }
776 v = elements[i] - c;
777 sum += 1 / ( v * v );
778 }
779 }
780 break;
781 case -1:
782 if ( c == 0.0 ) {
783 for ( i = from - 1; ++i <= to; ) {
784 if ( Double.isNaN( elements[i] ) ) {
785 continue;
786 }
787 sum += 1 / ( elements[i] );
788 }
789 } else {
790 for ( i = from - 1; ++i <= to; ) {
791 if ( Double.isNaN( elements[i] ) ) {
792 continue;
793 }
794 sum += 1 / ( elements[i] - c );
795 }
796 }
797 break;
798 case 0:
799 sum += to - from + 1;
800 break;
801 case 1:
802 if ( c == 0.0 ) {
803 for ( i = from - 1; ++i <= to; ) {
804 if ( Double.isNaN( elements[i] ) ) {
805 continue;
806 }
807 sum += elements[i];
808 }
809 } else {
810 for ( i = from - 1; ++i <= to; ) {
811 if ( Double.isNaN( elements[i] ) ) {
812 continue;
813 }
814 sum += elements[i] - c;
815 }
816 }
817 break;
818 case 2:
819 if ( c == 0.0 ) {
820 for ( i = from - 1; ++i <= to; ) {
821 if ( Double.isNaN( elements[i] ) ) {
822 continue;
823 }
824 v = elements[i];
825 sum += v * v;
826 }
827 } else {
828 for ( i = from - 1; ++i <= to; ) {
829 if ( Double.isNaN( elements[i] ) ) {
830 continue;
831 }
832 v = elements[i] - c;
833 sum += v * v;
834 }
835 }
836 break;
837 case 3:
838 if ( c == 0.0 ) {
839 for ( i = from - 1; ++i <= to; ) {
840 v = elements[i];
841 sum += v * v * v;
842 }
843 } else {
844 for ( i = from - 1; ++i <= to; ) {
845 if ( Double.isNaN( elements[i] ) ) {
846 continue;
847 }
848 v = elements[i] - c;
849 sum += v * v * v;
850 }
851 }
852 break;
853 case 4:
854 if ( c == 0.0 ) {
855 for ( i = from - 1; ++i <= to; ) {
856 if ( Double.isNaN( elements[i] ) ) {
857 continue;
858 }
859 v = elements[i];
860 sum += v * v * v * v;
861 }
862 } else {
863 for ( i = from - 1; ++i <= to; ) {
864 if ( Double.isNaN( elements[i] ) ) {
865 continue;
866 }
867 v = elements[i] - c;
868 sum += v * v * v * v;
869 }
870 }
871 break;
872 case 5:
873 if ( c == 0.0 ) {
874 for ( i = from - 1; ++i <= to; ) {
875 if ( Double.isNaN( elements[i] ) ) {
876 continue;
877 }
878 v = elements[i];
879 sum += v * v * v * v * v;
880 }
881 } else {
882 for ( i = from - 1; ++i <= to; ) {
883 if ( Double.isNaN( elements[i] ) ) {
884 continue;
885 }
886 v = elements[i] - c;
887 sum += v * v * v * v * v;
888 }
889 }
890 break;
891 default:
892 for ( i = from - 1; ++i <= to; ) {
893 if ( Double.isNaN( elements[i] ) ) {
894 continue;
895 }
896 sum += Math.pow( elements[i] - c, k );
897 }
898 break;
899 }
900 return sum;
901 }
902
903 /***
904 * Return the size of the list, ignoring missing values.
905 *
906 * @param list DoubleArrayList
907 * @return int
908 */
909 public static int sizeWithoutMissingValues( DoubleArrayList list ) {
910
911 int size = 0;
912 for ( int i = 0; i < list.size(); i++ ) {
913 if ( !Double.isNaN( list.get( i ) ) ) {
914 size++;
915 }
916 }
917 return size;
918 }
919
920 /***
921 * Returns the trimmed mean of a sorted data sequence. Missing values are completely ignored.
922 *
923 * @param sortedData the data sequence; <b>must be sorted ascending </b>.
924 * @param mean the mean of the (full) sorted data sequence.
925 * @param left int the number of leading elements to trim.
926 * @param right int number of trailing elements to trim.
927 * @return double
928 */
929 public static double trimmedMean( DoubleArrayList sortedData, double mean,
930 int left, int right ) {
931 return Descriptive.trimmedMean( removeMissing( sortedData ), mean, left,
932 right );
933 }
934
935 /***
936 * Provided for convenience!
937 *
938 * @param data DoubleArrayList
939 * @return double
940 */
941 public static double variance( DoubleArrayList data ) {
942 return variance( sizeWithoutMissingValues( data ), sum( data ),
943 sumOfSquares( data ) );
944 }
945
946 /***
947 * Returns the weighted mean of a data sequence. That is <tt> Sum (data[i] *
948 * weights[i]) / Sum ( weights[i] )</tt>.
949 *
950 * @param data DoubleArrayList
951 * @param weights DoubleArrayList
952 * @return double
953 */
954 public static double weightedMean( DoubleArrayList data,
955 DoubleArrayList weights ) {
956 int size = data.size();
957 if ( size != weights.size() || size == 0 ) {
958 throw new IllegalArgumentException();
959 }
960
961 double[] elements = data.elements();
962 double[] theWeights = weights.elements();
963 double sum = 0.0;
964 double weightsSum = 0.0;
965 for ( int i = size; --i >= 0; ) {
966 double w = theWeights[i];
967 if ( Double.isNaN( elements[i] ) ) {
968 continue;
969 }
970 sum += elements[i] * w;
971 weightsSum += w;
972 }
973
974 return sum / weightsSum;
975 }
976
977 /***
978 * <b>Not supported. </b>
979 *
980 * @param sortedData DoubleArrayList
981 * @param mean double
982 * @param left int
983 * @param right int
984 * @return double
985 */
986 public static double winsorizedMean( DoubleArrayList sortedData,
987 double mean, int left, int right ) {
988 throw new UnsupportedOperationException(
989 "winsorizedMean not supported with missing values" );
990 }
991
992
993
994 /***
995 * Convenience function for internal use. Makes a copy of the list that doesn't have the missing values.
996 *
997 * @param data DoubleArrayList
998 * @return DoubleArrayList
999 */
1000 private static DoubleArrayList removeMissing( DoubleArrayList data ) {
1001 DoubleArrayList r = new DoubleArrayList( sizeWithoutMissingValues( data ) );
1002 double[] elements = data.elements();
1003 int size = data.size();
1004 for ( int i = 0; i < size; i++ ) {
1005 if ( Double.isNaN( elements[i] ) ) {
1006 continue;
1007 }
1008 r.add( elements[i] );
1009 }
1010 return r;
1011 }
1012
1013 }