Clover coverage report - baseCode - 0.2.5
Coverage timestamp: Tue Apr 12 2005 11:31:58 EDT
file stats: LOC: 1,014   Methods: 45
NCLOC: 539   Classes: 1
30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover
 
 Source file Conditionals Statements Methods TOTAL
DescriptiveWithMissing.java 39.2% 50.8% 48.9% 47.1%
coverage coverage
 1   
 package baseCode.math;
 2   
 
 3   
 import cern.colt.list.DoubleArrayList;
 4   
 import cern.jet.stat.Descriptive;
 5   
 
 6   
 ;
 7   
 
 8   
 /**
 9   
  * Mathematical functions for statistics that allow missing values without scotching the calculations.
 10   
  * <p>
 11   
  * Be careful because some methods from cern.jet.stat.Descriptive have not been overridden and will yield a
 12   
  * UnsupportedOperationException if used.
 13   
  * </p>
 14   
  * <p>
 15   
  * Some functions that come with DoubleArrayLists will not work in an entirely compatible way with missing values. For
 16   
  * example, size() reports the total number of elements, including missing values. To get a count of non-missing
 17   
  * values, use this.sizeWithoutMissingValues(). The right one to use may vary.
 18   
  * </p>
 19   
  * <p>
 20   
  * Not all methods need to be overridden. However, all methods that take a "size" parameter should be passed the results
 21   
  * of sizeWithoutMissingValues(data), instead of data.size().
 22   
  * </p>
 23   
  * <p>
 24   
  * Copyright © 2004 Columbia University
 25   
  * <p>
 26   
  * Based in part on code from the colt package: Copyright © 1999 CERN - European Organization for Nuclear Research.
 27   
  * 
 28   
  * @see <a
 29   
  *      href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/Descriptive.html">cern.jet.stat.Descriptive
 30   
  *      </a>
 31   
  * @author Paul Pavlidis
 32   
  * @version $Id: DescriptiveWithMissing.java,v 1.18 2005/01/05 02:01:02 pavlidis Exp $
 33   
  */
 34   
 public class DescriptiveWithMissing extends cern.jet.stat.Descriptive {
 35   
 
 36  0
    private DescriptiveWithMissing() {
 37   
    }
 38   
 
 39   
    /**
 40   
     * <b>Not supported. </b>
 41   
     * 
 42   
     * @param data DoubleArrayList
 43   
     * @param lag int
 44   
     * @param mean double
 45   
     * @param variance double
 46   
     * @return double
 47   
     */
 48  0
    public static double autoCorrelation( DoubleArrayList data, int lag,
 49   
          double mean, double variance ) {
 50  0
       throw new UnsupportedOperationException(
 51   
             "autoCorrelation not supported with missing values" );
 52   
    }
 53   
 
 54   
    /**
 55   
     * Returns the correlation of two data sequences. That is
 56   
     * <tt>covariance(data1,data2)/(standardDev1*standardDev2)</tt>. Missing values are ignored. This method is
 57   
     * overridden to stop users from using the method in the superclass when missing values are present. The problem is
 58   
     * that the standard deviation cannot be computed without knowning which values are not missing in both vectors to be
 59   
     * compared. Thus the standardDev parameters are thrown away by this method.
 60   
     * 
 61   
     * @param data1 DoubleArrayList
 62   
     * @param standardDev1 double - not used
 63   
     * @param data2 DoubleArrayList
 64   
     * @param standardDev2 double - not used
 65   
     * @return double
 66   
     */
 67  1
    public static double correlation( DoubleArrayList data1,
 68   
          double standardDev1, DoubleArrayList data2, double standardDev2 ) {
 69  1
       return correlation( data1, data2 );
 70   
    }
 71   
 
 72   
    /**
 73   
     * Calculate the pearson correlation of two arrays. Missing values (NaNs) are ignored.
 74   
     * 
 75   
     * @param x DoubleArrayList
 76   
     * @param y DoubleArrayList
 77   
     * @return double
 78   
     */
 79  437
    public static double correlation( DoubleArrayList x, DoubleArrayList y ) {
 80  437
       int j;
 81  437
       double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
 82  437
       int numused;
 83  437
       syy = 0.0;
 84  437
       sxy = 0.0;
 85  437
       sxx = 0.0;
 86  437
       sx = 0.0;
 87  437
       sy = 0.0;
 88  437
       numused = 0;
 89  437
       if ( x.size() != y.size() ) {
 90  0
          throw new ArithmeticException("Unequal vector sizes: " + x.size() + " != " + y.size());
 91   
       }
 92   
 
 93  437
       double[] xel = x.elements();
 94  437
       double[] yel = y.elements();
 95   
 
 96  437
       int length = x.size();
 97  437
       for ( j = 0; j < length; j++ ) {
 98  5232
          xj = xel[j];
 99  5232
          yj = yel[j];
 100   
 
 101  5232
          if ( !Double.isNaN( xj ) && !Double.isNaN( yj ) ) {
 102  5226
             sx += xj;
 103  5226
             sy += yj;
 104  5226
             sxy += xj * yj;
 105  5226
             sxx += xj * xj;
 106  5226
             syy += yj * yj;
 107  5226
             numused++;
 108   
          }
 109   
       }
 110   
 
 111  437
       if ( numused > 0 ) {
 112  437
          ay = sy / numused;
 113  437
          ax = sx / numused;
 114  437
          return ( sxy - sx * ay )
 115   
                / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
 116   
       }
 117  0
       return -2.0; // signifies that it could not be calculated.
 118   
 
 119   
    }
 120   
 
 121   
    /**
 122   
     * Returns the SAMPLE covariance of two data sequences. Pairs of values are only considered if both are not NaN. If there
 123   
     * are no non-missing pairs, the covariance is zero.
 124   
     * 
 125   
     * @param data1 the first vector
 126   
     * @param data2 the second vector
 127   
     * @return double
 128   
     */
 129  1
    public static double covariance( DoubleArrayList data1, DoubleArrayList data2 ) {
 130  1
       int size = data1.size();
 131  1
       if ( size != data2.size() || size == 0 ) {
 132  0
          throw new IllegalArgumentException();
 133   
       }
 134  1
       double[] elements1 = data1.elements();
 135  1
       double[] elements2 = data2.elements();
 136   
 
 137   
       /* initialize sumx and sumy to the first non-NaN pair of values */
 138   
 
 139  1
       int i = 0;
 140  1
       double sumx = 0.0, sumy = 0.0, Sxy = 0.0;
 141  3
       while ( i < size ) {
 142  3
          sumx = elements1[i];
 143  3
          sumy = elements2[i];
 144  3
          if ( !Double.isNaN( elements1[i] ) && !Double.isNaN( elements2[i] ) ) {
 145  1
             break;
 146   
          }
 147  2
          i++;
 148   
       }
 149  1
       i++;
 150  1
       int usedPairs = 1;
 151  1
       for ( ; i < size; ++i ) {
 152  3
          double x = elements1[i];
 153  3
          double y = elements2[i];
 154  3
          if ( Double.isNaN( x ) || Double.isNaN( y ) ) {
 155  1
             continue;
 156   
          }
 157   
 
 158  2
          sumx += x;
 159  2
          Sxy += ( x - sumx / ( usedPairs + 1 ) ) * ( y - sumy / usedPairs );
 160  2
          sumy += y;
 161  2
          usedPairs++;
 162   
       }
 163  1
       return Sxy / (usedPairs - 1);
 164   
    }
 165   
 
 166   
    /**
 167   
     * Durbin-Watson computation. This measures the serial correlation in a data series.
 168   
     * 
 169   
     * @param data DoubleArrayList
 170   
     * @return double
 171   
     * @todo this will still break in some situations where there are missing values
 172   
     */
 173  2
    public static double durbinWatson( DoubleArrayList data ) {
 174  2
       int size = data.size();
 175  2
       if ( size < 2 ) {
 176  0
          throw new IllegalArgumentException(
 177   
                "data sequence must contain at least two values." );
 178   
       }
 179   
 
 180  2
       double[] elements = data.elements();
 181  2
       double run = 0;
 182  2
       double run_sq = 0;
 183  2
       int firstNotNaN = 0;
 184  4
       while ( firstNotNaN < size ) {
 185  4
          run_sq = elements[firstNotNaN] * elements[firstNotNaN];
 186   
 
 187  4
          if ( !Double.isNaN( elements[firstNotNaN] ) ) {
 188  2
             break;
 189   
          }
 190   
 
 191  2
          firstNotNaN++;
 192   
       }
 193   
 
 194  2
       if ( firstNotNaN > 0 && size - firstNotNaN < 2 ) {
 195  0
          throw new IllegalArgumentException(
 196   
                "data sequence must contain at least two non-missing values." );
 197   
 
 198   
       }
 199   
 
 200  2
       for ( int i = firstNotNaN + 1; i < size; i++ ) {
 201  6
          int gap = 1;
 202  6
          while ( Double.isNaN( elements[i] ) ) {
 203  2
             gap++;
 204  2
             i++;
 205  2
             continue;
 206   
          }
 207  6
          double x = elements[i] - elements[i - gap];
 208   
          /**  */
 209  6
          run += x * x;
 210  6
          run_sq += elements[i] * elements[i];
 211   
       }
 212   
 
 213  2
       return run / run_sq;
 214   
    }
 215   
 
 216   
    /**
 217   
     * Returns the geometric mean of a data sequence. Missing values are ignored. Note that for a geometric mean to be
 218   
     * meaningful, the minimum of the data sequence must not be less or equal to zero. <br>
 219   
     * The geometric mean is given by <tt>pow( Product( data[i] ),
 220   
     * 1/data.size())</tt>. This method tries to avoid
 221   
     * overflows at the expense of an equivalent but somewhat slow definition: <tt>geo = Math.exp( Sum(
 222   
     * Log(data[i]) ) / data.size())</tt>.
 223   
     * 
 224   
     * @param data DoubleArrayList
 225   
     * @return double
 226   
     */
 227  1
    public static double geometricMean( DoubleArrayList data ) {
 228  1
       return geometricMean( sizeWithoutMissingValues( data ), sumOfLogarithms(
 229   
             data, 0, data.size() - 1 ) );
 230   
    }
 231   
 
 232   
    /**
 233   
     * <b>Not supported. </b>
 234   
     * 
 235   
     * @param data DoubleArrayList
 236   
     * @param from int
 237   
     * @param to int
 238   
     * @param inOut double[]
 239   
     */
 240  0
    public static void incrementalUpdate( DoubleArrayList data, int from,
 241   
          int to, double[] inOut ) {
 242  0
       throw new UnsupportedOperationException(
 243   
             "incrementalUpdate not supported with missing values" );
 244   
    }
 245   
 
 246   
    /**
 247   
     * <b>Not supported. </b>
 248   
     * 
 249   
     * @param data DoubleArrayList
 250   
     * @param from int
 251   
     * @param to int
 252   
     * @param fromSumIndex int
 253   
     * @param toSumIndex int
 254   
     * @param sumOfPowers double[]
 255   
     */
 256  0
    public static void incrementalUpdateSumsOfPowers( DoubleArrayList data,
 257   
          int from, int to, int fromSumIndex, int toSumIndex,
 258   
          double[] sumOfPowers ) {
 259  0
       throw new UnsupportedOperationException(
 260   
             "incrementalUpdateSumsOfPowers not supported with missing values" );
 261   
    }
 262   
 
 263   
    /**
 264   
     * <b>Not supported. </b>
 265   
     * 
 266   
     * @param data DoubleArrayList
 267   
     * @param weights DoubleArrayList
 268   
     * @param from int
 269   
     * @param to int
 270   
     * @param inOut double[]
 271   
     */
 272  0
    public static void incrementalWeightedUpdate( DoubleArrayList data,
 273   
          DoubleArrayList weights, int from, int to, double[] inOut ) {
 274  0
       throw new UnsupportedOperationException(
 275   
             "incrementalWeightedUpdate not supported with missing values" );
 276   
    }
 277   
 
 278   
    /**
 279   
     * Returns the kurtosis (aka excess) of a data sequence.
 280   
     * 
 281   
     * @param moment4 the fourth central moment, which is <tt>moment(data,4,mean)</tt>.
 282   
     * @param standardDeviation the standardDeviation.
 283   
     * @return double
 284   
     */
 285  0
    public static double kurtosis( double moment4, double standardDeviation ) {
 286  0
       return -3
 287   
             + moment4
 288   
             / ( standardDeviation * standardDeviation * standardDeviation * standardDeviation );
 289   
    }
 290   
 
 291   
    /**
 292   
     * Returns the kurtosis (aka excess) of a data sequence, which is <tt>-3 +
 293   
     * moment(data,4,mean) / standardDeviation<sup>4</sup></tt>.
 294   
     * 
 295   
     * @param data DoubleArrayList
 296   
     * @param mean double
 297   
     * @param standardDeviation double
 298   
     * @return double
 299   
     */
 300  0
    public static double kurtosis( DoubleArrayList data, double mean,
 301   
          double standardDeviation ) {
 302  0
       return kurtosis( moment( data, 4, mean ), standardDeviation );
 303   
    }
 304   
 
 305   
    /**
 306   
     * <b>Not supported. </b>
 307   
     * 
 308   
     * @param data DoubleArrayList
 309   
     * @param mean double
 310   
     * @return double
 311   
     */
 312  0
    public static double lag1( DoubleArrayList data, double mean ) {
 313  0
       throw new UnsupportedOperationException(
 314   
             "lag1 not supported with missing values" );
 315   
    }
 316   
 
 317   
    /**
 318   
     * @param data Values to be analyzed.
 319   
     * @return Mean of the values in x. Missing values are ignored in the analysis.
 320   
     */
 321  127
    public static double mean( DoubleArrayList data ) {
 322  127
       return sum( data ) / sizeWithoutMissingValues( data );
 323   
    }
 324   
 
 325   
    /**
 326   
     * Special mean calculation where we use the effective size as an input.
 327   
     * 
 328   
     * @param x The data
 329   
     * @param effectiveSize The effective size used for the mean calculation.
 330   
     * @return double
 331   
     */
 332  0
    public static double mean( DoubleArrayList x, int effectiveSize ) {
 333   
 
 334  0
       int length = x.size();
 335   
 
 336  0
       if ( 0 == effectiveSize ) {
 337  0
          return Double.NaN;
 338   
       }
 339   
 
 340  0
       double sum = 0.0;
 341  0
       int i, count;
 342  0
       count = 0;
 343  0
       double value;
 344  0
       double[] elements = x.elements();
 345  0
       for ( i = 0; i < length; i++ ) {
 346  0
          value = elements[i];
 347  0
          if ( Double.isNaN( value ) ) {
 348  0
             continue;
 349   
          }
 350  0
          sum += value;
 351  0
          count++;
 352   
       }
 353  0
       if ( 0.0 == count ) {
 354  0
          return Double.NaN;
 355   
       }
 356  0
       return sum / effectiveSize;
 357   
 
 358   
    }
 359   
 
 360   
    /**
 361   
     * Special mean calculation where we use the effective size as an input.
 362   
     * 
 363   
     * @param elements The data double array.
 364   
     * @param effectiveSize The effective size used for the mean calculation.
 365   
     * @return double
 366   
     */
 367  0
    public static double mean( double[] elements, int effectiveSize ) {
 368   
 
 369  0
       int length = elements.length;
 370   
 
 371  0
       if ( 0 == effectiveSize ) {
 372  0
          return Double.NaN;
 373   
       }
 374   
 
 375  0
       double sum = 0.0;
 376  0
       int i, count;
 377  0
       count = 0;
 378  0
       double value;
 379  0
       for ( i = 0; i < length; i++ ) {
 380  0
          value = elements[i];
 381  0
          if ( Double.isNaN( value ) ) {
 382  0
             continue;
 383   
          }
 384  0
          sum += value;
 385  0
          count++;
 386   
       }
 387  0
       if ( 0.0 == count ) {
 388  0
          return Double.NaN;
 389   
       }
 390  0
       return sum / effectiveSize;
 391   
    }
 392   
 
 393   
    /**
 394   
     * Calculate the mean of the values above a particular quantile of an array.
 395   
     * 
 396   
     * @param quantile A value from 0 to 100
 397   
     * @param array Array for which we want to get the quantile.
 398   
     * @return double
 399   
     */
 400  0
    public static double meanAboveQuantile( int quantile, DoubleArrayList array ) {
 401   
 
 402  0
       if ( quantile < 0 || quantile > 100 ) {
 403  0
          throw new IllegalArgumentException(
 404   
                "Quantile must be between 0 and 100" );
 405   
       }
 406   
 
 407  0
       double returnvalue = 0.0;
 408  0
       int k = 0;
 409   
 
 410  0
       double median = Descriptive.quantile( array, quantile );
 411   
 
 412  0
       for ( int i = 0; i < array.size(); i++ ) {
 413  0
          if ( array.get( i ) >= median ) {
 414  0
             returnvalue += array.get( i );
 415  0
             k++;
 416   
          }
 417   
       }
 418   
 
 419  0
       if ( k == 0 ) {
 420  0
          throw new ArithmeticException( "No values found above quantile" );
 421   
       }
 422   
 
 423  0
       return ( returnvalue / k );
 424   
    }
 425   
 
 426   
    /**
 427   
     * Returns the median of a sorted data sequence. Missing values are not considered.
 428   
     * 
 429   
     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
 430   
     * @return double
 431   
     */
 432  61
    public static double median( DoubleArrayList sortedData ) {
 433  61
       return quantile( sortedData, 0.5 );
 434   
    }
 435   
 
 436   
    /**
 437   
     * Returns the moment of <tt>k</tt> -th order with constant <tt>c</tt> of a data sequence, which is
 438   
     * <tt>Sum( (data[i]-c)<sup>k</sup> ) /
 439   
     * data.size()</tt>.
 440   
     * 
 441   
     * @param data DoubleArrayList
 442   
     * @param k int
 443   
     * @param c double
 444   
     * @return double
 445   
     */
 446  1
    public static double moment( DoubleArrayList data, int k, double c ) {
 447  1
       return sumOfPowerDeviations( data, k, c )
 448   
             / sizeWithoutMissingValues( data );
 449   
    }
 450   
 
 451   
    /**
 452   
     * Returns the product of a data sequence, which is <tt>Prod( data[i] )</tt>. Missing values are ignored. In other
 453   
     * words: <tt>data[0]*data[1]*...*data[data.size()-1]</tt>. Note that you may easily get numeric overflows.
 454   
     * 
 455   
     * @param data DoubleArrayList
 456   
     * @return double
 457   
     */
 458  1
    public static double product( DoubleArrayList data ) {
 459  1
       int size = data.size();
 460  1
       double[] elements = data.elements();
 461   
 
 462  1
       double product = 1;
 463  1
       for ( int i = size; --i >= 0; ) {
 464  6
          if ( Double.isNaN( elements[i] ) ) {
 465  1
             continue;
 466   
          }
 467  5
          product *= elements[i];
 468   
 
 469   
       }
 470  1
       return product;
 471   
    }
 472   
 
 473   
    /**
 474   
     * Returns the <tt>phi-</tt> quantile; that is, an element <tt>elem</tt> for which holds that <tt>phi</tt>
 475   
     * percent of data elements are less than <tt>elem</tt>. Missing values are ignored. The quantile need not
 476   
     * necessarily be contained in the data sequence, it can be a linear interpolation.
 477   
     * 
 478   
     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
 479   
     * @param phi the percentage; must satisfy <tt>0 &lt;= phi &lt;= 1</tt>.
 480   
     * @todo possibly implement so a copy is not made.
 481   
     * @return double
 482   
     */
 483  62
    public static double quantile( DoubleArrayList sortedData, double phi ) {
 484  62
       return Descriptive.quantile( removeMissing( sortedData ), phi );
 485   
    }
 486   
 
 487   
    /**
 488   
     * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
 489   
     * interpolation if the element is not contained but lies in between two contained elements. Missing values are
 490   
     * ignored.
 491   
     * 
 492   
     * @param sortedList the list to be searched (must be sorted ascending).
 493   
     * @param element the element to search for.
 494   
     * @return the percentage <tt>phi</tt> of elements <tt>&lt;= element</tt>(<tt>0.0 &lt;= phi &lt;= 1.0)</tt>.
 495   
     */
 496  0
    public static double quantileInverse( DoubleArrayList sortedList,
 497   
          double element ) {
 498  0
       return rankInterpolated( sortedList, element ) / sortedList.size();
 499   
    }
 500   
 
 501   
    /**
 502   
     * Returns the quantiles of the specified percentages. The quantiles need not necessarily be contained in the data
 503   
     * sequence, it can be a linear interpolation.
 504   
     * 
 505   
     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
 506   
     * @param percentages the percentages for which quantiles are to be computed. Each percentage must be in the interval
 507   
     *        <tt>[0.0,1.0]</tt>.
 508   
     * @return the quantiles.
 509   
     */
 510  0
    public static DoubleArrayList quantiles( DoubleArrayList sortedData,
 511   
          DoubleArrayList percentages ) {
 512  0
       int s = percentages.size();
 513  0
       DoubleArrayList quantiles = new DoubleArrayList( s );
 514   
 
 515  0
       for ( int i = 0; i < s; i++ ) {
 516  0
          quantiles.add( quantile( sortedData, percentages.get( i ) ) );
 517   
       }
 518   
 
 519  0
       return quantiles;
 520   
    }
 521   
 
 522   
    /**
 523   
     * Returns the linearly interpolated number of elements in a list less or equal to a given element. Missing values
 524   
     * are ignored. The rank is the number of elements <= element. Ranks are of the form
 525   
     * <tt>{0, 1, 2,..., sortedList.size()}</tt>. If no element is <= element, then the rank is zero. If the element
 526   
     * lies in between two contained elements, then linear interpolation is used and a non integer value is returned.
 527   
     * 
 528   
     * @param sortedList the list to be searched (must be sorted ascending).
 529   
     * @param element the element to search for.
 530   
     * @return the rank of the element.
 531   
     * @todo possibly implement so a copy is not made.
 532   
     */
 533  0
    public static double rankInterpolated( DoubleArrayList sortedList,
 534   
          double element ) {
 535  0
       return Descriptive
 536   
             .rankInterpolated( removeMissing( sortedList ), element );
 537   
    }
 538   
 
 539   
    /**
 540   
     * Returns the sample kurtosis (aka excess) of a data sequence.
 541   
     * 
 542   
     * @param data DoubleArrayList
 543   
     * @param mean double
 544   
     * @param sampleVariance double
 545   
     * @return double
 546   
     */
 547  1
    public static double sampleKurtosis( DoubleArrayList data, double mean,
 548   
          double sampleVariance ) {
 549  1
       return sampleKurtosis( sizeWithoutMissingValues( data ), moment( data, 4,
 550   
             mean ), sampleVariance );
 551   
    }
 552   
 
 553   
    /**
 554   
     * Returns the sample skew of a data sequence.
 555   
     * 
 556   
     * @param data DoubleArrayList
 557   
     * @param mean double
 558   
     * @param sampleVariance double
 559   
     * @return double
 560   
     */
 561  0
    public static double sampleSkew( DoubleArrayList data, double mean,
 562   
          double sampleVariance ) {
 563  0
       return sampleSkew( sizeWithoutMissingValues( data ), moment( data, 3,
 564   
             mean ), sampleVariance );
 565   
    }
 566   
 
 567   
    /**
 568   
     * Returns the skew of a data sequence, which is <tt>moment(data,3,mean) /
 569   
     * standardDeviation<sup>3</sup></tt>.
 570   
     * 
 571   
     * @param data DoubleArrayList
 572   
     * @param mean double
 573   
     * @param standardDeviation double
 574   
     * @return double
 575   
     */
 576  0
    public static double skew( DoubleArrayList data, double mean,
 577   
          double standardDeviation ) {
 578  0
       return skew( moment( data, 3, mean ), standardDeviation );
 579   
    }
 580   
 
 581   
    /**
 582   
     * Returns the sample standard deviation.
 583   
     * <p>
 584   
     * This is included for compatibility with the superclass, but does not implement the correction used there.
 585   
     * 
 586   
     * @see cern.jet.stat.Descriptive#sampleStandardDeviation(int, double)
 587   
     * @param size the number of elements of the data sequence.
 588   
     * @param sampleVariance the <b>sample variance </b>.
 589   
     */
 590  0
    public static double sampleStandardDeviation( int size, double sampleVariance ) {
 591  0
       return Math.sqrt( sampleVariance );
 592   
    }
 593   
 
 594   
    /**
 595   
     * Returns the sample variance of a data sequence. That is <tt>Sum (
 596   
     * (data[i]-mean)^2 ) / (data.size()-1)</tt>.
 597   
     * 
 598   
     * @param data DoubleArrayList
 599   
     * @param mean double
 600   
     * @return double
 601   
     */
 602  34
    public static double sampleVariance( DoubleArrayList data, double mean ) {
 603  34
       double[] elements = data.elements();
 604  34
       int effsize = sizeWithoutMissingValues( data );
 605  34
       int size = data.size();
 606  34
       double sum = 0;
 607   
       // find the sum of the squares
 608  34
       for ( int i = size; --i >= 0; ) {
 609  383
          if ( Double.isNaN( elements[i] ) ) {
 610  3
             continue;
 611   
          }
 612  380
          double delta = elements[i] - mean;
 613  380
          sum += delta * delta;
 614   
       }
 615   
 
 616  34
       return sum / ( effsize - 1 );
 617   
    }
 618   
 
 619   
    /**
 620   
     * Modifies a data sequence to be standardized. Mising values are ignored. Changes each element <tt>data[i]</tt> as
 621   
     * follows: <tt>data[i] = (data[i]-mean)/standardDeviation</tt>.
 622   
     * 
 623   
     * @param data DoubleArrayList
 624   
     * @param mean mean of data
 625   
     * @param standardDeviation stdev of data
 626   
     */
 627  1
    public static void standardize( DoubleArrayList data, double mean,
 628   
          double standardDeviation ) {
 629  1
       double[] elements = data.elements();
 630  1
       for ( int i = data.size(); --i >= 0; ) {
 631  6
          if ( Double.isNaN( elements[i] ) ) {
 632  1
             continue;
 633   
          }
 634  5
          elements[i] = ( elements[i] - mean ) / standardDeviation;
 635   
       }
 636   
    }
 637   
 
 638   
    /**
 639   
     * Standardize. Note that this does something slightly different than standardize in the superclass, because our
 640   
     * sampleStandardDeviation does not use the correction of the superclass (which isn't really standard).
 641   
     * 
 642   
     * @param data DoubleArrayList
 643   
     */
 644  1
    public static void standardize( DoubleArrayList data ) {
 645  1
       double mean = mean( data );
 646  1
       double stdev = Math.sqrt( sampleVariance( data, mean ) );
 647  1
       DescriptiveWithMissing.standardize( data, mean, stdev );
 648   
    }
 649   
 
 650   
    /**
 651   
     * Returns the sum of a data sequence. That is <tt>Sum( data[i] )</tt>.
 652   
     * 
 653   
     * @param data DoubleArrayList
 654   
     * @return double
 655   
     */
 656  159
    public static double sum( DoubleArrayList data ) {
 657  159
       return sumOfPowerDeviations( data, 1, 0.0 );
 658   
    }
 659   
 
 660   
    /**
 661   
     * Returns the sum of inversions of a data sequence, which is <tt>Sum( 1.0 /
 662   
     * data[i])</tt>.
 663   
     * 
 664   
     * @param data the data sequence.
 665   
     * @param from the index of the first data element (inclusive).
 666   
     * @param to the index of the last data element (inclusive).
 667   
     * @return double
 668   
     */
 669  0
    public static double sumOfInversions( DoubleArrayList data, int from, int to ) {
 670  0
       return sumOfPowerDeviations( data, -1, 0.0, from, to );
 671   
    }
 672   
 
 673   
    /**
 674   
     * Returns the sum of logarithms of a data sequence, which is <tt>Sum(
 675   
     * Log(data[i])</tt>. Missing values are
 676   
     * ignored.
 677   
     * 
 678   
     * @param data the data sequence.
 679   
     * @param from the index of the first data element (inclusive).
 680   
     * @param to the index of the last data element (inclusive).
 681   
     * @return double
 682   
     */
 683  1
    public static double sumOfLogarithms( DoubleArrayList data, int from, int to ) {
 684  1
       double[] elements = data.elements();
 685  1
       double logsum = 0;
 686  1
       for ( int i = from - 1; ++i <= to; ) {
 687  6
          if ( Double.isNaN( elements[i] ) ) {
 688  1
             continue;
 689   
          }
 690  5
          logsum += Math.log( elements[i] );
 691   
       }
 692  1
       return logsum;
 693   
    }
 694   
 
 695   
    /**
 696   
     * Returns the sum of powers of a data sequence, which is <tt>Sum (
 697   
     * data[i]<sup>k</sup> )</tt>.
 698   
     * 
 699   
     * @param data DoubleArrayList
 700   
     * @param k int
 701   
     * @return double
 702   
     */
 703  0
    public static double sumOfPowers( DoubleArrayList data, int k ) {
 704  0
       return sumOfPowerDeviations( data, k, 0 );
 705   
    }
 706   
 
 707   
    /**
 708   
     * Returns the sum of squares of a data sequence. Skips missing values.
 709   
     * 
 710   
     * @param data DoubleArrayList
 711   
     * @return double
 712   
     */
 713  62
    public static double sumOfSquares( DoubleArrayList data ) {
 714  62
       return sumOfPowerDeviations( data, 2, 0.0 );
 715   
    }
 716   
 
 717   
    /**
 718   
     * Compute the sum of the squared deviations from the mean of a data sequence. Missing values are ignored.
 719   
     * 
 720   
     * @param data DoubleArrayList
 721   
     * @return double
 722   
     */
 723  0
    public static double sumOfSquaredDeviations( DoubleArrayList data ) {
 724  0
       return sumOfSquaredDeviations( sizeWithoutMissingValues( data ),
 725   
             variance( sizeWithoutMissingValues( data ), sum( data ),
 726   
                   sumOfSquares( data ) ) );
 727   
    }
 728   
 
 729   
    /**
 730   
     * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt>; optimized for common parameters like <tt>c == 0.0</tt>
 731   
     * and/or <tt>k == -2 .. 4</tt>.
 732   
     * 
 733   
     * @param data DoubleArrayList
 734   
     * @param k int
 735   
     * @param c double
 736   
     * @return double
 737   
     */
 738  222
    public static double sumOfPowerDeviations( DoubleArrayList data, int k,
 739   
          double c ) {
 740  222
       return sumOfPowerDeviations( data, k, c, 0, data.size() - 1 );
 741   
    }
 742   
 
 743   
    /**
 744   
     * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt> for all <tt>i = from ..
 745   
     * to</tt>; optimized for common
 746   
     * parameters like <tt>c == 0.0</tt> and/or <tt>k == -2 .. 5</tt>. Missing values are ignored.
 747   
     * 
 748   
     * @param data DoubleArrayList
 749   
     * @param k int
 750   
     * @param c double
 751   
     * @param from int
 752   
     * @param to int
 753   
     * @return double
 754   
     */
 755  222
    public static double sumOfPowerDeviations( final DoubleArrayList data,
 756   
          final int k, final double c, final int from, final int to ) {
 757  222
       final double[] elements = data.elements();
 758  222
       double sum = 0;
 759  222
       double v;
 760  222
       int i;
 761  222
       switch ( k ) { // optimized for speed
 762   
          case -2:
 763  0
             if ( c == 0.0 ) {
 764  0
                for ( i = from - 1; ++i <= to; ) {
 765  0
                   if ( Double.isNaN( elements[i] ) ) {
 766  0
                      continue;
 767   
                   }
 768  0
                   v = elements[i];
 769  0
                   sum += 1 / ( v * v );
 770   
                }
 771   
             } else {
 772  0
                for ( i = from - 1; ++i <= to; ) {
 773  0
                   if ( Double.isNaN( elements[i] ) ) {
 774  0
                      continue;
 775   
                   }
 776  0
                   v = elements[i] - c;
 777  0
                   sum += 1 / ( v * v );
 778   
                }
 779   
             }
 780  0
             break;
 781   
          case -1:
 782  0
             if ( c == 0.0 ) {
 783  0
                for ( i = from - 1; ++i <= to; ) {
 784  0
                   if ( Double.isNaN( elements[i] ) ) {
 785  0
                      continue;
 786   
                   }
 787  0
                   sum += 1 / ( elements[i] );
 788   
                }
 789   
             } else {
 790  0
                for ( i = from - 1; ++i <= to; ) {
 791  0
                   if ( Double.isNaN( elements[i] ) ) {
 792  0
                      continue;
 793   
                   }
 794  0
                   sum += 1 / ( elements[i] - c );
 795   
                }
 796   
             }
 797  0
             break;
 798   
          case 0:
 799  0
             sum += to - from + 1;
 800  0
             break;
 801   
          case 1:
 802  159
             if ( c == 0.0 ) {
 803  159
                for ( i = from - 1; ++i <= to; ) {
 804  1853
                   if ( Double.isNaN( elements[i] ) ) {
 805  8
                      continue;
 806   
                   }
 807  1845
                   sum += elements[i];
 808   
                }
 809   
             } else {
 810  0
                for ( i = from - 1; ++i <= to; ) {
 811  0
                   if ( Double.isNaN( elements[i] ) ) {
 812  0
                      continue;
 813   
                   }
 814  0
                   sum += elements[i] - c;
 815   
                }
 816   
             }
 817  159
             break;
 818   
          case 2:
 819  62
             if ( c == 0.0 ) {
 820  62
                for ( i = from - 1; ++i <= to; ) {
 821  732
                   if ( Double.isNaN( elements[i] ) ) {
 822  2
                      continue;
 823   
                   }
 824  730
                   v = elements[i];
 825  730
                   sum += v * v;
 826   
                }
 827   
             } else {
 828  0
                for ( i = from - 1; ++i <= to; ) {
 829  0
                   if ( Double.isNaN( elements[i] ) ) {
 830  0
                      continue;
 831   
                   }
 832  0
                   v = elements[i] - c;
 833  0
                   sum += v * v;
 834   
                }
 835   
             }
 836  62
             break;
 837   
          case 3:
 838  0
             if ( c == 0.0 ) {
 839  0
                for ( i = from - 1; ++i <= to; ) {
 840  0
                   v = elements[i];
 841  0
                   sum += v * v * v;
 842   
                }
 843   
             } else {
 844  0
                for ( i = from - 1; ++i <= to; ) {
 845  0
                   if ( Double.isNaN( elements[i] ) ) {
 846  0
                      continue;
 847   
                   }
 848  0
                   v = elements[i] - c;
 849  0
                   sum += v * v * v;
 850   
                }
 851   
             }
 852  0
             break;
 853   
          case 4:
 854  1
             if ( c == 0.0 ) {
 855  0
                for ( i = from - 1; ++i <= to; ) {
 856  0
                   if ( Double.isNaN( elements[i] ) ) {
 857  0
                      continue;
 858   
                   }
 859  0
                   v = elements[i];
 860  0
                   sum += v * v * v * v;
 861   
                }
 862   
             } else {
 863  1
                for ( i = from - 1; ++i <= to; ) {
 864  6
                   if ( Double.isNaN( elements[i] ) ) {
 865  1
                      continue;
 866   
                   }
 867  5
                   v = elements[i] - c;
 868  5
                   sum += v * v * v * v;
 869   
                }
 870   
             }
 871  1
             break;
 872   
          case 5:
 873  0
             if ( c == 0.0 ) {
 874  0
                for ( i = from - 1; ++i <= to; ) {
 875  0
                   if ( Double.isNaN( elements[i] ) ) {
 876  0
                      continue;
 877   
                   }
 878  0
                   v = elements[i];
 879  0
                   sum += v * v * v * v * v;
 880   
                }
 881   
             } else {
 882  0
                for ( i = from - 1; ++i <= to; ) {
 883  0
                   if ( Double.isNaN( elements[i] ) ) {
 884  0
                      continue;
 885   
                   }
 886  0
                   v = elements[i] - c;
 887  0
                   sum += v * v * v * v * v;
 888   
                }
 889   
             }
 890  0
             break;
 891   
          default:
 892  0
             for ( i = from - 1; ++i <= to; ) {
 893  0
                if ( Double.isNaN( elements[i] ) ) {
 894  0
                   continue;
 895   
                }
 896  0
                sum += Math.pow( elements[i] - c, k );
 897   
             }
 898  0
             break;
 899   
       }
 900  222
       return sum;
 901   
    }
 902   
 
 903   
    /**
 904   
     * Return the size of the list, ignoring missing values.
 905   
     * 
 906   
     * @param list DoubleArrayList
 907   
     * @return int
 908   
     */
 909  228
    public static int sizeWithoutMissingValues( DoubleArrayList list ) {
 910   
 
 911  228
       int size = 0;
 912  228
       for ( int i = 0; i < list.size(); i++ ) {
 913  2626
          if ( !Double.isNaN( list.get( i ) ) ) {
 914  2610
             size++;
 915   
          }
 916   
       }
 917  228
       return size;
 918   
    }
 919   
 
 920   
    /**
 921   
     * Returns the trimmed mean of a sorted data sequence. Missing values are completely ignored.
 922   
     * 
 923   
     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
 924   
     * @param mean the mean of the (full) sorted data sequence.
 925   
     * @param left int the number of leading elements to trim.
 926   
     * @param right int number of trailing elements to trim.
 927   
     * @return double
 928   
     */
 929  1
    public static double trimmedMean( DoubleArrayList sortedData, double mean,
 930   
          int left, int right ) {
 931  1
       return Descriptive.trimmedMean( removeMissing( sortedData ), mean, left,
 932   
             right );
 933   
    }
 934   
 
 935   
    /**
 936   
     * Provided for convenience!
 937   
     * 
 938   
     * @param data DoubleArrayList
 939   
     * @return double
 940   
     */
 941  0
    public static double variance( DoubleArrayList data ) {
 942  0
       return variance( sizeWithoutMissingValues( data ), sum( data ),
 943   
             sumOfSquares( data ) );
 944   
    }
 945   
 
 946   
    /**
 947   
     * Returns the weighted mean of a data sequence. That is <tt> Sum (data[i] *
 948   
     * weights[i]) / Sum ( weights[i] )</tt>.
 949   
     * 
 950   
     * @param data DoubleArrayList
 951   
     * @param weights DoubleArrayList
 952   
     * @return double
 953   
     */
 954  0
    public static double weightedMean( DoubleArrayList data,
 955   
          DoubleArrayList weights ) {
 956  0
       int size = data.size();
 957  0
       if ( size != weights.size() || size == 0 ) {
 958  0
          throw new IllegalArgumentException();
 959   
       }
 960   
 
 961  0
       double[] elements = data.elements();
 962  0
       double[] theWeights = weights.elements();
 963  0
       double sum = 0.0;
 964  0
       double weightsSum = 0.0;
 965  0
       for ( int i = size; --i >= 0; ) {
 966  0
          double w = theWeights[i];
 967  0
          if ( Double.isNaN( elements[i] ) ) {
 968  0
             continue;
 969   
          }
 970  0
          sum += elements[i] * w;
 971  0
          weightsSum += w;
 972   
       }
 973   
 
 974  0
       return sum / weightsSum;
 975   
    }
 976   
 
 977   
    /**
 978   
     * <b>Not supported. </b>
 979   
     * 
 980   
     * @param sortedData DoubleArrayList
 981   
     * @param mean double
 982   
     * @param left int
 983   
     * @param right int
 984   
     * @return double
 985   
     */
 986  0
    public static double winsorizedMean( DoubleArrayList sortedData,
 987   
          double mean, int left, int right ) {
 988  0
       throw new UnsupportedOperationException(
 989   
             "winsorizedMean not supported with missing values" );
 990   
    }
 991   
 
 992   
    /* private methods */
 993   
 
 994   
    /**
 995   
     * Convenience function for internal use. Makes a copy of the list that doesn't have the missing values.
 996   
     * 
 997   
     * @param data DoubleArrayList
 998   
     * @return DoubleArrayList
 999   
     */
 1000  63
    private static DoubleArrayList removeMissing( DoubleArrayList data ) {
 1001  63
       DoubleArrayList r = new DoubleArrayList( sizeWithoutMissingValues( data ) );
 1002  63
       double[] elements = data.elements();
 1003  63
       int size = data.size();
 1004  63
       for ( int i = 0; i < size; i++ ) {
 1005  738
          if ( Double.isNaN( elements[i] ) ) {
 1006  3
             continue;
 1007   
          }
 1008  735
          r.add( elements[i] );
 1009   
       }
 1010  63
       return r;
 1011   
    }
 1012   
 
 1013   
 } // end of class
 1014