Clover coverage report - baseCode - 0.2.5
Coverage timestamp: Tue Apr 12 2005 11:31:58 EDT
file stats: LOC: 257   Methods: 3
NCLOC: 147   Classes: 1
30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover
 
 Source file Conditionals Statements Methods TOTAL
RowLevelFilter.java 87.5% 88.9% 100% 88.8%
coverage coverage
 1   
 package baseCode.dataFilter;
 2   
 
 3   
 import java.util.Vector;
 4   
 
 5   
 import baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed;
 6   
 import baseCode.dataStructure.matrix.NamedMatrix;
 7   
 import baseCode.math.DescriptiveWithMissing;
 8   
 import baseCode.math.Stats;
 9   
 import cern.colt.list.DoubleArrayList;
 10   
 import cern.jet.stat.Descriptive;
 11   
 
 12   
 /**
 13   
  * Remove rows from a matrix based on some row-based statistic. Rows with values too high and/or too low can be removed.
 14   
  * Thresholds are inclusive (i.e., values must be at least as high as the set threshold to be included). A number of
 15   
  * statistics are available. In addition, this filter can remove rows that have all negative data values.
 16   
  * <p>
 17   
  * There are a number of decisions/caveats to consider:
 18   
  * <h2>Cutpoint determination</h2>
 19   
  * <p>
 20   
  * There are multiple ways of determining cutpoints. Some possibilities are the maximum value, the minimum value, the
 21   
  * mean value, or the median value. The range and coefficient of variation are also included.
 22   
  * <p>
 23   
  * Note that if you want to use different methods for high-level filtering than for low-level filtering (e.g., using max
 24   
  * for the low-level, and min for the high-level), you have to filter twice. This could cause problems if you are using
 25   
  * fractional filtering and there are negative values (see below).
 26   
  * <h2>Filtering ratiometric data</h2>
 27   
  * <p>
 28   
  * For data that are normalized or ratios, it does not make sense to use this method on the raw data. In that situation,
 29   
  * you should filter the data based on the raw data, and then use a {@link RowNameFilter} to select the rows from the
 30   
  * ratio data.
 31   
  * <h2>Negative values</h2>
 32   
  * <p>
 33   
  * For microarray expression data based on the Affymetrix MAS4.0 protocol (and possibly others), negative values can
 34   
  * occur. In some cases all the values can be negative. As these values are generally viewed as nonsensical, one might
 35   
  * decide that data rows that are all negative should be filtered.
 36   
  * <h2>Behavior at extremes</h2>
 37   
  * <p>
 38   
  * If you request removal/inclusion of 1.0 of the data, you might not get the result you expect because the filtering is
 39   
  * inclusive.
 40   
  * <hr>
 41   
  * <p>
 42   
  * Copyright (c) 2004 Columbia University
 43   
  * <p>
 44   
  * 
 45   
  * @author Paul Pavlidis
 46   
  * @version $Id: RowLevelFilter.java,v 1.7 2004/07/27 03:18:58 pavlidis Exp $
 47   
  */
 48   
 public class RowLevelFilter extends AbstractLevelFilter {
 49   
 
 50   
    private boolean removeAllNegative = false;
 51   
 
 52   
    /**
 53   
     * Use the minimum of the row as the criterion.
 54   
     */
 55   
    public static final int MIN = 1;
 56   
 
 57   
    /**
 58   
     * Use the maximum of the row as the criterion.
 59   
     */
 60   
    public static final int MAX = 2;
 61   
 
 62   
    /**
 63   
     * Use the median as the criterion.
 64   
     */
 65   
    public static final int MEDIAN = 3;
 66   
 
 67   
    /**
 68   
     * Use the mean as the criterion.
 69   
     */
 70   
    public static final int MEAN = 4;
 71   
 
 72   
    /**
 73   
     * Use the range as the criterion.
 74   
     */
 75   
    public static final int RANGE = 5;
 76   
 
 77   
    /**
 78   
     * Use the coefficient of variation as the criterion.
 79   
     */
 80   
    public static final int CV = 6;
 81   
 
 82   
    private int method = MAX;
 83   
 
 84   
    /**
 85   
     * Choose the method that will be used for filtering. Default is 'MAX'. Those rows with the lowest values are removed
 86   
     * during 'low' filtering.
 87   
     * 
 88   
     * @param method one of the filtering method constants.
 89   
     */
 90  9
    public void setMethod( int method ) {
 91  9
       if ( method != MIN && method != MAX && method != MEDIAN && method != MEAN
 92   
             && method != RANGE && method != CV ) {
 93  0
          throw new IllegalArgumentException(
 94   
                "Unknown filtering method requested" );
 95   
       }
 96  9
       this.method = method;
 97   
    }
 98   
 
 99   
    /**
 100   
     * Set the filter to remove all rows that have only negative values. This is applied BEFORE applying fraction-based
 101   
     * criteria. In other words, if you request filtering 0.5 of the values, and 0.5 have all negative values, you will
 102   
     * get 0.25 of the data back. Default = false.
 103   
     * 
 104   
     * @param t boolean
 105   
     */
 106  21
    public void setRemoveAllNegative( boolean t ) {
 107  21
       log.info( "Rows with all negative values will be "
 108   
             + "removed PRIOR TO applying fraction-based criteria." );
 109  21
       removeAllNegative = t;
 110   
    }
 111   
 
 112   
    /**
 113   
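     * @param data the matrix to filter; must be a DenseDoubleMatrix2DNamed, otherwise an IllegalArgumentException is thrown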
 114   
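     * @return the input itself when no filtering is requested; otherwise a new matrix containing only the rows that passed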
 115   
     */
 116  22
    public NamedMatrix filter( NamedMatrix data ) {
 117   
 
 118  22
       if ( !( data instanceof DenseDoubleMatrix2DNamed ) ) {
 119  0
          throw new IllegalArgumentException(
 120   
                "Only valid for DenseDoubleMatrix2DNamed" );
 121   
       }
 122   
 
 123  22
       if ( lowCut == -Double.MAX_VALUE && highCut == Double.MAX_VALUE ) {
 124  1
          log.info( "No filtering requested" );
 125  1
          return data;
 126   
       }
 127   
 
 128  21
       int numRows = data.rows();
 129  21
       int numCols = data.columns();
 130   
 
 131  21
       DoubleArrayList criteria = new DoubleArrayList( new double[numRows] );
 132   
 
 133   
       /*
 134   
        * compute criteria.
 135   
        */
 136  21
       DoubleArrayList rowAsList = new DoubleArrayList( new double[numCols] );
 137  21
       int numAllNeg = 0;
 138  21
       for ( int i = 0; i < numRows; i++ ) {
 139  630
          Double[] row = ( Double[] ) data.getRowObj( i );
 140  630
          int numNeg = 0;
 141   
          /* stupid, copy into a DoubleArrayList so we can do stats */
 142  630
          for ( int j = 0; j < numCols; j++ ) {
 143  7560
             double item = row[j].doubleValue();
 144  7560
             rowAsList.set( j, item );
 145  7560
             if ( item < 0.0 || Double.isNaN( item ) ) {
 146  1617
                numNeg++;
 147   
             }
 148   
          }
 149  630
          if ( numNeg == numCols ) {
 150  63
             numAllNeg++;
 151   
          }
 152   
 
 153  630
          switch ( method ) {
 154   
             case MIN: {
 155  60
                criteria.set( i, Descriptive.min( rowAsList ) );
 156  60
                break;
 157   
             }
 158   
             case MAX: {
 159  450
                criteria.set( i, Descriptive.max( rowAsList ) );
 160  450
                break;
 161   
             }
 162   
             case MEAN: {
 163  60
                criteria.set( i, DescriptiveWithMissing.mean( rowAsList ) );
 164  60
                break;
 165   
             }
 166   
             case MEDIAN: {
 167  60
                criteria.set( i, DescriptiveWithMissing.median( rowAsList ) );
 168  60
                break;
 169   
             }
 170   
             case RANGE: {
 171  0
                criteria.set( i, Stats.range( rowAsList ) );
 172  0
                break;
 173   
             }
 174   
             case CV: {
 175  0
                criteria.set( i, Stats.cv( rowAsList ) );
 176  0
                break;
 177   
             }
 178   
             default: {
 179  0
                break;
 180   
             }
 181   
          }
 182   
       }
 183   
 
 184  21
       DoubleArrayList sortedCriteria = criteria.copy();
 185  21
       sortedCriteria.sort();
 186   
 
 187  21
       double realLowCut = -Double.MAX_VALUE;
 188  21
       double realHighCut = Double.MAX_VALUE;
 189  21
       int consideredRows = numRows;
 190  21
       int startIndex = 0;
 191  21
       if ( removeAllNegative ) {
 192  16
          consideredRows = numRows - numAllNeg;
 193  16
          startIndex = numAllNeg;
 194   
       }
 195   
 
 196  21
       if ( useHighAsFraction ) {
 197  4
          if ( !Stats.isValidFraction( highCut ) ) {
 198  0
             throw new IllegalStateException(
 199   
                   "High level cut must be a fraction between 0 and 1" );
 200   
          }
 201  4
          int thresholdIndex = 0;
 202  4
          thresholdIndex = ( int ) Math
 203   
                .ceil( consideredRows * ( 1.0 - highCut ) ) - 1;
 204   
 
 205  4
          thresholdIndex = Math.max( 0, thresholdIndex );
 206  4
          realHighCut = sortedCriteria.get( thresholdIndex );
 207   
       } else {
 208  17
          realHighCut = highCut;
 209   
       }
 210   
 
 211  21
       if ( useLowAsFraction ) {
 212  6
          if ( !Stats.isValidFraction( lowCut ) ) {
 213  0
             throw new IllegalStateException(
 214   
                   "Low level cut must be a fraction between 0 and 1" );
 215   
          }
 216   
 
 217  6
          int thresholdIndex = 0;
 218  6
          thresholdIndex = startIndex
 219   
                + ( int ) Math.floor( consideredRows * lowCut );
 220  6
          thresholdIndex = Math.min( numRows - 1, thresholdIndex );
 221  6
          realLowCut = sortedCriteria.get( thresholdIndex );
 222   
       } else {
 223  15
          realLowCut = lowCut;
 224   
       }
 225   
 
 226   
       // go back over the data now using the cutpoints. This is not optimally
 227   
       // efficient.
 228  21
       int kept = 0;
 229  21
       Vector rowsToKeep = new Vector();
 230  21
       Vector rowNames = new Vector();
 231   
 
 232  21
       for ( int i = 0; i < numRows; i++ ) {
 233  630
          if ( criteria.get( i ) >= realLowCut
 234   
                && criteria.get( i ) <= realHighCut ) {
 235  363
             kept++;
 236  363
             rowsToKeep.add( data.getRowObj( i ) );
 237  363
             rowNames.add( data.getRowName( i ) );
 238   
          }
 239   
       }
 240   
 
 241  21
       DenseDoubleMatrix2DNamed returnval = new DenseDoubleMatrix2DNamed(
 242   
             rowsToKeep.size(), numCols );
 243  21
       for ( int i = 0; i < kept; i++ ) {
 244  363
          Double[] row = ( Double[] ) rowsToKeep.get( i );
 245  363
          for ( int j = 0; j < numCols; j++ ) {
 246  4356
             returnval.set( i, j, row[j].doubleValue() );
 247   
          }
 248   
       }
 249  21
       returnval.setColumnNames( data.getColNames() );
 250  21
       returnval.setRowNames( rowNames );
 251   
 
 252  21
       log.info( "There are " + kept + " rows left after filtering." );
 253   
 
 254  21
       return ( returnval );
 255   
 
 256   
    }
 257   
 }