Clover coverage report - baseCode - 0.2.5
Coverage timestamp: Tue Apr 12 2005 11:31:58 EDT
file stats: LOC: 161   Methods: 4
NCLOC: 109   Classes: 1
30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover
 
 Source file Conditionals Statements Methods TOTAL
RowMissingFilter.java 100% 100% 100% 100%
coverage
 1   
 package baseCode.dataFilter;
 2   
 
 3   
 import java.util.Vector;
 4   
 
 5   
 import baseCode.dataStructure.matrix.NamedMatrix;
 6   
 import cern.colt.list.IntArrayList;
 7   
 
 8   
 /**
 9   
  * Removes rows from a matrix that have too many missing values.
 10   
  * <p>
 11   
  * Copyright (c) 2004 Columbia University
 12   
  * </p>
 13   
  * 
 14   
  * @author Paul Pavlidis
 15   
  * @version $Id: RowMissingFilter.java,v 1.4 2004/07/27 03:18:58 pavlidis Exp $
 16   
  */
 17   
 public class RowMissingFilter extends AbstractFilter implements Filter {
 18   
 
 19   
    private int minPresentCount = 5;
 20   
    private static final int ABSOLUTEMINPRESENT = 1;
 21   
    private double maxFractionRemoved = 0.0;
 22   
    private double minPresentFraction = 1.0;
 23   
    private boolean maxFractionRemovedIsSet = false;
 24   
    private boolean minPresentFractionIsSet = false;
 25   
    private boolean minPresentIsSet = false;
 26   
 
 27   
    /**
 28   
     * Set the minimum number of values that must be present in each row. The default value is 5. This is always
 29   
     * overridden by a hard-coded value (currently 2) that must be present for a row to be kept; but this value is in
 30   
     * turn overridden by the maxfractionRemoved.
 31   
     * 
 32   
     * @param m int
 33   
     */
 34  8
    public void setMinPresentCount( int m ) {
 35  8
       if ( m < 0 ) {
 36  1
          throw new IllegalArgumentException(
 37   
                "Minimum present count must be > 0." );
 38   
       }
 39  7
       minPresentIsSet = true;
 40  7
       minPresentCount = m;
 41   
    }
 42   
 
 43   
    /**
 44   
     * @param k double the fraction of values to be removed.
 45   
     */
 46  2
    public void setMinPresentFraction( double k ) {
 47  2
       if ( k < 0.0 || k > 1.0 )
 48  1
             throw new IllegalArgumentException(
 49   
                   "Min present fraction must be between 0 and 1, got " + k );
 50  1
       minPresentFractionIsSet = true;
 51  1
       minPresentFraction = k;
 52   
    }
 53   
 
 54   
    /**
 55   
     * Set the maximum fraction of rows which will be removed from the data set. The default value is 0.3 Set it to 1.0
 56   
     * to remove this restriction.
 57   
     * 
 58   
     * @param f double
 59   
     */
 60  2
    public void setMaxFractionRemoved( double f ) {
 61  2
       if ( f < 0.0 || f > 1.0 )
 62  1
             throw new IllegalArgumentException(
 63   
                   "Max fraction removed must be between 0 and 1, got " + f );
 64  1
       maxFractionRemovedIsSet = true;
 65  1
       maxFractionRemoved = f;
 66   
    }
 67   
 
 68  8
    public NamedMatrix filter( NamedMatrix data ) {
 69  8
       Vector MTemp = new Vector();
 70  8
       Vector rowNames = new Vector();
 71  8
       int numRows = data.rows();
 72  8
       int numCols = data.columns();
 73  8
       IntArrayList present = new IntArrayList( numRows );
 74   
 
 75  8
       int kept = 0;
 76   
 
 77  8
       if ( minPresentFractionIsSet ) {
 78  1
          setMinPresentCount( ( int ) Math.ceil( minPresentFraction * numCols ) );
 79   
       }
 80   
 
 81  8
       if ( minPresentCount > numCols ) {
 82  1
          throw new IllegalStateException( "Minimum present count is set to "
 83   
                + minPresentCount + " but there are only " + numCols
 84   
                + " columns in the matrix." );
 85   
       }
 86   
 
 87  7
       if ( !minPresentIsSet ) {
 88  1
          log.info( "No filtering was requested" );
 89  1
          return data;
 90   
       }
 91   
 
 92   
       /* first pass - determine how many missing values there are per row */
 93  6
       for ( int i = 0; i < numRows; i++ ) {
 94  180
          int missingCount = 0;
 95  180
          for ( int j = 0; j < numCols; j++ ) {
 96  2160
             if ( !data.isMissing( i, j ) ) {
 97  2085
                missingCount++;
 98   
             }
 99   
          }
 100  180
          present.add( missingCount );
 101  180
          if ( missingCount >= ABSOLUTEMINPRESENT
 102   
                && missingCount >= minPresentCount ) {
 103  143
             kept++;
 104  143
             MTemp.add( data.getRowObj( i ) );
 105   
          }
 106   
       }
 107   
 
 108   
       /* decide whether we need to invoke the 'too many removed' clause */
 109  6
       if ( kept < numRows * ( 1.0 - maxFractionRemoved )
 110   
             && maxFractionRemoved != 0.0 ) {
 111  1
          IntArrayList sortedPresent = new IntArrayList( numRows );
 112  1
          sortedPresent = present.copy();
 113  1
          sortedPresent.sort();
 114  1
          sortedPresent.reverse();
 115   
 
 116  1
          log
 117   
                .info( "There are "
 118   
                      + kept
 119   
                      + " rows that meet criterion of at least "
 120   
                      + minPresentCount
 121   
                      + " non-missing values, but that's too many given the max fraction of "
 122   
                      + maxFractionRemoved
 123   
                      + "; minpresent adjusted to "
 124   
                      + sortedPresent
 125   
                            .get( ( int ) ( numRows * ( maxFractionRemoved ) ) ) );
 126  1
          minPresentCount = sortedPresent
 127   
                .get( ( int ) ( numRows * ( maxFractionRemoved ) ) );
 128   
 
 129   
          // Do another pass to add rows we missed before.
 130  1
          kept = 0;
 131  1
          MTemp.clear();
 132  1
          for ( int i = 0; i < numRows; i++ ) {
 133  30
             if ( present.get( i ) >= minPresentCount
 134   
                   && present.get( i ) >= ABSOLUTEMINPRESENT ) {
 135  21
                kept++;
 136  21
                MTemp.add( data.getRowObj( i ) );
 137   
             }
 138   
          }
 139   
 
 140   
       }
 141   
 
 142  6
       NamedMatrix returnval = getOutputMatrix( data, MTemp.size(), numCols );
 143   
 
 144   
       // Finally fill in the return value.
 145  6
       for ( int i = 0; i < MTemp.size(); i++ ) {
 146  143
          for ( int j = 0; j < numCols; j++ ) {
 147  1716
             returnval.set( i, j, ( ( Object[] ) MTemp.get( i ) )[j] );
 148   
          }
 149   
       }
 150  6
       returnval.setColumnNames( data.getColNames() );
 151  6
       returnval.setRowNames( rowNames );
 152   
 
 153  6
       log.info( "There are " + kept
 154   
             + " rows after removing rows which have fewer than "
 155   
             + minPresentCount + " values (or fewer than " + ABSOLUTEMINPRESENT
 156   
             + ")" );
 157   
 
 158  6
       return ( returnval );
 159   
 
 160   
    }
 161   
 }