|
|||||||||||||||||||
| 30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover | |||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| RowMissingFilter.java | 100% | 100% | 100% | 100% |
|
||||||||||||||
| 1 |
package baseCode.dataFilter;
|
|
| 2 |
|
|
| 3 |
import java.util.Vector;
|
|
| 4 |
|
|
| 5 |
import baseCode.dataStructure.matrix.NamedMatrix;
|
|
| 6 |
import cern.colt.list.IntArrayList;
|
|
| 7 |
|
|
| 8 |
/**
|
|
| 9 |
* Remove rows from a matrix that are missing too many points.
|
|
| 10 |
* <p>
|
|
| 11 |
* Copyright (c) 2004 Columbia University
|
|
| 12 |
* </p>
|
|
| 13 |
*
|
|
| 14 |
* @author Paul Pavlidis
|
|
| 15 |
* @version $Id: RowMissingFilter.java,v 1.4 2004/07/27 03:18:58 pavlidis Exp $
|
|
| 16 |
*/
|
|
| 17 |
public class RowMissingFilter extends AbstractFilter implements Filter { |
|
| 18 |
|
|
| 19 |
private int minPresentCount = 5; |
|
| 20 |
private static final int ABSOLUTEMINPRESENT = 1; |
|
| 21 |
private double maxFractionRemoved = 0.0; |
|
| 22 |
private double minPresentFraction = 1.0; |
|
| 23 |
private boolean maxFractionRemovedIsSet = false; |
|
| 24 |
private boolean minPresentFractionIsSet = false; |
|
| 25 |
private boolean minPresentIsSet = false; |
|
| 26 |
|
|
| 27 |
/**
|
|
| 28 |
* Set the minimum number of values that must be present in each row. The default value is 5. This is always
|
|
| 29 |
* overridden by a hard-coded value (currently 2) that must be present for a row to be kept; but this value is in
|
|
| 30 |
* turn overridden by the maxfractionRemoved.
|
|
| 31 |
*
|
|
| 32 |
* @param m int
|
|
| 33 |
*/
|
|
| 34 | 8 |
public void setMinPresentCount( int m ) { |
| 35 | 8 |
if ( m < 0 ) {
|
| 36 | 1 |
throw new IllegalArgumentException( |
| 37 |
"Minimum present count must be > 0." );
|
|
| 38 |
} |
|
| 39 | 7 |
minPresentIsSet = true;
|
| 40 | 7 |
minPresentCount = m; |
| 41 |
} |
|
| 42 |
|
|
| 43 |
/**
|
|
| 44 |
* @param k double the fraction of values to be removed.
|
|
| 45 |
*/
|
|
| 46 | 2 |
public void setMinPresentFraction( double k ) { |
| 47 | 2 |
if ( k < 0.0 || k > 1.0 )
|
| 48 | 1 |
throw new IllegalArgumentException( |
| 49 |
"Min present fraction must be between 0 and 1, got " + k );
|
|
| 50 | 1 |
minPresentFractionIsSet = true;
|
| 51 | 1 |
minPresentFraction = k; |
| 52 |
} |
|
| 53 |
|
|
| 54 |
/**
|
|
| 55 |
* Set the maximum fraction of rows which will be removed from the data set. The default value is 0.3 Set it to 1.0
|
|
| 56 |
* to remove this restriction.
|
|
| 57 |
*
|
|
| 58 |
* @param f double
|
|
| 59 |
*/
|
|
| 60 | 2 |
public void setMaxFractionRemoved( double f ) { |
| 61 | 2 |
if ( f < 0.0 || f > 1.0 )
|
| 62 | 1 |
throw new IllegalArgumentException( |
| 63 |
"Max fraction removed must be between 0 and 1, got " + f );
|
|
| 64 | 1 |
maxFractionRemovedIsSet = true;
|
| 65 | 1 |
maxFractionRemoved = f; |
| 66 |
} |
|
| 67 |
|
|
| 68 | 8 |
public NamedMatrix filter( NamedMatrix data ) {
|
| 69 | 8 |
Vector MTemp = new Vector();
|
| 70 | 8 |
Vector rowNames = new Vector();
|
| 71 | 8 |
int numRows = data.rows();
|
| 72 | 8 |
int numCols = data.columns();
|
| 73 | 8 |
IntArrayList present = new IntArrayList( numRows );
|
| 74 |
|
|
| 75 | 8 |
int kept = 0;
|
| 76 |
|
|
| 77 | 8 |
if ( minPresentFractionIsSet ) {
|
| 78 | 1 |
setMinPresentCount( ( int ) Math.ceil( minPresentFraction * numCols ) );
|
| 79 |
} |
|
| 80 |
|
|
| 81 | 8 |
if ( minPresentCount > numCols ) {
|
| 82 | 1 |
throw new IllegalStateException( "Minimum present count is set to " |
| 83 |
+ minPresentCount + " but there are only " + numCols
|
|
| 84 |
+ " columns in the matrix." );
|
|
| 85 |
} |
|
| 86 |
|
|
| 87 | 7 |
if ( !minPresentIsSet ) {
|
| 88 | 1 |
log.info( "No filtering was requested" );
|
| 89 | 1 |
return data;
|
| 90 |
} |
|
| 91 |
|
|
| 92 |
/* first pass - determine how many missing values there are per row */
|
|
| 93 | 6 |
for ( int i = 0; i < numRows; i++ ) { |
| 94 | 180 |
int missingCount = 0;
|
| 95 | 180 |
for ( int j = 0; j < numCols; j++ ) { |
| 96 | 2160 |
if ( !data.isMissing( i, j ) ) {
|
| 97 | 2085 |
missingCount++; |
| 98 |
} |
|
| 99 |
} |
|
| 100 | 180 |
present.add( missingCount ); |
| 101 | 180 |
if ( missingCount >= ABSOLUTEMINPRESENT
|
| 102 |
&& missingCount >= minPresentCount ) {
|
|
| 103 | 143 |
kept++; |
| 104 | 143 |
MTemp.add( data.getRowObj( i ) ); |
| 105 |
} |
|
| 106 |
} |
|
| 107 |
|
|
| 108 |
/* decide whether we need to invoke the 'too many removed' clause */
|
|
| 109 | 6 |
if ( kept < numRows * ( 1.0 - maxFractionRemoved )
|
| 110 |
&& maxFractionRemoved != 0.0 ) {
|
|
| 111 | 1 |
IntArrayList sortedPresent = new IntArrayList( numRows );
|
| 112 | 1 |
sortedPresent = present.copy(); |
| 113 | 1 |
sortedPresent.sort(); |
| 114 | 1 |
sortedPresent.reverse(); |
| 115 |
|
|
| 116 | 1 |
log |
| 117 |
.info( "There are "
|
|
| 118 |
+ kept |
|
| 119 |
+ " rows that meet criterion of at least "
|
|
| 120 |
+ minPresentCount |
|
| 121 |
+ " non-missing values, but that's too many given the max fraction of "
|
|
| 122 |
+ maxFractionRemoved |
|
| 123 |
+ "; minpresent adjusted to "
|
|
| 124 |
+ sortedPresent |
|
| 125 |
.get( ( int ) ( numRows * ( maxFractionRemoved ) ) ) );
|
|
| 126 | 1 |
minPresentCount = sortedPresent |
| 127 |
.get( ( int ) ( numRows * ( maxFractionRemoved ) ) );
|
|
| 128 |
|
|
| 129 |
// Do another pass to add rows we missed before.
|
|
| 130 | 1 |
kept = 0; |
| 131 | 1 |
MTemp.clear(); |
| 132 | 1 |
for ( int i = 0; i < numRows; i++ ) { |
| 133 | 30 |
if ( present.get( i ) >= minPresentCount
|
| 134 |
&& present.get( i ) >= ABSOLUTEMINPRESENT ) {
|
|
| 135 | 21 |
kept++; |
| 136 | 21 |
MTemp.add( data.getRowObj( i ) ); |
| 137 |
} |
|
| 138 |
} |
|
| 139 |
|
|
| 140 |
} |
|
| 141 |
|
|
| 142 | 6 |
NamedMatrix returnval = getOutputMatrix( data, MTemp.size(), numCols ); |
| 143 |
|
|
| 144 |
// Finally fill in the return value.
|
|
| 145 | 6 |
for ( int i = 0; i < MTemp.size(); i++ ) { |
| 146 | 143 |
for ( int j = 0; j < numCols; j++ ) { |
| 147 | 1716 |
returnval.set( i, j, ( ( Object[] ) MTemp.get( i ) )[j] ); |
| 148 |
} |
|
| 149 |
} |
|
| 150 | 6 |
returnval.setColumnNames( data.getColNames() ); |
| 151 | 6 |
returnval.setRowNames( rowNames ); |
| 152 |
|
|
| 153 | 6 |
log.info( "There are " + kept
|
| 154 |
+ " rows after removing rows which have fewer than "
|
|
| 155 |
+ minPresentCount + " values (or fewer than " + ABSOLUTEMINPRESENT
|
|
| 156 |
+ ")" );
|
|
| 157 |
|
|
| 158 | 6 |
return ( returnval );
|
| 159 |
|
|
| 160 |
} |
|
| 161 |
} |
|
||||||||||