|
|||||||||||||||||||
| 30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover | |||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| RowAbsentFilter.java | 81% | 81.2% | 100% | 82.1% |
|
||||||||||||||
| 1 |
package baseCode.dataFilter;
|
|
| 2 |
|
|
| 3 |
import java.util.Vector;
|
|
| 4 |
|
|
| 5 |
import baseCode.dataStructure.matrix.NamedMatrix;
|
|
| 6 |
import baseCode.dataStructure.matrix.StringMatrix2DNamed;
|
|
| 7 |
|
|
| 8 |
/**
|
|
| 9 |
* Filter a data matrix according to flags given in a separate matrix.
|
|
| 10 |
* <p>
|
|
| 11 |
* The flags can be 'A', 'P' or 'M', for absent, present and marginal, following the Affymetrix convention. By default,
|
|
| 12 |
* Marginal flags are counted as "absent", but this can be changed by the user.
|
|
| 13 |
* <p>
|
|
| 14 |
* Copyright (c) 2004
|
|
| 15 |
* </p>
|
|
| 16 |
* <p>
|
|
| 17 |
* Institution:: Columbia University
|
|
| 18 |
* </p>
|
|
| 19 |
*
|
|
| 20 |
* @author Paul Pavlidis
|
|
| 21 |
* @version $Id: RowAbsentFilter.java,v 1.5 2004/07/27 03:18:58 pavlidis Exp $
|
|
| 22 |
*/
|
|
| 23 |
public class RowAbsentFilter extends AbstractFilter implements Filter { |
|
| 24 |
|
|
| 25 |
private StringMatrix2DNamed flags = null; |
|
| 26 |
|
|
| 27 |
private double minPresentFraction = 0.0; |
|
| 28 |
private int minPresentCount = 0; |
|
| 29 |
private boolean keepMarginal = false; |
|
| 30 |
private boolean fractionIsSet = false; |
|
| 31 |
private boolean countIsSet = false; |
|
| 32 |
private boolean flagsSet = false; |
|
| 33 |
|
|
| 34 |
/**
|
|
| 35 |
* @param f the matrix containing the flags.
|
|
| 36 |
*/
|
|
| 37 | 9 |
public void setFlagMatrix( StringMatrix2DNamed f ) { |
| 38 | 9 |
if ( f == null ) { |
| 39 | 1 |
throw new IllegalArgumentException( "Flag matrix is null" ); |
| 40 |
} |
|
| 41 | 8 |
flags = f; |
| 42 | 8 |
flagsSet = true;
|
| 43 |
} |
|
| 44 |
|
|
| 45 |
/**
|
|
| 46 |
* @param k the minimum fraction of present values that there must be, in order to keep the row.
|
|
| 47 |
*/
|
|
| 48 | 2 |
public void setMinPresentFraction( double k ) { |
| 49 | 2 |
if ( k < 0.0 || k > 1.0 )
|
| 50 | 1 |
throw new IllegalArgumentException( |
| 51 |
"Min present fraction must be between 0 and 1, got " + k );
|
|
| 52 | 1 |
minPresentFraction = k; |
| 53 | 1 |
fractionIsSet = true;
|
| 54 |
} |
|
| 55 |
|
|
| 56 |
/**
|
|
| 57 |
* @param k the minimum number of present values there must be in order to keep the row.
|
|
| 58 |
*/
|
|
| 59 | 6 |
public void setMinPresentCount( int k ) { |
| 60 | 6 |
if ( k < 0 ) {
|
| 61 | 0 |
throw new IllegalArgumentException( |
| 62 |
"Minimum present count must be > 0." );
|
|
| 63 |
} |
|
| 64 | 6 |
minPresentCount = k; |
| 65 | 6 |
countIsSet = true;
|
| 66 |
} |
|
| 67 |
|
|
| 68 |
/**
|
|
| 69 |
* @param k whether to count 'marginal' as 'present'. Default is false.
|
|
| 70 |
*/
|
|
| 71 | 1 |
public void setKeepMarginal( boolean k ) { |
| 72 | 1 |
keepMarginal = k; |
| 73 |
} |
|
| 74 |
|
|
| 75 |
/**
|
|
| 76 |
* The data is going to be filtered in accordance to strings in 'flags'. These are either 'A', 'P' or 'M' for absent,
|
|
| 77 |
* present and marginal.
|
|
| 78 |
*
|
|
| 79 |
* @param data The input matrix
|
|
| 80 |
* @return Matrix after filtering.
|
|
| 81 |
*/
|
|
| 82 | 7 |
public NamedMatrix filter( NamedMatrix data ) {
|
| 83 |
|
|
| 84 | 7 |
int numRows = data.rows();
|
| 85 | 7 |
int numCols = data.columns();
|
| 86 |
|
|
| 87 | 7 |
if ( minPresentCount > numCols ) {
|
| 88 | 1 |
throw new IllegalStateException( "Minimum present count is set to " |
| 89 |
+ minPresentCount + " but there are only " + numCols
|
|
| 90 |
+ " columns in the matrix." );
|
|
| 91 |
} |
|
| 92 |
|
|
| 93 | 6 |
if ( flags == null ) { |
| 94 | 0 |
throw new IllegalStateException( "Flag matrix is null" ); |
| 95 |
} |
|
| 96 |
|
|
| 97 |
// no filtering requested.
|
|
| 98 | 6 |
if ( !fractionIsSet && !countIsSet ) {
|
| 99 | 0 |
log.info( "No filtering was requested" );
|
| 100 | 0 |
return data;
|
| 101 |
} |
|
| 102 |
|
|
| 103 | 6 |
if ( !flagsSet ) {
|
| 104 | 0 |
log.info( "No flag matrix was provided." );
|
| 105 | 0 |
return data;
|
| 106 |
} |
|
| 107 |
|
|
| 108 | 6 |
validateFlags( data ); |
| 109 |
|
|
| 110 |
// nothing will happen.
|
|
| 111 | 6 |
if ( minPresentFraction == 0.0 && minPresentCount == 0 ) {
|
| 112 | 0 |
log |
| 113 |
.info( "Criteria are set too low to result in any changes to the input." );
|
|
| 114 | 0 |
return data;
|
| 115 |
} |
|
| 116 |
|
|
| 117 | 6 |
Vector MTemp = new Vector();
|
| 118 | 6 |
Vector rowNames = new Vector();
|
| 119 |
|
|
| 120 | 6 |
int kept = 0;
|
| 121 | 6 |
for ( int i = 0; i < numRows; i++ ) { |
| 122 | 180 |
String rowName = data.getRowName( i ); |
| 123 |
|
|
| 124 | 180 |
if ( !flags.containsRowName( rowName ) ) {
|
| 125 | 0 |
log.debug( "Row " + rowName + " not found in flags, skipping." ); |
| 126 | 0 |
continue;
|
| 127 |
} |
|
| 128 |
|
|
| 129 | 180 |
int numPresent = 0;
|
| 130 | 180 |
for ( int j = 0; j < numCols; j++ ) { |
| 131 | 2160 |
String colName = data.getColName( j ); |
| 132 |
|
|
| 133 | 2160 |
if ( !flags.containsColumnName( colName ) ) {
|
| 134 | 0 |
log.debug( "Column " + colName
|
| 135 |
+ " not found in flags, skipping." );
|
|
| 136 | 0 |
continue;
|
| 137 |
} |
|
| 138 |
|
|
| 139 |
// count missing values in the data as "absent", whatever the
|
|
| 140 |
// flag really is.
|
|
| 141 | 2160 |
if ( data.isMissing( i, j ) ) {
|
| 142 |
// log.debug( "Found missing data, counting as absent." );
|
|
| 143 | 30 |
continue;
|
| 144 |
} |
|
| 145 |
|
|
| 146 | 2130 |
String flag = ( String ) flags.get( flags |
| 147 |
.getRowIndexByName( rowName ), flags |
|
| 148 |
.getColIndexByName( colName ) ); |
|
| 149 |
|
|
| 150 | 2130 |
if ( flags.isMissing( flags.getRowIndexByName( rowName ), flags
|
| 151 |
.getColIndexByName( colName ) ) ) {
|
|
| 152 | 6 |
log |
| 153 |
.warn( "Flags had no value for an item, counting as present." );
|
|
| 154 | 2124 |
} else if ( flag.equals( "A" ) ) { |
| 155 | 332 |
continue;
|
| 156 | 1792 |
} else if ( flag.equals( "M" ) && !keepMarginal ) { |
| 157 | 20 |
continue;
|
| 158 | 1772 |
} else if ( !flag.equals( "P" ) && !flag.equals( "M" ) ) { |
| 159 | 6 |
log.warn( "Found a flag I don't know about, ignoring " + flag
|
| 160 |
+ " and counting as present." );
|
|
| 161 |
} |
|
| 162 |
|
|
| 163 | 1778 |
numPresent++; |
| 164 |
} |
|
| 165 |
|
|
| 166 |
/* decide whether this row is a keeper */
|
|
| 167 | 180 |
if ( ( countIsSet && numPresent >= minPresentCount )
|
| 168 |
|| ( fractionIsSet && ( double ) numPresent / numCols >= minPresentFraction ) ) {
|
|
| 169 | 134 |
MTemp.add( data.getRowObj( i ) ); |
| 170 | 134 |
rowNames.add( rowName ); |
| 171 | 134 |
kept++; |
| 172 |
} |
|
| 173 |
} |
|
| 174 |
|
|
| 175 | 6 |
NamedMatrix returnval = getOutputMatrix( data, MTemp.size(), numCols ); |
| 176 | 6 |
for ( int i = 0; i < MTemp.size(); i++ ) { |
| 177 | 134 |
for ( int j = 0; j < numCols; j++ ) { |
| 178 | 1608 |
returnval.set( i, j, ( ( Object[] ) MTemp.get( i ) )[j] ); |
| 179 |
} |
|
| 180 |
} |
|
| 181 | 6 |
returnval.setColumnNames( data.getColNames() ); |
| 182 | 6 |
returnval.setRowNames( rowNames ); |
| 183 |
|
|
| 184 | 6 |
log.info( "There are " + kept + " rows left after filtering." ); |
| 185 |
|
|
| 186 | 6 |
return ( returnval );
|
| 187 |
} |
|
| 188 |
|
|
| 189 |
/**
|
|
| 190 |
* @param data NamedMatrix
|
|
| 191 |
* @todo this should check more carefully - actually test that the rows are all the same.
|
|
| 192 |
*/
|
|
| 193 | 6 |
private void validateFlags( NamedMatrix data ) { |
| 194 | 6 |
if ( flags == null || flags.rows() < data.rows() |
| 195 |
|| flags.columns() < data.columns() ) {
|
|
| 196 | 0 |
throw new IllegalStateException( "Flags do not match data." ); |
| 197 |
} |
|
| 198 |
} |
|
| 199 |
|
|
| 200 |
} |
|
||||||||||