1 package baseCode.dataFilter;
2
3 import java.util.Vector;
4
5 import baseCode.dataStructure.matrix.NamedMatrix;
6 import baseCode.dataStructure.matrix.StringMatrix2DNamed;
7
8 /***
9 * Filter a data matrix according to flags given in a separate matrix.
10 * <p>
11 * The flags can be 'A', 'P' or 'M', for absent, present and marginal, following the Affymetrix convention. By default,
12 * Marginal flags are counted as "absent", but this can be changed by the user.
13 * <p>
14 * Copyright (c) 2004
15 * </p>
16 * <p>
17 * Institution:: Columbia University
18 * </p>
19 *
20 * @author Paul Pavlidis
21 * @version $Id: RowAbsentFilter.java,v 1.5 2004/07/27 03:18:58 pavlidis Exp $
22 */
23 public class RowAbsentFilter extends AbstractFilter implements Filter {
24
25 private StringMatrix2DNamed flags = null;
26
27 private double minPresentFraction = 0.0;
28 private int minPresentCount = 0;
29 private boolean keepMarginal = false;
30 private boolean fractionIsSet = false;
31 private boolean countIsSet = false;
32 private boolean flagsSet = false;
33
34 /***
35 * @param f the matrix containing the flags.
36 */
37 public void setFlagMatrix( StringMatrix2DNamed f ) {
38 if ( f == null ) {
39 throw new IllegalArgumentException( "Flag matrix is null" );
40 }
41 flags = f;
42 flagsSet = true;
43 }
44
45 /***
46 * @param k the minimum fraction of present values that there must be, in order to keep the row.
47 */
48 public void setMinPresentFraction( double k ) {
49 if ( k < 0.0 || k > 1.0 )
50 throw new IllegalArgumentException(
51 "Min present fraction must be between 0 and 1, got " + k );
52 minPresentFraction = k;
53 fractionIsSet = true;
54 }
55
56 /***
57 * @param k the minimum number of present values there must be in order to keep the row.
58 */
59 public void setMinPresentCount( int k ) {
60 if ( k < 0 ) {
61 throw new IllegalArgumentException(
62 "Minimum present count must be > 0." );
63 }
64 minPresentCount = k;
65 countIsSet = true;
66 }
67
68 /***
69 * @param k whether to count 'marginal' as 'present'. Default is false.
70 */
71 public void setKeepMarginal( boolean k ) {
72 keepMarginal = k;
73 }
74
75 /***
76 * The data is going to be filtered in accordance to strings in 'flags'. These are either 'A', 'P' or 'M' for absent,
77 * present and marginal.
78 *
79 * @param data The input matrix
80 * @return Matrix after filtering.
81 */
82 public NamedMatrix filter( NamedMatrix data ) {
83
84 int numRows = data.rows();
85 int numCols = data.columns();
86
87 if ( minPresentCount > numCols ) {
88 throw new IllegalStateException( "Minimum present count is set to "
89 + minPresentCount + " but there are only " + numCols
90 + " columns in the matrix." );
91 }
92
93 if ( flags == null ) {
94 throw new IllegalStateException( "Flag matrix is null" );
95 }
96
97
98 if ( !fractionIsSet && !countIsSet ) {
99 log.info( "No filtering was requested" );
100 return data;
101 }
102
103 if ( !flagsSet ) {
104 log.info( "No flag matrix was provided." );
105 return data;
106 }
107
108 validateFlags( data );
109
110
111 if ( minPresentFraction == 0.0 && minPresentCount == 0 ) {
112 log
113 .info( "Criteria are set too low to result in any changes to the input." );
114 return data;
115 }
116
117 Vector MTemp = new Vector();
118 Vector rowNames = new Vector();
119
120 int kept = 0;
121 for ( int i = 0; i < numRows; i++ ) {
122 String rowName = data.getRowName( i );
123
124 if ( !flags.containsRowName( rowName ) ) {
125 log.debug( "Row " + rowName + " not found in flags, skipping." );
126 continue;
127 }
128
129 int numPresent = 0;
130 for ( int j = 0; j < numCols; j++ ) {
131 String colName = data.getColName( j );
132
133 if ( !flags.containsColumnName( colName ) ) {
134 log.debug( "Column " + colName
135 + " not found in flags, skipping." );
136 continue;
137 }
138
139
140
141 if ( data.isMissing( i, j ) ) {
142
143 continue;
144 }
145
146 String flag = ( String ) flags.get( flags
147 .getRowIndexByName( rowName ), flags
148 .getColIndexByName( colName ) );
149
150 if ( flags.isMissing( flags.getRowIndexByName( rowName ), flags
151 .getColIndexByName( colName ) ) ) {
152 log
153 .warn( "Flags had no value for an item, counting as present." );
154 } else if ( flag.equals( "A" ) ) {
155 continue;
156 } else if ( flag.equals( "M" ) && !keepMarginal ) {
157 continue;
158 } else if ( !flag.equals( "P" ) && !flag.equals( "M" ) ) {
159 log.warn( "Found a flag I don't know about, ignoring " + flag
160 + " and counting as present." );
161 }
162
163 numPresent++;
164 }
165
166
167 if ( ( countIsSet && numPresent >= minPresentCount )
168 || ( fractionIsSet && ( double ) numPresent / numCols >= minPresentFraction ) ) {
169 MTemp.add( data.getRowObj( i ) );
170 rowNames.add( rowName );
171 kept++;
172 }
173 }
174
175 NamedMatrix returnval = getOutputMatrix( data, MTemp.size(), numCols );
176 for ( int i = 0; i < MTemp.size(); i++ ) {
177 for ( int j = 0; j < numCols; j++ ) {
178 returnval.set( i, j, ( ( Object[] ) MTemp.get( i ) )[j] );
179 }
180 }
181 returnval.setColumnNames( data.getColNames() );
182 returnval.setRowNames( rowNames );
183
184 log.info( "There are " + kept + " rows left after filtering." );
185
186 return ( returnval );
187 }
188
189 /***
190 * @param data NamedMatrix
191 * @todo this should check more carefully - actually test that the rows are all the same.
192 */
193 private void validateFlags( NamedMatrix data ) {
194 if ( flags == null || flags.rows() < data.rows()
195 || flags.columns() < data.columns() ) {
196 throw new IllegalStateException( "Flags do not match data." );
197 }
198 }
199
200 }