View Javadoc

1   package baseCode.dataFilter;
2   
3   import java.util.Vector;
4   
5   import baseCode.dataStructure.matrix.NamedMatrix;
6   import baseCode.dataStructure.matrix.StringMatrix2DNamed;
7   
8   /***
9    * Filter a data matrix according to flags given in a separate matrix.
10   * <p>
11   * The flags can be 'A', 'P' or 'M', for absent, present and marginal, following the Affymetrix convention. By default,
12   * Marginal flags are counted as "absent", but this can be changed by the user.
13   * <p>
14   * Copyright (c) 2004
15   * </p>
16   * <p>
17   * Institution: Columbia University
18   * </p>
19   * 
20   * @author Paul Pavlidis
21   * @version $Id: RowAbsentFilter.java,v 1.5 2004/07/27 03:18:58 pavlidis Exp $
22   */
23  public class RowAbsentFilter extends AbstractFilter implements Filter {
24  
25     private StringMatrix2DNamed flags = null;
26  
27     private double minPresentFraction = 0.0;
28     private int minPresentCount = 0;
29     private boolean keepMarginal = false;
30     private boolean fractionIsSet = false;
31     private boolean countIsSet = false;
32     private boolean flagsSet = false;
33  
34     /***
35      * @param f the matrix containing the flags.
36      */
37     public void setFlagMatrix( StringMatrix2DNamed f ) {
38        if ( f == null ) {
39           throw new IllegalArgumentException( "Flag matrix is null" );
40        }
41        flags = f;
42        flagsSet = true;
43     }
44  
45     /***
46      * @param k the minimum fraction of present values that there must be, in order to keep the row.
47      */
48     public void setMinPresentFraction( double k ) {
49        if ( k < 0.0 || k > 1.0 )
50              throw new IllegalArgumentException(
51                    "Min present fraction must be between 0 and 1, got " + k );
52        minPresentFraction = k;
53        fractionIsSet = true;
54     }
55  
56     /***
57      * @param k the minimum number of present values there must be in order to keep the row.
58      */
59     public void setMinPresentCount( int k ) {
60        if ( k < 0 ) {
61           throw new IllegalArgumentException(
62                 "Minimum present count must be > 0." );
63        }
64        minPresentCount = k;
65        countIsSet = true;
66     }
67  
68     /***
69      * @param k whether to count 'marginal' as 'present'. Default is false.
70      */
71     public void setKeepMarginal( boolean k ) {
72        keepMarginal = k;
73     }
74  
75     /***
76      * The data is going to be filtered in accordance to strings in 'flags'. These are either 'A', 'P' or 'M' for absent,
77      * present and marginal.
78      * 
79      * @param data The input matrix
80      * @return Matrix after filtering.
81      */
82     public NamedMatrix filter( NamedMatrix data ) {
83  
84        int numRows = data.rows();
85        int numCols = data.columns();
86  
87        if ( minPresentCount > numCols ) {
88           throw new IllegalStateException( "Minimum present count is set to "
89                 + minPresentCount + " but there are only " + numCols
90                 + " columns in the matrix." );
91        }
92  
93        if ( flags == null ) {
94           throw new IllegalStateException( "Flag matrix is null" );
95        }
96  
97        // no filtering requested.
98        if ( !fractionIsSet && !countIsSet ) {
99           log.info( "No filtering was requested" );
100          return data;
101       }
102 
103       if ( !flagsSet ) {
104          log.info( "No flag matrix was provided." );
105          return data;
106       }
107 
108       validateFlags( data );
109 
110       // nothing will happen.
111       if ( minPresentFraction == 0.0 && minPresentCount == 0 ) {
112          log
113                .info( "Criteria are set too low to result in any changes to the input." );
114          return data;
115       }
116 
117       Vector MTemp = new Vector();
118       Vector rowNames = new Vector();
119 
120       int kept = 0;
121       for ( int i = 0; i < numRows; i++ ) {
122          String rowName = data.getRowName( i );
123 
124          if ( !flags.containsRowName( rowName ) ) {
125             log.debug( "Row " + rowName + " not found in flags, skipping." );
126             continue;
127          }
128 
129          int numPresent = 0;
130          for ( int j = 0; j < numCols; j++ ) {
131             String colName = data.getColName( j );
132 
133             if ( !flags.containsColumnName( colName ) ) {
134                log.debug( "Column " + colName
135                      + " not found in flags, skipping." );
136                continue;
137             }
138 
139             // count missing values in the data as "absent", whatever the
140             // flag really is.
141             if ( data.isMissing( i, j ) ) {
142                //       log.debug( "Found missing data, counting as absent." );
143                continue;
144             }
145 
146             String flag = ( String ) flags.get( flags
147                   .getRowIndexByName( rowName ), flags
148                   .getColIndexByName( colName ) );
149 
150             if ( flags.isMissing( flags.getRowIndexByName( rowName ), flags
151                   .getColIndexByName( colName ) ) ) {
152                log
153                      .warn( "Flags had no value for an item, counting as present." );
154             } else if ( flag.equals( "A" ) ) {
155                continue;
156             } else if ( flag.equals( "M" ) && !keepMarginal ) {
157                continue;
158             } else if ( !flag.equals( "P" ) && !flag.equals( "M" ) ) {
159                log.warn( "Found a flag I don't know about, ignoring " + flag
160                      + " and counting as present." );
161             }
162 
163             numPresent++;
164          }
165 
166          /* decide whether this row is a keeper */
167          if ( ( countIsSet && numPresent >= minPresentCount )
168                || ( fractionIsSet && ( double ) numPresent / numCols >= minPresentFraction ) ) {
169             MTemp.add( data.getRowObj( i ) );
170             rowNames.add( rowName );
171             kept++;
172          }
173       }
174 
175       NamedMatrix returnval = getOutputMatrix( data, MTemp.size(), numCols );
176       for ( int i = 0; i < MTemp.size(); i++ ) {
177          for ( int j = 0; j < numCols; j++ ) {
178             returnval.set( i, j, ( ( Object[] ) MTemp.get( i ) )[j] );
179          }
180       }
181       returnval.setColumnNames( data.getColNames() );
182       returnval.setRowNames( rowNames );
183 
184       log.info( "There are " + kept + " rows left after filtering." );
185 
186       return ( returnval );
187    }
188 
189    /***
190     * @param data NamedMatrix
191     * @todo this should check more carefully - actually test that the rows are all the same.
192     */
193    private void validateFlags( NamedMatrix data ) {
194       if ( flags == null || flags.rows() < data.rows()
195             || flags.columns() < data.columns() ) {
196          throw new IllegalStateException( "Flags do not match data." );
197       }
198    }
199 
200 }