View Javadoc

1   package baseCode.dataFilter;
2   
3   import java.util.Vector;
4   
5   import baseCode.dataStructure.matrix.NamedMatrix;
6   
7   /***
8    * Remove probes that have names meeting certain rules indicating they may have low reliability. This is targeted at
9    * cases like "AFFX", "_st", "_f_at" and so forth.
10   * <p>
11   * Copyright (c) 2004
12   * </p>
13   * <p>
14   * Institution:: Columbia University
15   * </p>
16   * 
17   * @author Paul Pavlidis
18   * @version $Id: RowAffyNameFilter.java,v 1.3 2004/07/27 03:18:58 pavlidis Exp $
19   */
20  public class RowAffyNameFilter extends AbstractFilter implements Filter {
21  
22     private boolean skip_ST = false;
23     private boolean skip_AFFX = false;
24     private boolean skip_F = false;
25     private boolean skip_X = false;
26     private boolean skip_G = false;
27  
28     /***
29      * Filter probes that contain the '_st' (sense strand) tag
30      */
31     public static final int ST = 1;
32  
33     /***
34      * Filter probes that have the AFFX prefix.
35      */
36     public static final int AFFX = 2;
37  
38     /***
39      * Filter probes that have the "_f_at" (family) tag.
40      */
41     public static final int F = 3;
42  
43     /***
44      * Filter probes that have the "_x_at" tag.
45      */
46     public static final int X = 4;
47  
48     /***
49      * Filter probes that have the "_g_at" (group) tag.
50      */
51     public static final int G = 5;
52  
53     /***
54      * @param criteria int[] of constants indicating the criteria to use.
55      */
56     public RowAffyNameFilter( int[] criteria ) {
57        this.setCriteria( criteria );
58     }
59  
60     private void setCriteria( int[] criteria ) {
61        for ( int i = 0; i < criteria.length; i++ ) {
62           switch ( criteria[i] ) {
63              case ST: {
64                 skip_ST = true;
65              }
66              case AFFX: {
67                 skip_AFFX = true;
68              }
69              case F: {
70                 skip_F = true;
71              }
72              case X: {
73                 skip_X = true;
74              }
75              case G: {
76                 skip_G = true;
77              }
78              default: {
79                 break;
80              }
81           }
82        }
83     }
84  
85     public NamedMatrix filter( NamedMatrix data ) {
86        Vector MTemp = new Vector();
87        Vector rowNames = new Vector();
88        int numRows = data.rows();
89        int numCols = data.columns();
90  
91        int kept = 0;
92        for ( int i = 0; i < numRows; i++ ) {
93           String name = data.getRowName( i );
94  
95           // apply the rules.
96           if ( skip_ST && name.endsWith( "_st" ) ) { // 'st' means sense strand.
97              continue;
98           }
99  
100          if ( skip_AFFX && name.startsWith( "AFFX" ) ) {
101             continue;
102          }
103 
104          if ( skip_F && name.endsWith( "_f_at" ) ) { // gene family. We don't
105             // like.
106             continue;
107          }
108 
109          if ( skip_X && name.endsWith( "_x_at" ) ) {
110             continue;
111          }
112          if ( skip_G && name.endsWith( "_g_at" ) ) {
113             continue;
114          }
115          MTemp.add( data.getRowObj( i ) );
116          rowNames.add( name );
117          kept++;
118       }
119 
120       NamedMatrix returnval = getOutputMatrix( data, MTemp.size(), numCols );
121 
122       for ( int i = 0; i < MTemp.size(); i++ ) {
123          for ( int j = 0; j < numCols; j++ ) {
124             returnval.set( i, j, ( ( Object[] ) MTemp.get( i ) )[j] );
125          }
126       }
127       returnval.setColumnNames( data.getColNames() );
128       returnval.setRowNames( rowNames );
129       log.info( "There are " + kept + " rows left after filtering." );
130 
131       return ( returnval );
132 
133    }
134 }