View Javadoc

1   package baseCode.dataFilter;
2   
3   import java.util.Vector;
4   
5   import baseCode.dataStructure.matrix.NamedMatrix;
6   
7   /***
8    * Remove probes that have names meeting certain rules indicating they may have low reliability. This is targeted at
9    * cases like "AFFX", "_st", "_f_at" and so forth.
10   * <p>
11   * Copyright (c) 2004
12   * </p>
13   * <p>
14   * Institution:: Columbia University
15   * </p>
16   * 
17   * @author Paul Pavlidis
18   * @version $Id: AffymetrixProbeNameFilter.java,v 1.12 2004/08/17 21:17:40 pavlidis Exp $
19   */
20  public class AffymetrixProbeNameFilter extends AbstractFilter implements Filter {
21  
22     private boolean skip_ST = false;
23     private boolean skip_AFFX = false;
24     private boolean skip_F = false;
25     private boolean skip_X = false;
26     private boolean skip_G = false;
27  
28     /***
29      * Filter probes that contain the '_st' (sense strand) tag
30      */
31     public static final int ST = 1;
32  
33     /***
34      * Filter probes that have the AFFX prefix.
35      */
36     public static final int AFFX = 2;
37  
38     /***
39      * Filter probes that have the "_f_at" (family) tag.
40      */
41     public static final int F = 3;
42  
43     /***
44      * Filter probes that have the "_x_at" tag.
45      */
46     public static final int X = 4;
47  
48     /***
49      * Filter probes that have the "_g_at" (group) tag.
50      */
51     public static final int G = 5;
52  
53     /***
54      * @param criteria int[] of constants indicating the criteria to use.
55      */
56     public AffymetrixProbeNameFilter( int[] criteria ) {
57        this.setCriteria( criteria );
58     }
59     
60     /***
61      * Filter probes with all criteria switched on.
62      *
63      */
64     public AffymetrixProbeNameFilter(   ) {
65        this.setCriteria( new int[] {1,2,3,4,5} );
66     }
67     
68  
69     private void setCriteria( int[] criteria ) {
70        for ( int i = 0; i < criteria.length; i++ ) {
71           switch ( criteria[i] ) {
72              case ST: {
73                 skip_ST = true;
74              }
75              case AFFX: {
76                 skip_AFFX = true;
77              }
78              case F: {
79                 skip_F = true;
80              }
81              case X: {
82                 skip_X = true;
83              }
84              case G: {
85                 skip_G = true;
86              }
87              default: {
88                 break;
89              }
90           }
91        }
92     }
93  
94     public NamedMatrix filter( NamedMatrix data ) {
95        Vector MTemp = new Vector();
96        Vector rowNames = new Vector();
97        int numRows = data.rows();
98        int numCols = data.columns();
99  
100       int kept = 0;
101       for ( int i = 0; i < numRows; i++ ) {
102          String name = data.getRowName( i );
103 
104          // apply the rules.
105          if ( skip_ST && name.endsWith( "_st" ) ) { // 'st' means sense strand.
106             continue;
107          }
108 
109          if ( skip_AFFX && name.startsWith( "AFFX" ) ) {
110             continue;
111          }
112 
113          if ( skip_F && name.endsWith( "_f_at" ) ) { // gene family. We don't
114             // like.
115             continue;
116          }
117 
118          if ( skip_X && name.endsWith( "_x_at" ) ) {
119             continue;
120          }
121          if ( skip_G && name.endsWith( "_g_at" ) ) {
122             continue;
123          }
124          MTemp.add( data.getRowObj( i ) );
125          rowNames.add( name );
126          kept++;
127       }
128 
129       NamedMatrix returnval = getOutputMatrix( data, MTemp.size(), numCols );
130 
131       for ( int i = 0; i < MTemp.size(); i++ ) {
132          for ( int j = 0; j < numCols; j++ ) {
133             returnval.set( i, j, ( ( Object[] ) MTemp.get( i ) )[j] );
134          }
135       }
136       returnval.setColumnNames( data.getColNames() );
137       returnval.setRowNames( rowNames );
138       log.info( "There are " + kept + " rows left after filtering." );
139 
140       return ( returnval );
141 
142    }
143 }