View Javadoc

1   package baseCode.bio.geneset;
2   
3   import java.io.BufferedInputStream;
4   import java.io.BufferedReader;
5   import java.io.FileInputStream;
6   import java.io.IOException;
7   import java.io.InputStream;
8   import java.io.InputStreamReader;
9   import java.io.Writer;
10  import java.util.ArrayList;
11  import java.util.Collection;
12  import java.util.Collections;
13  import java.util.Comparator;
14  import java.util.HashMap;
15  import java.util.HashSet;
16  import java.util.Iterator;
17  import java.util.LinkedHashMap;
18  import java.util.List;
19  import java.util.Map;
20  import java.util.Set;
21  import java.util.StringTokenizer;
22  import java.util.Vector;
23  
24  import javax.swing.table.AbstractTableModel;
25  import javax.swing.table.TableModel;
26  
27  import baseCode.util.FileTools;
28  import baseCode.util.StatusViewer;
29  
/**
 * Reads tab-delimited file to create maps of probes to classes, classes to probes, probes to genes, genes to probes.
 * <p>
 * Maintains the following important data structures, all derived from the input file:
 * 
 * <pre>
 * probe-&gt;Classes -- each value is a list of the Classes that a probe belongs to.
 * Classes-&gt;probe -- each value is a list of the probes that belong to a class
 * probe-&gt;gene -- each value is the gene name corresponding to the probe.
 * gene-&gt;list of probes -- each value is a list of probes corresponding to a gene
 * probe-&gt;description -- each value is a text description of the probe (actually...of the gene)
 * </pre>
 * 
 * <p>
 * Copyright (c) 2004 Columbia University
 * </p>
 * 
 * @author Paul Pavlidis
 * @author Shamhil Merchant
 * @author Homin Lee
 * @version $Id: GeneAnnotations.java,v 1.7 2004/12/27 22:25:56 pavlidis Exp $
 */
75  
76  public class GeneAnnotations {
77  
78     /***
79      * The maximum size of gene sets ever considered.
80      */
81     private static final int PRACTICAL_MAXIMUM_GENESET_SIZE = 1000;
82  
83     /***
84      * The minimum size of a 'set' of genes.
85      */
86     private static final int ABSOLUTE_MINIMUM_GENESET_SIZE = 2;
87  
88     private Map probeToGeneSetMap; //stores probe->Classes map todo why are contents an arraylist, should be a set?
89     private Map geneSetToProbeMap; //stores Classes->probes map
90     private Map probeToGeneName;
91     private Map probeToDescription;
92     private Map geneToProbeList;
93     private Map geneToGeneSetMap;
94     private Map geneSetToGeneMap; //stores Classes->genes map. use to get a list
95     // of classes.
96  
97     private Vector sortedGeneSets;
98     private Map geneSetToRedundantMap;
99     Vector selectedProbes;
100    private Vector selectedSets;
101 
102    private StatusViewer messenger;
103 
104    /***
105     * This is for creating GeneAnnotations by reading from a file
106     * 
107     * @param filename String
108     * @param messenger StatusViewer to print status updates to.
109     * @throws IOException
110     */
111    public GeneAnnotations( String filename, StatusViewer messenger )
112          throws IOException {
113 
114       setUpDataStructures();
115       this.messenger = messenger;
116 
117       this.read( filename );
118 
119       setUp();
120    }
121 
122    /***
123     * This is for creating GeneAnnotations by pruning a copy.
124     * 
125     * @param geneData GeneAnnotations copy to prune from
126     * @param activeProbes Set only include these probes
127     */
128    public GeneAnnotations( GeneAnnotations geneData, Set activeProbes ) {
129 
130       // FIXME - shallow copies here could cause problems.
131 
132       // CAREFUL this is a shallow copy! This is okay?
133       probeToGeneSetMap = new LinkedHashMap( geneData.probeToGeneSetMap );
134 
135       // make a deep copy of the classToProbeMap, which is a map of sets. Shallow copy is BAD.
136       this.geneSetToProbeMap = new LinkedHashMap();
137       for ( Iterator iter = geneData.geneSetToProbeMap.keySet().iterator(); iter
138             .hasNext(); ) {
139          String key = ( String ) iter.next();
140          this.geneSetToProbeMap.put( key, new ArrayList(
141                ( ArrayList ) geneData.geneSetToProbeMap.get( key ) ) );
142       }
143 
144       probeToGeneName = new HashMap( geneData.probeToGeneName ); // shallow copy, okay
145       probeToDescription = new HashMap( geneData.probeToDescription ); // shallow copy, okay
146       geneToProbeList = new HashMap( geneData.geneToProbeList ); // shallow copy, okay?
147       geneToGeneSetMap = new HashMap( geneData.geneToGeneSetMap ); // shallow copy, okay?
148       geneSetToRedundantMap = new HashMap( geneData.geneSetToRedundantMap );
149 
150       Vector allProbes = new Vector( probeToGeneName.keySet() );
151       for ( Iterator iter = allProbes.iterator(); iter.hasNext(); ) {
152          String probe = ( String ) iter.next();
153          if ( !activeProbes.contains( probe ) ) { // remove probes not in data set.
154             removeProbeFromMaps( probe );
155          }
156       }
157       setUp(); //creates the classToGene map.
158 
159       //      System.err.println( "Orig: GO:0019058 has probes: "
160       //            + geneData.numProbesInGeneSet( "GO:0019058" ) );
161       //      System.err.println( "New: GO:0019058 has probes: "
162       //            + this.numProbesInGeneSet( "GO:0019058" ) );
163    }
164 
165    /***
166     * Make a new GeneAnnotations that only includes the probes in the parameter 'probes'.
167     * 
168     * @param stream
169     * @param activeGenes Only genes in this set are left.
170     * @throws IOException
171     */
172    public GeneAnnotations( InputStream stream, Set activeGenes,
173          StatusViewer messenger ) throws IOException {
174       this.messenger = messenger;
175       setUpDataStructures();
176       this.read( stream, activeGenes );
177       setUp();
178    }
179 
180    /***
181     * @param fileName
182     */
183    public GeneAnnotations( String fileName, Set activeGenes,
184          StatusViewer messenger ) throws IOException {
185       this.messenger = messenger;
186       FileInputStream fis = new FileInputStream( fileName );
187       BufferedInputStream bis = new BufferedInputStream( fis );
188       setUpDataStructures();
189       this.read( bis, activeGenes );
190       setUp();
191    }
192 
193    /***
194     * @return Map
195     */
196    public Map getProbeToGeneMap() {
197       return probeToGeneName;
198    }
199 
200    /***
201     * @return Map
202     */
203    public Map getGeneToProbeList() {
204       return geneToProbeList;
205    }
206 
207    /***
208     * @return Map
209     */
210    public Map getGeneSetToProbeMap() {
211       return geneSetToProbeMap;
212    }
213 
214    /***
215     * @param id String class id
216     * @return ArrayList list of probes in class
217     */
218    public ArrayList getClassToProbes( String id ) {
219       return ( ArrayList ) geneSetToProbeMap.get( id );
220    }
221 
222    /***
223     * Sort the gene sets, filling out the sortedGeneSets. This should be called after any changes have been made to the
224     * classToProbeMap. The sort is just in order of id.
225     */
226    public void sortGeneSets() {
227 
228       if ( geneSetToProbeMap.size() == 0 ) {
229          throw new IllegalStateException(
230                "Could not sort because there are no gene sets in the classToProbeMap" );
231       }
232 
233       if ( sortedGeneSets == null ) {
234          sortedGeneSets = new Vector();
235       }
236 
237       Vector vec = new Vector( geneSetToProbeMap.keySet() );
238       Collections.sort( vec );
239       for ( Iterator iter = vec.iterator(); iter.hasNext(); ) {
240          sortedGeneSets.add( iter.next() );
241       }
242    }
243 
244    /***
245     * @return
246     */
247    public List sortGeneSetsBySize() {
248 
249       List sets = new Vector();
250       for ( Iterator iter = getGeneSetToGeneMap().keySet().iterator(); iter
251             .hasNext(); ) {
252          String name = ( String ) iter.next();
253          sets.add( new GeneSet( name, ( Set ) geneSetToGeneMap.get( name ) ) );
254       }
255 
256       Collections.sort( sets, new ClassSizeComparator() );
257 
258       List returnVal = new Vector();
259       for ( Iterator iter = sets.iterator(); iter.hasNext(); ) {
260          returnVal.add( ( ( GeneSet ) iter.next() ).getName() );
261       }
262 
263       return returnVal;
264    }
265 
266    /***
267     * @return Map
268     */
269    public Map getProbeToGeneSetMap() {
270       return probeToGeneSetMap;
271    }
272 
273    /***
274     * @return Map
275     */
276    public Map geneSetToRedundantMap() {
277       return geneSetToRedundantMap;
278    }
279 
280    /***
281     * Get the gene that a probe belongs to.
282     * 
283     * @param p String
284     * @return String
285     */
286    public String getProbeGeneName( String p ) {
287       return ( String ) probeToGeneName.get( p );
288    }
289 
290    /***
291     * Get the description for a gene.
292     * 
293     * @param p String
294     * @return String
295     */
296    public String getProbeDescription( String p ) {
297       return ( String ) probeToDescription.get( p );
298    }
299 
300    /***
301     * Get a list of the probes that correspond to a particular gene.
302     * 
303     * @param g String a gene name
304     * @return ArrayList list of the probes for gene g
305     */
306    public ArrayList getGeneProbeList( String g ) {
307       return ( ArrayList ) geneToProbeList.get( g );
308    }
309 
310    /***
311     * Get a class by an integer index i from the sorted list.
312     * 
313     * @param i
314     * @return
315     */
316    public String getGeneSetByIndex( int i ) {
317       return ( String ) sortedGeneSets.get( i );
318    }
319 
320    /***
321     * Returns true if the class is in the classToProbe map
322     * 
323     * @param id String a class id
324     * @return boolean
325     */
326    public boolean geneSetExists( String id ) {
327       return geneSetToProbeMap.containsKey( id );
328    }
329 
330    /***
331     * Get how many probes point to the same gene. This is like the old "numReplicates".
332     * 
333     * @param g
334     * @return
335     */
336    public int numProbesForGene( String g ) {
337       if ( !geneToProbeList.containsKey( g ) ) return 0;
338       return ( ( ArrayList ) geneToProbeList.get( g ) ).size();
339    }
340 
341    /***
342     * Get the number of classes. This is computed from the sortedGeneSets.
343     * 
344     * @return
345     */
346    public int numGeneSets() {
347       if ( geneSetToGeneMap == null ) {
348          throw new IllegalStateException( "classToGeneMap was null" );
349       }
350       return geneSetToGeneMap.size();
351    }
352 
353    /***
354     * How many genes are in the file?
355     */
356    public int numGenes() {
357       return geneToProbeList.size();
358    }
359 
360    /***
361     * Get the number of probes in a gene set, identified by id.
362     * 
363     * @param id String a class id
364     * @return int number of probes in the class
365     */
366    public int numProbesInGeneSet( String id ) {
367       if ( !geneSetToProbeMap.containsKey( id ) ) {
368          return 0;
369       }
370       //      System.err.println( "GO:0019058 has probes: "
371       //            + ( ( ArrayList ) classToProbeMap.get( "GO:0019058" ) ).size() );
372       return ( ( ArrayList ) geneSetToProbeMap.get( id ) ).size();
373    }
374 
375    /***
376     * Get the number of genes in a gene set, identified by id.
377     * 
378     * @param id String a class id
379     * @return int number of genes in the class
380     */
381    public int numGenesInGeneSet( String id ) {
382       if ( !geneSetToGeneMap.containsKey( id ) ) {
383          return 0;
384       }
385       return ( ( Set ) geneSetToGeneMap.get( id ) ).size();
386    }
387 
388    /***
389     * Add a class
390     * 
391     * @param id String class to be added
392     * @param probesForNew ArrayList user-defined list of members.
393     */
394    public void addClass( String id, ArrayList probesForNew ) {
395       geneSetToProbeMap.put( id, probesForNew );
396 
397       Iterator probe_it = probesForNew.iterator();
398       while ( probe_it.hasNext() ) {
399          String probe = new String( ( String ) probe_it.next() );
400          ( ( ArrayList ) probeToGeneSetMap.get( probe ) ).add( id );
401       }
402 
403       Set genes = new HashSet();
404       Iterator probe_it2 = probesForNew.iterator();
405       while ( probe_it2.hasNext() ) {
406          genes.add( probeToGeneName.get( probe_it2.next() ) );
407       }
408       geneSetToGeneMap.put( id, genes );
409 
410       geneToGeneSetMap.put( id, probeToGeneSetMap.get( id ) );
411 
412       resetSelectedSets();
413    }
414 
415    /***
416     * Redefine a class.
417     * 
418     * @param classId String class to be modified
419     * @param probesForNew ArrayList current user-defined list of members. The "real" version of the class is modified to
420     *        look like this one.
421     */
422    public void modifyClass( String classId, ArrayList probesForNew ) {
423       ArrayList orig_probes = ( ArrayList ) geneSetToProbeMap.get( classId );
424       Iterator orig_probe_it = orig_probes.iterator();
425       while ( orig_probe_it.hasNext() ) {
426          String orig_probe = new String( ( String ) orig_probe_it.next() );
427          if ( !probesForNew.contains( orig_probe ) ) {
428             Set ptc = new HashSet( ( Collection ) probeToGeneSetMap
429                   .get( orig_probe ) );
430             ptc.remove( classId );
431             probeToGeneSetMap.remove( orig_probe );
432             probeToGeneSetMap.put( orig_probe, new ArrayList( ptc ) );
433          }
434       }
435       Iterator probe_it = probesForNew.iterator();
436       while ( probe_it.hasNext() ) {
437          String probe = ( String ) probe_it.next();
438          if ( !orig_probes.contains( probe ) ) {
439             ( ( ArrayList ) probeToGeneSetMap.get( probe ) ).add( classId );
440          }
441       }
442       geneSetToProbeMap.put( classId, probesForNew );
443       resetSelectedSets();
444    }
445 
446    /***
447     * @return
448     */
449    public TableModel toTableModel() {
450       return new AbstractTableModel() {
451          private String[] columnNames = {
452                "Probe", "Gene", "Description"
453          };
454 
455          public String getColumnName( int i ) {
456             return columnNames[i];
457          }
458 
459          public int getColumnCount() {
460             return 3;
461          }
462 
463          public int getRowCount() {
464             return selectedProbes.size();
465          }
466 
467          public Object getValueAt( int i, int j ) {
468 
469             String probeid = ( String ) selectedProbes.get( i );
470             switch ( j ) {
471                case 0:
472                   return probeid;
473                case 1:
474                   return getProbeGeneName( probeid );
475                case 2:
476                   return getProbeDescription( probeid );
477                default:
478                   return null;
479             }
480          }
481 
482       };
483    }
484 
485    /***
486     * Create a selected probes list based on a search string.
487     * 
488     * @param searchOn A string to be searched.
489     */
490    public void selectProbes( String searchOn ) {
491 
492       String searchOnUp = searchOn.toUpperCase();
493       resetSelectedProbes();
494       Set removeUs = new HashSet();
495       for ( Iterator it = probeToGeneName.keySet().iterator(); it.hasNext(); ) {
496          String probe = ( String ) it.next();
497 
498          String candidate = ( ( String ) probeToGeneName.get( ( probe ) ) )
499                .toUpperCase();
500 
501          // look in descriptions.
502          String candidateD = ( ( String ) probeToDescription.get( ( probe ) ) )
503                .toUpperCase();
504 
505          if ( !candidate.startsWith( searchOnUp )
506                && candidateD.indexOf( searchOnUp ) < 0 ) {
507             removeUs.add( probe );
508          }
509 
510       }
511 
512       for ( Iterator it = removeUs.iterator(); it.hasNext(); ) {
513          selectedProbes.remove( it.next() );
514       }
515    }
516 
517    /***
518     * Set the selected gene set to be the entire set.
519     */
520    public void resetSelectedProbes() {
521       selectedProbes = new Vector( probeToGeneName.keySet() );
522    }
523 
524    /***
525     * @return the list of selected probes.
526     */
527    public List getSelectedProbes() {
528       return selectedProbes;
529    }
530 
531    /***
532     * @return the number of probes currently on the 'selected' list.
533     */
534    public int selectedProbes() {
535       return selectedProbes.size();
536    }
537 
538    /***
539     * @param searchOn
540     * @param goData
541     */
542    public void selectSets( String searchOn, GONames goData ) {
543 
544       String searchOnUp = searchOn.toUpperCase();
545       resetSelectedSets();
546       Set removeUs = new HashSet();
547       for ( Iterator it = geneSetToProbeMap.keySet().iterator(); it.hasNext(); ) {
548          String candidate = ( String ) it.next();
549 
550          // look in the name too
551          String candidateN = goData.getNameForId( candidate ).toUpperCase();
552 
553          if ( !candidate.toUpperCase().startsWith( searchOnUp )
554                && candidateN.indexOf( searchOnUp ) < 0 ) {
555             removeUs.add( candidate );
556          }
557       }
558 
559       for ( Iterator it = removeUs.iterator(); it.hasNext(); ) {
560          selectedSets.remove( it.next() );
561       }
562    }
563 
564    /***
565     * Set the selected gene set to be the entire set.
566     */
567    public void resetSelectedSets() {
568       selectedSets = new Vector( geneSetToProbeMap.keySet() );
569    }
570 
571    /***
572     * @return list of selected sets.
573     */
574    public List getSelectedSets() {
575       return selectedSets;
576    }
577 
578    /***
579     * @return the number of sets currently on the 'selected' list.
580     */
581    public int selectedSets() {
582       return selectedSets.size();
583    }
584 
585    /***
586     * Print out the gene annotations in the same format we got them in, but if the gene sets have been modified, this
587     * will be reflected.
588     * 
589     * @param out
590     * @throws IOException
591     */
592    public void print( Writer out ) throws IOException {
593       out.write( "Probe\tSymbol\tName\tGeneSets\n" );
594       out.flush();
595       for ( Iterator iter = probeToGeneName.keySet().iterator(); iter.hasNext(); ) {
596          String probe = ( String ) iter.next();
597          String gene = ( String ) probeToGeneName.get( probe );
598          String desc = getProbeDescription( probe );
599          out.write( probe + "\t" + gene + "\t" + desc + "\t" );
600          List geneSets = ( ArrayList ) probeToGeneSetMap.get( probe );
601 
602          for ( Iterator iterator = geneSets.iterator(); iterator.hasNext(); ) {
603             String element = ( String ) iterator.next();
604             out.write( element + "|" );
605          }
606          out.write( "\n" );
607       }
608    }
609 
610    /***
611     * @return Returns the classToGeneMap.
612     */
613    public Map getGeneSetToGeneMap() {
614       return geneSetToGeneMap;
615    }
616 
617    /***
618     * @return Returns the geneToClassMap.
619     */
620    public Map getGeneToGeneSetMap() {
621       return geneToGeneSetMap;
622    }
623 
624    /***
625     * Compute how many genes have Gene set annotations.
626     * 
627     * @return
628     */
629    public int numAnnotatedGenes() {
630       int count = 0;
631       for ( Iterator iter = probeToGeneSetMap.keySet().iterator(); iter
632             .hasNext(); ) {
633          List element = ( ArrayList ) probeToGeneSetMap.get( iter.next() );
634          if ( element.size() > 0 ) {
635             count++;
636          }
637       }
638       return count;
639    }
640 
641    /*********************************************************************************************************************
642     * Private or protected methods
643     *******************************************************************************************************************/
644 
645    /***
646     * 
647     */
648    private void setUpDataStructures() {
649       probeToGeneSetMap = new LinkedHashMap();
650       geneSetToProbeMap = new LinkedHashMap();
651       probeToGeneName = new HashMap();
652       probeToDescription = new HashMap();
653       geneToProbeList = new HashMap();
654       geneToGeneSetMap = new HashMap();
655       geneSetToRedundantMap = new HashMap();
656    }
657 
658    /***
659     * Initialize the gene sets and other data structures that needs special handling before use.
660     */
661    private void setUp() {
662       this.geneSetToGeneMap = makeClassToGeneMap();
663 
664       GeneSetMapTools.collapseGeneSets( this, messenger );
665       prune( ABSOLUTE_MINIMUM_GENESET_SIZE, PRACTICAL_MAXIMUM_GENESET_SIZE );
666       resetSelectedProbes();
667       resetSelectedSets();
668       sortGeneSets();
669    }
670 
671    /***
672     * Remove a gene set (class) from all the maps that reference it.
673     * 
674     * @param id
675     */
676    public void removeClassFromMaps( String id ) {
677       if ( geneSetToProbeMap.containsKey( id ) ) {
678          for ( Iterator pit = ( ( ArrayList ) geneSetToProbeMap.get( id ) )
679                .iterator(); pit.hasNext(); ) {
680             String probe = ( String ) pit.next();
681             if ( probeToGeneSetMap.containsKey( probe )
682                   && ( ( ArrayList ) probeToGeneSetMap.get( probe ) )
683                         .contains( id ) ) {
684                if ( !( ( ArrayList ) probeToGeneSetMap.get( probe ) )
685                      .remove( id ) ) {
686                   System.err.println( "Couldn't remove " + id
687                         + " from probe to class map for" + probe );
688                }
689             }
690          }
691          if ( geneSetToProbeMap.remove( id ) == null )
692                System.err.println( "Couldn't remove " + id
693                      + " from classToProbeMap" );
694 
695          if ( geneSetToGeneMap.remove( id ) == null )
696                System.err.println( "Couldn't remove " + id
697                      + " from classToGeneMap" );
698       }
699       if ( geneSetToRedundantMap.containsKey( id ) )
700             geneSetToRedundantMap.remove( id );
701    }
702 
703    /***
704     * @param probe
705     */
706    private void removeProbeFromMaps( String probe ) {
707       if ( probeToGeneName.containsKey( probe ) ) {
708          String gene = ( String ) probeToGeneName.get( probe );
709          probeToGeneName.remove( probe );
710          if ( geneToProbeList.containsKey( gene ) ) {
711             ( ( ArrayList ) geneToProbeList.get( gene ) ).remove( probe );
712          }
713       }
714       if ( probeToGeneSetMap.containsKey( probe ) ) {
715          Iterator cit = ( ( ArrayList ) probeToGeneSetMap.get( probe ) )
716                .iterator();
717          while ( cit.hasNext() ) {
718             String geneSet = ( String ) cit.next();
719             if ( geneSetToProbeMap.containsKey( geneSet ) ) {
720                ( ( ArrayList ) geneSetToProbeMap.get( geneSet ) )
721                      .remove( probe );
722             }
723          }
724          if ( probeToGeneSetMap.remove( probe ) == null ) {
725             System.err.println( "Could not remove " + probe
726                   + " from probeToClassMap" );
727          }
728       }
729       if ( probeToDescription.containsKey( probe ) )
730             probeToDescription.remove( probe );
731    }
732 
733    /***
734     * Fill in the classToGeneMap with information from the classToProbeMap.
735     * 
736     * @return mapping of gene sets to genes.
737     */
738    private Map makeClassToGeneMap() {
739       Map gsToGeneMap = new HashMap();
740       for ( Iterator iter = geneSetToProbeMap.keySet().iterator(); iter
741             .hasNext(); ) {
742          String geneSetId = ( String ) iter.next();
743          List probesInSet = ( ArrayList ) geneSetToProbeMap.get( geneSetId );
744 
745          Set genesInSet = new HashSet();
746          for ( Iterator biter = probesInSet.iterator(); biter.hasNext(); ) {
747             String probe = ( String ) biter.next();
748             genesInSet.add( probeToGeneName.get( probe ) );
749          }
750          gsToGeneMap.put( geneSetId, genesInSet );
751       }
752       return gsToGeneMap;
753    }
754 
755    private void read( InputStream bis ) throws IOException {
756       this.read( bis, null );
757    }
758 
759    private void read( InputStream bis, Set activeGenes ) throws IOException {
760       if ( bis == null ) {
761          throw new IOException( "Inputstream was null" );
762       }
763 
764       BufferedReader dis = new BufferedReader( new InputStreamReader( bis ) );
765       ArrayList probeIds = new ArrayList();
766       String classIds = null;
767 
768       // loop through rows. Makes hash map of probes to go, and map of go to
769       // probes.
770       int n = 0;
771       String line = "";
772 
773       while ( ( line = dis.readLine() ) != null ) {
774          if ( line.startsWith( "#" ) ) continue;
775          StringTokenizer st = new StringTokenizer( line, "\t" );
776 
777          if ( !st.hasMoreTokens() ) {
778             continue; // blank line
779          }
780 
781          String probe = st.nextToken().intern();
782 
783          /* read gene name */
784          if ( !st.hasMoreTokens() ) {
785             continue; // no gene name or anything else.
786          }
787 
788          String group = st.nextToken().intern();
789 
790          if ( activeGenes != null && !activeGenes.contains( group ) ) {
791             continue;
792          }
793 
794          probeToGeneName.put( probe.intern(), group.intern() );
795 
796          // create the list if need be.
797          if ( geneToProbeList.get( group ) == null ) {
798             geneToProbeList.put( group.intern(), new ArrayList() );
799          }
800          ( ( ArrayList ) geneToProbeList.get( group ) ).add( probe.intern() );
801 
802          probeIds.add( probe );
803          probeToGeneSetMap.put( probe.intern(), new ArrayList() );
804          geneToGeneSetMap.put( group, probeToGeneSetMap.get( probe ) );
805 
806          /* read gene description */
807          if ( st.hasMoreTokens() ) {
808             String description = st.nextToken().intern();
809             if ( !description.startsWith( "GO:" ) ) { // this happens when
810                // there is no
811                // desription and we
812                // skip to the GO
813                // terms.
814                probeToDescription.put( probe.intern(), description.intern() );
815             } else {
816                probeToDescription.put( probe.intern(), "[No description]" );
817             }
818          } else {
819             probeToDescription.put( probe.intern(), "[No description]" );
820          }
821 
822          /* read GO data */
823          if ( st.hasMoreTokens() ) {
824             classIds = st.nextToken();
825 
826             //another tokenizer is required since the ClassesID's are
827             // seperated by the | character
828             StringTokenizer st1 = new StringTokenizer( classIds, "|" );
829             while ( st1.hasMoreTokens() ) {
830                String go = st1.nextToken().intern();
831 
832                // add this go to the probe->go map.
833                ( ( ArrayList ) probeToGeneSetMap.get( probe ) ).add( go );
834 
835                // add this probe this go->probe map.
836                if ( !geneSetToProbeMap.containsKey( go ) ) {
837                   geneSetToProbeMap.put( go, new ArrayList() );
838                }
839                ( ( ArrayList ) geneSetToProbeMap.get( go ) ).add( probe );
840 
841             }
842          }
843          if ( messenger != null && n % 500 == 0 ) {
844             messenger.setStatus( "Read " + n + " probes" );
845          }
846          n++;
847       }
848 
849       /* Fill in the genegroupreader and the classmap */
850       dis.close();
851       resetSelectedProbes();
852       
853       if (probeToGeneName.size() == 0 || geneSetToProbeMap.size() == 0) {
854          throw new IllegalArgumentException("The gene annotations had invalid information. Please check the format.");
855       }
856       
857    }
858 
859    //read in from a file.
860    private void read( String filename ) throws IOException {
861 
862       if ( !FileTools.testFile( filename ) ) {
863          throw new IOException( "Could not read from " + filename );
864       }
865 
866       FileInputStream fis = new FileInputStream( filename );
867       BufferedInputStream bis = new BufferedInputStream( fis );
868       read( bis );
869    }
870 
871    /***
872     * remove classes that have too few members todo this doesn't affect the tree representation of the genesets. todo
873     * this overlaps with functionality in GeneSetMapTools
874     * 
875     * @param lowThreshold
876     * @param highThreshold
877     */
878    private void prune( int lowThreshold, int highThreshold ) {
879 
880       Set removeUs = new HashSet();
881       for ( Iterator it = geneSetToProbeMap.keySet().iterator(); it.hasNext(); ) {
882          String id = ( String ) it.next();
883          if ( numProbesInGeneSet( id ) < lowThreshold
884                || numGenesInGeneSet( id ) < lowThreshold
885                || numProbesInGeneSet( id ) > highThreshold
886                || numGenesInGeneSet( id ) > highThreshold ) {
887             removeUs.add( id );
888          }
889       }
890 
891       for ( Iterator it = removeUs.iterator(); it.hasNext(); ) {
892          String id = ( String ) it.next();
893          removeClassFromMaps( id );
894       }
895 
896       sortGeneSets();
897    }
898 
899 }
900 
901 class ClassSizeComparator implements Comparator {
902 
903    /*
904     * (non-Javadoc)
905     * 
906     * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
907     */
908    public int compare( Object o1, Object o2 ) {
909       GeneSet a = ( GeneSet ) o1;
910       GeneSet b = ( GeneSet ) o2;
911 
912       int sizea = a.size();
913       int sizeb = b.size();
914 
915       if ( sizea > sizeb ) {
916          return 1;
917       } else if ( sizeb < sizea ) {
918          return -1;
919       }
920 
921       return 0;
922    }
923 
924    public static void main( String[] args ) {
925    }
926 }
927 
/**
 * A named collection of items; used for the comparator that sorts gene sets by size.
 */
class GeneSet {
   private String name;
   private Set items;

   /**
    * @param name String name of the set
    * @param items Set the members; stored as-is (not copied) and may be null
    */
   public GeneSet( String name, Set items ) {
      this.name = name;
      this.items = items;
   }

   /**
    * @return Returns the items.
    */
   public Set getItems() {
      return items;
   }

   /**
    * @param items The items to set.
    */
   public void setItems( Set items ) {
      this.items = items;
   }

   /**
    * @return Returns the name.
    */
   public String getName() {
      return name;
   }

   /**
    * @param name The name to set.
    */
   public void setName( String name ) {
      this.name = name;
   }

   /**
    * @return the number of items, or 0 when no item set has been given.
    */
   public int size() {
      return items == null ? 0 : items.size();
   }
}