1 package baseCode.bio.geneset;
2
3 import java.io.BufferedInputStream;
4 import java.io.BufferedReader;
5 import java.io.FileInputStream;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.io.InputStreamReader;
9 import java.io.Writer;
10 import java.util.ArrayList;
11 import java.util.Collection;
12 import java.util.Collections;
13 import java.util.Comparator;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.LinkedHashMap;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.StringTokenizer;
22 import java.util.Vector;
23
24 import javax.swing.table.AbstractTableModel;
25 import javax.swing.table.TableModel;
26
27 import baseCode.util.FileTools;
28 import baseCode.util.StatusViewer;
29
/**
 * Reads a tab-delimited file to create maps of probes to classes, classes to probes, probes to genes, and genes to
 * probes.
 * <p>
 * Maintains the following important data structures, all derived from the input file:
 *
 * <pre>
 * probe->Classes -- each value is a list of the classes that a probe belongs to.
 * Classes->probe -- each value is a list of the probes that belong to a class.
 * probe->gene -- each value is the gene name corresponding to the probe.
 * gene->list of probes -- each value is a list of probes corresponding to a gene.
 * probe->description -- each value is a text description of the probe (actually...of the gene).
 * </pre>
 *
 * <p>
 * Copyright (c) 2004 Columbia University
 * </p>
 *
 * @author Paul Pavlidis
 * @author Shamhil Merchant
 * @author Homin Lee
 * @version $Id: GeneAnnotations.java,v 1.7 2004/12/27 22:25:56 pavlidis Exp $
 */
75
76 public class GeneAnnotations {
77
78 /***
79 * The maximum size of gene sets ever considered.
80 */
81 private static final int PRACTICAL_MAXIMUM_GENESET_SIZE = 1000;
82
83 /***
84 * The minimum size of a 'set' of genes.
85 */
86 private static final int ABSOLUTE_MINIMUM_GENESET_SIZE = 2;
87
88 private Map probeToGeneSetMap;
89 private Map geneSetToProbeMap;
90 private Map probeToGeneName;
91 private Map probeToDescription;
92 private Map geneToProbeList;
93 private Map geneToGeneSetMap;
94 private Map geneSetToGeneMap;
95
96
97 private Vector sortedGeneSets;
98 private Map geneSetToRedundantMap;
99 Vector selectedProbes;
100 private Vector selectedSets;
101
102 private StatusViewer messenger;
103
/**
 * Builds the annotations by reading them from a file.
 *
 * @param filename path of the annotation file to read
 * @param messenger StatusViewer that receives status updates
 * @throws IOException if the file cannot be read
 */
public GeneAnnotations( String filename, StatusViewer messenger )
        throws IOException {
    this.messenger = messenger;
    this.setUpDataStructures();
    read( filename );
    this.setUp();
}
121
122 /***
123 * This is for creating GeneAnnotations by pruning a copy.
124 *
125 * @param geneData GeneAnnotations copy to prune from
126 * @param activeProbes Set only include these probes
127 */
128 public GeneAnnotations( GeneAnnotations geneData, Set activeProbes ) {
129
130
131
132
133 probeToGeneSetMap = new LinkedHashMap( geneData.probeToGeneSetMap );
134
135
136 this.geneSetToProbeMap = new LinkedHashMap();
137 for ( Iterator iter = geneData.geneSetToProbeMap.keySet().iterator(); iter
138 .hasNext(); ) {
139 String key = ( String ) iter.next();
140 this.geneSetToProbeMap.put( key, new ArrayList(
141 ( ArrayList ) geneData.geneSetToProbeMap.get( key ) ) );
142 }
143
144 probeToGeneName = new HashMap( geneData.probeToGeneName );
145 probeToDescription = new HashMap( geneData.probeToDescription );
146 geneToProbeList = new HashMap( geneData.geneToProbeList );
147 geneToGeneSetMap = new HashMap( geneData.geneToGeneSetMap );
148 geneSetToRedundantMap = new HashMap( geneData.geneSetToRedundantMap );
149
150 Vector allProbes = new Vector( probeToGeneName.keySet() );
151 for ( Iterator iter = allProbes.iterator(); iter.hasNext(); ) {
152 String probe = ( String ) iter.next();
153 if ( !activeProbes.contains( probe ) ) {
154 removeProbeFromMaps( probe );
155 }
156 }
157 setUp();
158
159
160
161
162
163 }
164
/**
 * Builds the annotations from a stream, keeping only the lines whose gene is in <code>activeGenes</code>.
 *
 * @param stream source of the tab-delimited annotation data
 * @param activeGenes only genes in this set are kept; when null, no gene is filtered out
 * @param messenger StatusViewer that receives status updates
 * @throws IOException if the stream is null or cannot be read
 */
public GeneAnnotations( InputStream stream, Set activeGenes,
        StatusViewer messenger ) throws IOException {
    this.messenger = messenger;
    this.setUpDataStructures();
    read( stream, activeGenes );
    this.setUp();
}
179
180 /***
181 * @param fileName
182 */
183 public GeneAnnotations( String fileName, Set activeGenes,
184 StatusViewer messenger ) throws IOException {
185 this.messenger = messenger;
186 FileInputStream fis = new FileInputStream( fileName );
187 BufferedInputStream bis = new BufferedInputStream( fis );
188 setUpDataStructures();
189 this.read( bis, activeGenes );
190 setUp();
191 }
192
193 /***
194 * @return Map
195 */
196 public Map getProbeToGeneMap() {
197 return probeToGeneName;
198 }
199
200 /***
201 * @return Map
202 */
203 public Map getGeneToProbeList() {
204 return geneToProbeList;
205 }
206
207 /***
208 * @return Map
209 */
210 public Map getGeneSetToProbeMap() {
211 return geneSetToProbeMap;
212 }
213
214 /***
215 * @param id String class id
216 * @return ArrayList list of probes in class
217 */
218 public ArrayList getClassToProbes( String id ) {
219 return ( ArrayList ) geneSetToProbeMap.get( id );
220 }
221
222 /***
223 * Sort the gene sets, filling out the sortedGeneSets. This should be called after any changes have been made to the
224 * classToProbeMap. The sort is just in order of id.
225 */
226 public void sortGeneSets() {
227
228 if ( geneSetToProbeMap.size() == 0 ) {
229 throw new IllegalStateException(
230 "Could not sort because there are no gene sets in the classToProbeMap" );
231 }
232
233 if ( sortedGeneSets == null ) {
234 sortedGeneSets = new Vector();
235 }
236
237 Vector vec = new Vector( geneSetToProbeMap.keySet() );
238 Collections.sort( vec );
239 for ( Iterator iter = vec.iterator(); iter.hasNext(); ) {
240 sortedGeneSets.add( iter.next() );
241 }
242 }
243
244 /***
245 * @return
246 */
247 public List sortGeneSetsBySize() {
248
249 List sets = new Vector();
250 for ( Iterator iter = getGeneSetToGeneMap().keySet().iterator(); iter
251 .hasNext(); ) {
252 String name = ( String ) iter.next();
253 sets.add( new GeneSet( name, ( Set ) geneSetToGeneMap.get( name ) ) );
254 }
255
256 Collections.sort( sets, new ClassSizeComparator() );
257
258 List returnVal = new Vector();
259 for ( Iterator iter = sets.iterator(); iter.hasNext(); ) {
260 returnVal.add( ( ( GeneSet ) iter.next() ).getName() );
261 }
262
263 return returnVal;
264 }
265
266 /***
267 * @return Map
268 */
269 public Map getProbeToGeneSetMap() {
270 return probeToGeneSetMap;
271 }
272
273 /***
274 * @return Map
275 */
276 public Map geneSetToRedundantMap() {
277 return geneSetToRedundantMap;
278 }
279
280 /***
281 * Get the gene that a probe belongs to.
282 *
283 * @param p String
284 * @return String
285 */
286 public String getProbeGeneName( String p ) {
287 return ( String ) probeToGeneName.get( p );
288 }
289
290 /***
291 * Get the description for a gene.
292 *
293 * @param p String
294 * @return String
295 */
296 public String getProbeDescription( String p ) {
297 return ( String ) probeToDescription.get( p );
298 }
299
300 /***
301 * Get a list of the probes that correspond to a particular gene.
302 *
303 * @param g String a gene name
304 * @return ArrayList list of the probes for gene g
305 */
306 public ArrayList getGeneProbeList( String g ) {
307 return ( ArrayList ) geneToProbeList.get( g );
308 }
309
310 /***
311 * Get a class by an integer index i from the sorted list.
312 *
313 * @param i
314 * @return
315 */
316 public String getGeneSetByIndex( int i ) {
317 return ( String ) sortedGeneSets.get( i );
318 }
319
320 /***
321 * Returns true if the class is in the classToProbe map
322 *
323 * @param id String a class id
324 * @return boolean
325 */
326 public boolean geneSetExists( String id ) {
327 return geneSetToProbeMap.containsKey( id );
328 }
329
330 /***
331 * Get how many probes point to the same gene. This is like the old "numReplicates".
332 *
333 * @param g
334 * @return
335 */
336 public int numProbesForGene( String g ) {
337 if ( !geneToProbeList.containsKey( g ) ) return 0;
338 return ( ( ArrayList ) geneToProbeList.get( g ) ).size();
339 }
340
341 /***
342 * Get the number of classes. This is computed from the sortedGeneSets.
343 *
344 * @return
345 */
346 public int numGeneSets() {
347 if ( geneSetToGeneMap == null ) {
348 throw new IllegalStateException( "classToGeneMap was null" );
349 }
350 return geneSetToGeneMap.size();
351 }
352
353 /***
354 * How many genes are in the file?
355 */
356 public int numGenes() {
357 return geneToProbeList.size();
358 }
359
360 /***
361 * Get the number of probes in a gene set, identified by id.
362 *
363 * @param id String a class id
364 * @return int number of probes in the class
365 */
366 public int numProbesInGeneSet( String id ) {
367 if ( !geneSetToProbeMap.containsKey( id ) ) {
368 return 0;
369 }
370
371
372 return ( ( ArrayList ) geneSetToProbeMap.get( id ) ).size();
373 }
374
375 /***
376 * Get the number of genes in a gene set, identified by id.
377 *
378 * @param id String a class id
379 * @return int number of genes in the class
380 */
381 public int numGenesInGeneSet( String id ) {
382 if ( !geneSetToGeneMap.containsKey( id ) ) {
383 return 0;
384 }
385 return ( ( Set ) geneSetToGeneMap.get( id ) ).size();
386 }
387
388 /***
389 * Add a class
390 *
391 * @param id String class to be added
392 * @param probesForNew ArrayList user-defined list of members.
393 */
394 public void addClass( String id, ArrayList probesForNew ) {
395 geneSetToProbeMap.put( id, probesForNew );
396
397 Iterator probe_it = probesForNew.iterator();
398 while ( probe_it.hasNext() ) {
399 String probe = new String( ( String ) probe_it.next() );
400 ( ( ArrayList ) probeToGeneSetMap.get( probe ) ).add( id );
401 }
402
403 Set genes = new HashSet();
404 Iterator probe_it2 = probesForNew.iterator();
405 while ( probe_it2.hasNext() ) {
406 genes.add( probeToGeneName.get( probe_it2.next() ) );
407 }
408 geneSetToGeneMap.put( id, genes );
409
410 geneToGeneSetMap.put( id, probeToGeneSetMap.get( id ) );
411
412 resetSelectedSets();
413 }
414
415 /***
416 * Redefine a class.
417 *
418 * @param classId String class to be modified
419 * @param probesForNew ArrayList current user-defined list of members. The "real" version of the class is modified to
420 * look like this one.
421 */
422 public void modifyClass( String classId, ArrayList probesForNew ) {
423 ArrayList orig_probes = ( ArrayList ) geneSetToProbeMap.get( classId );
424 Iterator orig_probe_it = orig_probes.iterator();
425 while ( orig_probe_it.hasNext() ) {
426 String orig_probe = new String( ( String ) orig_probe_it.next() );
427 if ( !probesForNew.contains( orig_probe ) ) {
428 Set ptc = new HashSet( ( Collection ) probeToGeneSetMap
429 .get( orig_probe ) );
430 ptc.remove( classId );
431 probeToGeneSetMap.remove( orig_probe );
432 probeToGeneSetMap.put( orig_probe, new ArrayList( ptc ) );
433 }
434 }
435 Iterator probe_it = probesForNew.iterator();
436 while ( probe_it.hasNext() ) {
437 String probe = ( String ) probe_it.next();
438 if ( !orig_probes.contains( probe ) ) {
439 ( ( ArrayList ) probeToGeneSetMap.get( probe ) ).add( classId );
440 }
441 }
442 geneSetToProbeMap.put( classId, probesForNew );
443 resetSelectedSets();
444 }
445
446 /***
447 * @return
448 */
449 public TableModel toTableModel() {
450 return new AbstractTableModel() {
451 private String[] columnNames = {
452 "Probe", "Gene", "Description"
453 };
454
455 public String getColumnName( int i ) {
456 return columnNames[i];
457 }
458
459 public int getColumnCount() {
460 return 3;
461 }
462
463 public int getRowCount() {
464 return selectedProbes.size();
465 }
466
467 public Object getValueAt( int i, int j ) {
468
469 String probeid = ( String ) selectedProbes.get( i );
470 switch ( j ) {
471 case 0:
472 return probeid;
473 case 1:
474 return getProbeGeneName( probeid );
475 case 2:
476 return getProbeDescription( probeid );
477 default:
478 return null;
479 }
480 }
481
482 };
483 }
484
485 /***
486 * Create a selected probes list based on a search string.
487 *
488 * @param searchOn A string to be searched.
489 */
490 public void selectProbes( String searchOn ) {
491
492 String searchOnUp = searchOn.toUpperCase();
493 resetSelectedProbes();
494 Set removeUs = new HashSet();
495 for ( Iterator it = probeToGeneName.keySet().iterator(); it.hasNext(); ) {
496 String probe = ( String ) it.next();
497
498 String candidate = ( ( String ) probeToGeneName.get( ( probe ) ) )
499 .toUpperCase();
500
501
502 String candidateD = ( ( String ) probeToDescription.get( ( probe ) ) )
503 .toUpperCase();
504
505 if ( !candidate.startsWith( searchOnUp )
506 && candidateD.indexOf( searchOnUp ) < 0 ) {
507 removeUs.add( probe );
508 }
509
510 }
511
512 for ( Iterator it = removeUs.iterator(); it.hasNext(); ) {
513 selectedProbes.remove( it.next() );
514 }
515 }
516
517 /***
518 * Set the selected gene set to be the entire set.
519 */
520 public void resetSelectedProbes() {
521 selectedProbes = new Vector( probeToGeneName.keySet() );
522 }
523
524 /***
525 * @return the list of selected probes.
526 */
527 public List getSelectedProbes() {
528 return selectedProbes;
529 }
530
531 /***
532 * @return the number of probes currently on the 'selected' list.
533 */
534 public int selectedProbes() {
535 return selectedProbes.size();
536 }
537
538 /***
539 * @param searchOn
540 * @param goData
541 */
542 public void selectSets( String searchOn, GONames goData ) {
543
544 String searchOnUp = searchOn.toUpperCase();
545 resetSelectedSets();
546 Set removeUs = new HashSet();
547 for ( Iterator it = geneSetToProbeMap.keySet().iterator(); it.hasNext(); ) {
548 String candidate = ( String ) it.next();
549
550
551 String candidateN = goData.getNameForId( candidate ).toUpperCase();
552
553 if ( !candidate.toUpperCase().startsWith( searchOnUp )
554 && candidateN.indexOf( searchOnUp ) < 0 ) {
555 removeUs.add( candidate );
556 }
557 }
558
559 for ( Iterator it = removeUs.iterator(); it.hasNext(); ) {
560 selectedSets.remove( it.next() );
561 }
562 }
563
564 /***
565 * Set the selected gene set to be the entire set.
566 */
567 public void resetSelectedSets() {
568 selectedSets = new Vector( geneSetToProbeMap.keySet() );
569 }
570
571 /***
572 * @return list of selected sets.
573 */
574 public List getSelectedSets() {
575 return selectedSets;
576 }
577
578 /***
579 * @return the number of sets currently on the 'selected' list.
580 */
581 public int selectedSets() {
582 return selectedSets.size();
583 }
584
585 /***
586 * Print out the gene annotations in the same format we got them in, but if the gene sets have been modified, this
587 * will be reflected.
588 *
589 * @param out
590 * @throws IOException
591 */
592 public void print( Writer out ) throws IOException {
593 out.write( "Probe\tSymbol\tName\tGeneSets\n" );
594 out.flush();
595 for ( Iterator iter = probeToGeneName.keySet().iterator(); iter.hasNext(); ) {
596 String probe = ( String ) iter.next();
597 String gene = ( String ) probeToGeneName.get( probe );
598 String desc = getProbeDescription( probe );
599 out.write( probe + "\t" + gene + "\t" + desc + "\t" );
600 List geneSets = ( ArrayList ) probeToGeneSetMap.get( probe );
601
602 for ( Iterator iterator = geneSets.iterator(); iterator.hasNext(); ) {
603 String element = ( String ) iterator.next();
604 out.write( element + "|" );
605 }
606 out.write( "\n" );
607 }
608 }
609
610 /***
611 * @return Returns the classToGeneMap.
612 */
613 public Map getGeneSetToGeneMap() {
614 return geneSetToGeneMap;
615 }
616
617 /***
618 * @return Returns the geneToClassMap.
619 */
620 public Map getGeneToGeneSetMap() {
621 return geneToGeneSetMap;
622 }
623
624 /***
625 * Compute how many genes have Gene set annotations.
626 *
627 * @return
628 */
629 public int numAnnotatedGenes() {
630 int count = 0;
631 for ( Iterator iter = probeToGeneSetMap.keySet().iterator(); iter
632 .hasNext(); ) {
633 List element = ( ArrayList ) probeToGeneSetMap.get( iter.next() );
634 if ( element.size() > 0 ) {
635 count++;
636 }
637 }
638 return count;
639 }
640
641 /*********************************************************************************************************************
642 * Private or protected methods
643 *******************************************************************************************************************/
644
645 /***
646 *
647 */
648 private void setUpDataStructures() {
649 probeToGeneSetMap = new LinkedHashMap();
650 geneSetToProbeMap = new LinkedHashMap();
651 probeToGeneName = new HashMap();
652 probeToDescription = new HashMap();
653 geneToProbeList = new HashMap();
654 geneToGeneSetMap = new HashMap();
655 geneSetToRedundantMap = new HashMap();
656 }
657
658 /***
659 * Initialize the gene sets and other data structures that needs special handling before use.
660 */
661 private void setUp() {
662 this.geneSetToGeneMap = makeClassToGeneMap();
663
664 GeneSetMapTools.collapseGeneSets( this, messenger );
665 prune( ABSOLUTE_MINIMUM_GENESET_SIZE, PRACTICAL_MAXIMUM_GENESET_SIZE );
666 resetSelectedProbes();
667 resetSelectedSets();
668 sortGeneSets();
669 }
670
671 /***
672 * Remove a gene set (class) from all the maps that reference it.
673 *
674 * @param id
675 */
676 public void removeClassFromMaps( String id ) {
677 if ( geneSetToProbeMap.containsKey( id ) ) {
678 for ( Iterator pit = ( ( ArrayList ) geneSetToProbeMap.get( id ) )
679 .iterator(); pit.hasNext(); ) {
680 String probe = ( String ) pit.next();
681 if ( probeToGeneSetMap.containsKey( probe )
682 && ( ( ArrayList ) probeToGeneSetMap.get( probe ) )
683 .contains( id ) ) {
684 if ( !( ( ArrayList ) probeToGeneSetMap.get( probe ) )
685 .remove( id ) ) {
686 System.err.println( "Couldn't remove " + id
687 + " from probe to class map for" + probe );
688 }
689 }
690 }
691 if ( geneSetToProbeMap.remove( id ) == null )
692 System.err.println( "Couldn't remove " + id
693 + " from classToProbeMap" );
694
695 if ( geneSetToGeneMap.remove( id ) == null )
696 System.err.println( "Couldn't remove " + id
697 + " from classToGeneMap" );
698 }
699 if ( geneSetToRedundantMap.containsKey( id ) )
700 geneSetToRedundantMap.remove( id );
701 }
702
703 /***
704 * @param probe
705 */
706 private void removeProbeFromMaps( String probe ) {
707 if ( probeToGeneName.containsKey( probe ) ) {
708 String gene = ( String ) probeToGeneName.get( probe );
709 probeToGeneName.remove( probe );
710 if ( geneToProbeList.containsKey( gene ) ) {
711 ( ( ArrayList ) geneToProbeList.get( gene ) ).remove( probe );
712 }
713 }
714 if ( probeToGeneSetMap.containsKey( probe ) ) {
715 Iterator cit = ( ( ArrayList ) probeToGeneSetMap.get( probe ) )
716 .iterator();
717 while ( cit.hasNext() ) {
718 String geneSet = ( String ) cit.next();
719 if ( geneSetToProbeMap.containsKey( geneSet ) ) {
720 ( ( ArrayList ) geneSetToProbeMap.get( geneSet ) )
721 .remove( probe );
722 }
723 }
724 if ( probeToGeneSetMap.remove( probe ) == null ) {
725 System.err.println( "Could not remove " + probe
726 + " from probeToClassMap" );
727 }
728 }
729 if ( probeToDescription.containsKey( probe ) )
730 probeToDescription.remove( probe );
731 }
732
733 /***
734 * Fill in the classToGeneMap with information from the classToProbeMap.
735 *
736 * @return mapping of gene sets to genes.
737 */
738 private Map makeClassToGeneMap() {
739 Map gsToGeneMap = new HashMap();
740 for ( Iterator iter = geneSetToProbeMap.keySet().iterator(); iter
741 .hasNext(); ) {
742 String geneSetId = ( String ) iter.next();
743 List probesInSet = ( ArrayList ) geneSetToProbeMap.get( geneSetId );
744
745 Set genesInSet = new HashSet();
746 for ( Iterator biter = probesInSet.iterator(); biter.hasNext(); ) {
747 String probe = ( String ) biter.next();
748 genesInSet.add( probeToGeneName.get( probe ) );
749 }
750 gsToGeneMap.put( geneSetId, genesInSet );
751 }
752 return gsToGeneMap;
753 }
754
755 private void read( InputStream bis ) throws IOException {
756 this.read( bis, null );
757 }
758
759 private void read( InputStream bis, Set activeGenes ) throws IOException {
760 if ( bis == null ) {
761 throw new IOException( "Inputstream was null" );
762 }
763
764 BufferedReader dis = new BufferedReader( new InputStreamReader( bis ) );
765 ArrayList probeIds = new ArrayList();
766 String classIds = null;
767
768
769
770 int n = 0;
771 String line = "";
772
773 while ( ( line = dis.readLine() ) != null ) {
774 if ( line.startsWith( "#" ) ) continue;
775 StringTokenizer st = new StringTokenizer( line, "\t" );
776
777 if ( !st.hasMoreTokens() ) {
778 continue;
779 }
780
781 String probe = st.nextToken().intern();
782
783
784 if ( !st.hasMoreTokens() ) {
785 continue;
786 }
787
788 String group = st.nextToken().intern();
789
790 if ( activeGenes != null && !activeGenes.contains( group ) ) {
791 continue;
792 }
793
794 probeToGeneName.put( probe.intern(), group.intern() );
795
796
797 if ( geneToProbeList.get( group ) == null ) {
798 geneToProbeList.put( group.intern(), new ArrayList() );
799 }
800 ( ( ArrayList ) geneToProbeList.get( group ) ).add( probe.intern() );
801
802 probeIds.add( probe );
803 probeToGeneSetMap.put( probe.intern(), new ArrayList() );
804 geneToGeneSetMap.put( group, probeToGeneSetMap.get( probe ) );
805
806
807 if ( st.hasMoreTokens() ) {
808 String description = st.nextToken().intern();
809 if ( !description.startsWith( "GO:" ) ) {
810
811
812
813
814 probeToDescription.put( probe.intern(), description.intern() );
815 } else {
816 probeToDescription.put( probe.intern(), "[No description]" );
817 }
818 } else {
819 probeToDescription.put( probe.intern(), "[No description]" );
820 }
821
822
823 if ( st.hasMoreTokens() ) {
824 classIds = st.nextToken();
825
826
827
828 StringTokenizer st1 = new StringTokenizer( classIds, "|" );
829 while ( st1.hasMoreTokens() ) {
830 String go = st1.nextToken().intern();
831
832
833 ( ( ArrayList ) probeToGeneSetMap.get( probe ) ).add( go );
834
835
836 if ( !geneSetToProbeMap.containsKey( go ) ) {
837 geneSetToProbeMap.put( go, new ArrayList() );
838 }
839 ( ( ArrayList ) geneSetToProbeMap.get( go ) ).add( probe );
840
841 }
842 }
843 if ( messenger != null && n % 500 == 0 ) {
844 messenger.setStatus( "Read " + n + " probes" );
845 }
846 n++;
847 }
848
849
850 dis.close();
851 resetSelectedProbes();
852
853 if (probeToGeneName.size() == 0 || geneSetToProbeMap.size() == 0) {
854 throw new IllegalArgumentException("The gene annotations had invalid information. Please check the format.");
855 }
856
857 }
858
859
860 private void read( String filename ) throws IOException {
861
862 if ( !FileTools.testFile( filename ) ) {
863 throw new IOException( "Could not read from " + filename );
864 }
865
866 FileInputStream fis = new FileInputStream( filename );
867 BufferedInputStream bis = new BufferedInputStream( fis );
868 read( bis );
869 }
870
871 /***
872 * remove classes that have too few members todo this doesn't affect the tree representation of the genesets. todo
873 * this overlaps with functionality in GeneSetMapTools
874 *
875 * @param lowThreshold
876 * @param highThreshold
877 */
878 private void prune( int lowThreshold, int highThreshold ) {
879
880 Set removeUs = new HashSet();
881 for ( Iterator it = geneSetToProbeMap.keySet().iterator(); it.hasNext(); ) {
882 String id = ( String ) it.next();
883 if ( numProbesInGeneSet( id ) < lowThreshold
884 || numGenesInGeneSet( id ) < lowThreshold
885 || numProbesInGeneSet( id ) > highThreshold
886 || numGenesInGeneSet( id ) > highThreshold ) {
887 removeUs.add( id );
888 }
889 }
890
891 for ( Iterator it = removeUs.iterator(); it.hasNext(); ) {
892 String id = ( String ) it.next();
893 removeClassFromMaps( id );
894 }
895
896 sortGeneSets();
897 }
898
899 }
900
/**
 * Orders GeneSet objects by their number of items, smallest first.
 */
class ClassSizeComparator implements Comparator {

    /**
     * Compares two GeneSets by size.
     *
     * @param o1 first GeneSet
     * @param o2 second GeneSet
     * @return -1 if o1 is smaller than o2, 1 if it is larger, 0 if both have the same size
     * @throws ClassCastException if either argument is not a GeneSet
     */
    public int compare( Object o1, Object o2 ) {
        GeneSet a = ( GeneSet ) o1;
        GeneSet b = ( GeneSet ) o2;

        int sizea = a.size();
        int sizeb = b.size();

        // Both directions are tested explicitly, so a smaller first argument yields -1.
        if ( sizea < sizeb ) {
            return -1;
        }
        if ( sizea > sizeb ) {
            return 1;
        }
        return 0;
    }

    /**
     * Empty entry point, kept so the class's members stay as they were.
     *
     * @param args ignored
     */
    public static void main( String[] args ) {
    }
}

/**
 * A named collection of items, used to sort gene sets by size.
 */
class GeneSet {
    private String name;
    private Set items;

    /**
     * @param name name of the gene set
     * @param items members of the gene set; stored as given, not copied
     */
    public GeneSet( String name, Set items ) {
        this.name = name;
        this.items = items;
    }

    /**
     * @return Returns the items.
     */
    public Set getItems() {
        return items;
    }

    /**
     * @param items The items to set.
     */
    public void setItems( Set items ) {
        this.items = items;
    }

    /**
     * @return Returns the name.
     */
    public String getName() {
        return name;
    }

    /**
     * @param name The name to set.
     */
    public void setName( String name ) {
        this.name = name;
    }

    /**
     * @return the number of items in the set
     */
    public int size() {
        return items.size();
    }
}