View Javadoc

1   package baseCode.misc;
2   
3   import java.io.IOException;
4   import java.util.ArrayList;
5   import java.util.HashMap;
6   import java.util.HashSet;
7   import java.util.Iterator;
8   import java.util.Map;
9   import java.util.Set;
10  
11  import org.apache.commons.logging.Log;
12  import org.apache.commons.logging.LogFactory;
13  
14  import baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed;
15  import baseCode.io.reader.MapReader;
16  
17  /***
18   * A data structure representing a map between items and a single set of keys. For example, a set of probes and the
19   * genes they map to. Probes that point to the same gene are in the same "group".
20   * <p>
21   * Copyright (c) 2004
22   * </p>
23   * <p>
24   * Institution: Columbia University
25   * </p>
26   * 
27   * @author Paul Pavlidis
28   * @version $Id: GroupMap.java,v 1.3 2004/07/27 03:18:58 pavlidis Exp $
29   */
30  
31  public class GroupMap {
32  
33     private int uniqueItems;
34     private Map duplicateMap;
35     private static Log log = LogFactory.getLog( GroupMap.class );
36  
37     /***
38      * @return int The number of unique items in the GroupMap.
39      */
40     public int getUniqueItems() {
41        return uniqueItems;
42     }
43  
44     /***
45      * For a given key, return true if it has duplicates.
46      * 
47      * @param k String
48      * @return boolean
49      */
50     public boolean hasDuplicates( String k ) {
51        return numDuplicates( k ) > 0;
52     }
53  
54     /***
55      * For a given key, return the number of duplicates it has (not counting itself).
56      * 
57      * @param k String
58      * @return int
59      */
60     public int numDuplicates( String k ) {
61        return ( ( HashSet ) duplicateMap.get( k ) ).size();
62     }
63  
64     /***
65      * The input file format is that used by {@link baseCode.io.reader.MapReader}.
66      * 
67      * @param filename Duplicate map file name to be read by a MapReader.
68      * @param dataMatrix Data file this the map refers to.
69      * @return Map
70      * @throws IOException
71      */
72     public Map read( String filename, DenseDoubleMatrix2DNamed dataMatrix )
73           throws IOException {
74  
75        if ( filename == null || dataMatrix == null ) {
76           throw new IllegalArgumentException(
77                 "You must give a valid file name and data matrix." );
78        }
79  
80        MapReader m = new MapReader();
81        Map initialMap = m.read( filename );
82        Map insideOutMap = new HashMap();
83  
84        // first we turn the map inside-out so we have gene --> probeA, probeB,
85        // probeC.
86        Set keys = initialMap.keySet();
87        for ( Iterator it = keys.iterator(); it.hasNext(); ) {
88           String p = ( String ) it.next();
89  
90           if ( !dataMatrix.hasRow( p ) ) {
91              continue;
92           }
93  
94           String v = ( String ) initialMap.get( p );
95  
96           if ( insideOutMap.get( v ) == null ) {
97              insideOutMap.put( v, new ArrayList() );
98           }
99           ( ( ArrayList ) insideOutMap.get( v ) ).add( p );
100       }
101 
102       uniqueItems = insideOutMap.size();
103       log.info( uniqueItems + " unique items read from duplicate map" );
104 
105       // turn that map into a map of probeA --> probe1, probe2, where probes1
106       // and probes2 are in the same 'group' as probeA
107       this.duplicateMap = new HashMap();
108       keys = insideOutMap.keySet();
109       for ( Iterator it = keys.iterator(); it.hasNext(); ) {
110          String g = ( String ) it.next();
111          ArrayList v = ( ArrayList ) insideOutMap.get( g );
112 
113          for ( Iterator vit = v.iterator(); vit.hasNext(); ) {
114             String p = ( String ) vit.next();
115             if ( duplicateMap.get( p ) == null ) {
116                duplicateMap.put( p, new HashSet() );
117             }
118 
119             for ( Iterator kit = v.iterator(); kit.hasNext(); ) {
120                String pp = ( String ) kit.next();
121                if ( p.equals( pp ) ) {
122                   continue;
123                }
124 
125                ( ( Set ) duplicateMap.get( p ) ).add( pp );
126             }
127          }
128       }
129 
130       // now a sanity check. Make sure every item in our data is also in the
131       // duplicate map.
132       for ( Iterator it = dataMatrix.getRowNameMapIterator(); it.hasNext(); ) {
133          boolean foundProblem = false;
134          String rowName = ( String ) it.next();
135          if ( !duplicateMap.containsKey( rowName ) ) {
136             duplicateMap.put( rowName, new HashSet() );
137             foundProblem = true;
138          }
139          if ( foundProblem ) {
140             throw new IllegalStateException(
141                   "The data has item(s) that aren't in the duplicate map." );
142          }
143       }
144 
145       return this.duplicateMap;
146 
147    }
148 
149 }