View Javadoc

1   package baseCode.xml;
2   
3   import java.io.IOException;
4   import java.io.InputStream;
5   import java.util.HashMap;
6   import java.util.Iterator;
7   import java.util.Map;
8   
9   import org.xml.sax.Attributes;
10  import org.xml.sax.InputSource;
11  import org.xml.sax.SAXException;
12  import org.xml.sax.XMLReader;
13  import org.xml.sax.helpers.DefaultHandler;
14  import org.xml.sax.helpers.XMLReaderFactory;
15  
16  import baseCode.bio.GOEntry;
17  import baseCode.dataStructure.graph.DirectedGraph;
18  import baseCode.dataStructure.graph.DirectedGraphNode;
19  
20  /***
21   * Read in the GO XML file provided by the Gene Ontology Consortium.
22   * <p>
23   * Copyright (c) Columbia University
24   * 
25   * @author Paul Pavlidis
26   * @version $Id: GOParser.java,v 1.14 2004/08/04 09:47:44 pavlidis Exp $
27   */
28  public class GOParser {
29  
30     private DirectedGraph m;
31  
32     /***
33      * Get the graph that was created.
34      * 
35      * @return a DirectedGraph. Nodes contain OntologyEntry instances.
36      */
37     public DirectedGraph getGraph() {
38        return m;
39     }
40  
41     /***
42      * Get a simple Map that contains keys that are the GO ids, values are the names. This can replace the functionality
43      * of the GONameReader in classScore.
44      * 
45      * @return Map
46      */
47     public Map getGONameMap() {
48        Map nodes = m.getItems();
49        Map result = new HashMap();
50        for ( Iterator it = nodes.keySet().iterator(); it.hasNext(); ) {
51           DirectedGraphNode node = ( DirectedGraphNode ) nodes.get( it.next() );
52           GOEntry e = ( GOEntry ) node.getItem();
53           result.put( e.getId().intern(), e.getName().intern() );
54        }
55        return result;
56     }
57  
58     public GOParser( InputStream i ) throws IOException, SAXException {
59  
60        if ( i.available() == 0 ) {
61           throw new IOException( "XML stream contains no data." );
62        }
63  
64        System.setProperty( "org.xml.sax.driver",
65              "org.apache.xerces.parsers.SAXParser" );
66  
67        XMLReader xr = XMLReaderFactory.createXMLReader();
68        GOHandler handler = new GOHandler();
69        xr.setFeature( "http://xml.org/sax/features/validation", false );
70        xr.setFeature( "http://xml.org/sax/features/external-general-entities",
71              false );
72        xr.setFeature(
73              "http://apache.org/xml/features/nonvalidating/load-external-dtd",
74              false );
75        xr.setContentHandler( handler );
76        xr.setErrorHandler( handler );
77        xr.setEntityResolver( handler );
78        xr.setDTDHandler( handler );
79        xr.parse( new InputSource( i ) );
80  
81        m = handler.getResults();
82     }
83  
84  }
85  
86  class GOHandler extends DefaultHandler {
87  
88     private DirectedGraph m;
89  
90     public DirectedGraph getResults() {
91        return m;
92     }
93  
94     public GOHandler() {
95        super();
96        m = new DirectedGraph();
97     }
98  
99     private boolean inTerm = false;
100    private boolean inDef = false;
101    private boolean inAcc = false;
102    private boolean inName = false;
103    private boolean inPartOf = false;
104    private boolean inIsa = false;
105    private boolean inSyn = false;
106 
107    private String currentAspect;
108    private StringBuffer nameBuf;
109    private StringBuffer accBuf;
110    private StringBuffer defBuf;
111 
112    public void startElement( String uri, String name, String qName,
113          Attributes atts ) {
114 
115       if ( name.equals( "term" ) ) {
116          inTerm = true;
117       } else if ( name.equals( "accession" ) ) {
118          accBuf = new StringBuffer();
119          inAcc = true;
120       } else if ( name.equals( "definition" ) ) {
121          defBuf = new StringBuffer();
122          inDef = true;
123       } else if ( name.equals( "is_a" ) ) {
124          inIsa = true;
125          String res = atts.getValue( "rdf:resource" );
126          String parent = res.substring( res.lastIndexOf( '#' ) + 1, res
127                .length() );
128 
129          if ( !m.containsKey( parent ) ) {
130             m.addNode( parent, new GOEntry( parent, "no name yet",
131                   "no definition yet", "no aspect yet" ) );
132          }
133          String currentTerm = accBuf.toString();
134          m.addParentTo( currentTerm, parent );
135 
136       } else if ( name.equals( "part_of" ) ) {
137          inPartOf = true;
138          String res = atts.getValue( "rdf:resource" );
139          String parent = res.substring( res.lastIndexOf( '#' ) + 1, res
140                .length() );
141 
142          if ( !m.containsKey( parent ) ) {
143             m.addNode( parent, new GOEntry( parent, "no name yet",
144                   "no definition yet", "no aspect yet" ) );
145          }
146          String currentTerm = accBuf.toString();
147          m.addParentTo( currentTerm, parent );
148       } else if ( name.equals( "synonym" ) ) {
149          inSyn = true;
150       } else if ( name.equals( "name" ) ) {
151          nameBuf = new StringBuffer();
152          inName = true;
153       }
154    }
155 
156    public void endElement( String uri, String name, String qName ) {
157       if ( name.equals( "term" ) ) {
158          inTerm = false;
159       } else if ( name.equals( "accession" ) ) {
160          inAcc = false;
161          String currentTerm = accBuf.toString();
162          m.addNode( currentTerm, new GOEntry( currentTerm, "no name yet",
163                "no definition yet", "no aspect yet" ) );
164       } else if ( name.equals( "definition" ) ) {
165          String currentTerm = accBuf.toString();
166          ( ( GOEntry ) m.getNodeContents( currentTerm ) ).setDefinition( defBuf
167                .toString().intern() );
168          inDef = false;
169       } else if ( name.equals( "is_a" ) ) {
170          inIsa = false;
171       } else if ( name.equals( "part_of" ) ) {
172          inPartOf = false;
173       } else if ( name.equals( "synonym" ) ) {
174          inSyn = false;
175       } else if ( name.equals( "name" ) ) {
176          inName = false;
177          String currentTerm = accBuf.toString();
178 
179          String currentName = nameBuf.toString().intern();
180 
181          ( ( GOEntry ) m.getNodeContents( currentTerm ) ).setName( currentName );
182 
183          if ( currentName.equals( "molecular_function" )
184                || currentName.equals( "biological_process" )
185                || currentName.equals( "cellular_component" ) ) {
186             currentAspect = currentName;
187          }
188 
189          ( ( GOEntry ) m.getNodeContents( currentTerm ) )
190                .setAspect( currentAspect );
191 
192       }
193    }
194 
195    public void characters( char ch[], int start, int length ) {
196 
197       if ( inTerm ) {
198          if ( inAcc ) {
199             accBuf.append( ch, start, length );
200          } else if ( inDef ) {
201             defBuf.append( ch, start, length );
202          } else if ( inName ) {
203             nameBuf.append( ch, start, length );
204          }
205       }
206    }
207 
208 }