1 package baseCode.xml;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.util.HashMap;
6 import java.util.Iterator;
7 import java.util.Map;
8
9 import org.xml.sax.Attributes;
10 import org.xml.sax.InputSource;
11 import org.xml.sax.SAXException;
12 import org.xml.sax.XMLReader;
13 import org.xml.sax.helpers.DefaultHandler;
14 import org.xml.sax.helpers.XMLReaderFactory;
15
16 import baseCode.bio.GOEntry;
17 import baseCode.dataStructure.graph.DirectedGraph;
18 import baseCode.dataStructure.graph.DirectedGraphNode;
19
20 /***
21 * Read in the GO XML file provided by the Gene Ontology Consortium.
22 * <p>
23 * Copyright (c) Columbia University
24 *
25 * @author Paul Pavlidis
26 * @version $Id: GOParser.java,v 1.14 2004/08/04 09:47:44 pavlidis Exp $
27 */
28 public class GOParser {
29
30 private DirectedGraph m;
31
32 /***
33 * Get the graph that was created.
34 *
35 * @return a DirectedGraph. Nodes contain OntologyEntry instances.
36 */
37 public DirectedGraph getGraph() {
38 return m;
39 }
40
41 /***
42 * Get a simple Map that contains keys that are the GO ids, values are the names. This can replace the functionality
43 * of the GONameReader in classScore.
44 *
45 * @return Map
46 */
47 public Map getGONameMap() {
48 Map nodes = m.getItems();
49 Map result = new HashMap();
50 for ( Iterator it = nodes.keySet().iterator(); it.hasNext(); ) {
51 DirectedGraphNode node = ( DirectedGraphNode ) nodes.get( it.next() );
52 GOEntry e = ( GOEntry ) node.getItem();
53 result.put( e.getId().intern(), e.getName().intern() );
54 }
55 return result;
56 }
57
58 public GOParser( InputStream i ) throws IOException, SAXException {
59
60 if ( i.available() == 0 ) {
61 throw new IOException( "XML stream contains no data." );
62 }
63
64 System.setProperty( "org.xml.sax.driver",
65 "org.apache.xerces.parsers.SAXParser" );
66
67 XMLReader xr = XMLReaderFactory.createXMLReader();
68 GOHandler handler = new GOHandler();
69 xr.setFeature( "http://xml.org/sax/features/validation", false );
70 xr.setFeature( "http://xml.org/sax/features/external-general-entities",
71 false );
72 xr.setFeature(
73 "http://apache.org/xml/features/nonvalidating/load-external-dtd",
74 false );
75 xr.setContentHandler( handler );
76 xr.setErrorHandler( handler );
77 xr.setEntityResolver( handler );
78 xr.setDTDHandler( handler );
79 xr.parse( new InputSource( i ) );
80
81 m = handler.getResults();
82 }
83
84 }
85
86 class GOHandler extends DefaultHandler {
87
88 private DirectedGraph m;
89
90 public DirectedGraph getResults() {
91 return m;
92 }
93
94 public GOHandler() {
95 super();
96 m = new DirectedGraph();
97 }
98
99 private boolean inTerm = false;
100 private boolean inDef = false;
101 private boolean inAcc = false;
102 private boolean inName = false;
103 private boolean inPartOf = false;
104 private boolean inIsa = false;
105 private boolean inSyn = false;
106
107 private String currentAspect;
108 private StringBuffer nameBuf;
109 private StringBuffer accBuf;
110 private StringBuffer defBuf;
111
112 public void startElement( String uri, String name, String qName,
113 Attributes atts ) {
114
115 if ( name.equals( "term" ) ) {
116 inTerm = true;
117 } else if ( name.equals( "accession" ) ) {
118 accBuf = new StringBuffer();
119 inAcc = true;
120 } else if ( name.equals( "definition" ) ) {
121 defBuf = new StringBuffer();
122 inDef = true;
123 } else if ( name.equals( "is_a" ) ) {
124 inIsa = true;
125 String res = atts.getValue( "rdf:resource" );
126 String parent = res.substring( res.lastIndexOf( '#' ) + 1, res
127 .length() );
128
129 if ( !m.containsKey( parent ) ) {
130 m.addNode( parent, new GOEntry( parent, "no name yet",
131 "no definition yet", "no aspect yet" ) );
132 }
133 String currentTerm = accBuf.toString();
134 m.addParentTo( currentTerm, parent );
135
136 } else if ( name.equals( "part_of" ) ) {
137 inPartOf = true;
138 String res = atts.getValue( "rdf:resource" );
139 String parent = res.substring( res.lastIndexOf( '#' ) + 1, res
140 .length() );
141
142 if ( !m.containsKey( parent ) ) {
143 m.addNode( parent, new GOEntry( parent, "no name yet",
144 "no definition yet", "no aspect yet" ) );
145 }
146 String currentTerm = accBuf.toString();
147 m.addParentTo( currentTerm, parent );
148 } else if ( name.equals( "synonym" ) ) {
149 inSyn = true;
150 } else if ( name.equals( "name" ) ) {
151 nameBuf = new StringBuffer();
152 inName = true;
153 }
154 }
155
156 public void endElement( String uri, String name, String qName ) {
157 if ( name.equals( "term" ) ) {
158 inTerm = false;
159 } else if ( name.equals( "accession" ) ) {
160 inAcc = false;
161 String currentTerm = accBuf.toString();
162 m.addNode( currentTerm, new GOEntry( currentTerm, "no name yet",
163 "no definition yet", "no aspect yet" ) );
164 } else if ( name.equals( "definition" ) ) {
165 String currentTerm = accBuf.toString();
166 ( ( GOEntry ) m.getNodeContents( currentTerm ) ).setDefinition( defBuf
167 .toString().intern() );
168 inDef = false;
169 } else if ( name.equals( "is_a" ) ) {
170 inIsa = false;
171 } else if ( name.equals( "part_of" ) ) {
172 inPartOf = false;
173 } else if ( name.equals( "synonym" ) ) {
174 inSyn = false;
175 } else if ( name.equals( "name" ) ) {
176 inName = false;
177 String currentTerm = accBuf.toString();
178
179 String currentName = nameBuf.toString().intern();
180
181 ( ( GOEntry ) m.getNodeContents( currentTerm ) ).setName( currentName );
182
183 if ( currentName.equals( "molecular_function" )
184 || currentName.equals( "biological_process" )
185 || currentName.equals( "cellular_component" ) ) {
186 currentAspect = currentName;
187 }
188
189 ( ( GOEntry ) m.getNodeContents( currentTerm ) )
190 .setAspect( currentAspect );
191
192 }
193 }
194
195 public void characters( char ch[], int start, int length ) {
196
197 if ( inTerm ) {
198 if ( inAcc ) {
199 accBuf.append( ch, start, length );
200 } else if ( inDef ) {
201 defBuf.append( ch, start, length );
202 } else if ( inName ) {
203 nameBuf.append( ch, start, length );
204 }
205 }
206 }
207
208 }