1 package baseCode.io.reader;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.io.InputStreamReader;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Set;
13 import java.util.StringTokenizer;
14 import java.util.Vector;
15
16 import baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed;
17 import baseCode.dataStructure.matrix.NamedMatrix;
18 import cern.colt.list.DoubleArrayList;
19
20 /***
21 * Reader for {@link baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed}.
22 * <p>
23 * Copyright (c) 2004
24 * </p>
25 * <p>
26 * Institution: Columbia University
27 * </p>
28 *
29 * @author Paul Pavlidis
30 * @version $Id: DoubleMatrixReader.java,v 1.7 2005/03/21 18:01:04 pavlidis Exp $
31 */
32 public class DoubleMatrixReader extends AbstractNamedMatrixReader {
33
34 private int numHeadings;
35 private List colNames;
36
37 /***
38 * @param filename data file to read from
39 * @return NamedMatrix object constructed from the data file
40 * @throws IOException
41 */
42 public NamedMatrix read( String filename ) throws IOException {
43 return read( filename, null );
44 }
45
46 /***
47 * @param stream InputStream stream to read from
48 * @return NamedMatrix object constructed from the data file
49 * @throws IOException
50 */
51 public NamedMatrix read( InputStream stream ) throws IOException {
52 return read( stream, null );
53 }
54
55 /***
56 * @param stream InputStream
57 * @param wantedRowNames Set
58 * @return <code>read( stream, wantedRowNames, createEmptyRows )</code> with <code>createEmptyRows</code> set to
59 * true.
60 * @throws IOException
61 */
62 public NamedMatrix read( InputStream stream, Set wantedRowNames )
63 throws IOException {
64 return read( stream, wantedRowNames, true );
65 }
66
67 /***
68 * @param stream InputStream
69 * @param wantedRowNames Set
70 * @param createEmptyRows if a row contained in <code>wantedRowNames</code> is not found in the file, create an
71 * empty row filled with Double.NaN iff this param is true.
72 * @return matrix
73 * @throws IOException
74 */
75 public NamedMatrix read( InputStream stream, Set wantedRowNames,
76 boolean createEmptyRows ) throws IOException {
77
78 BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
79
80 List MTemp = new Vector();
81
82 List rowNames = new Vector();
83
84
85
86 int rowNumber = 0;
87 String row;
88
89
90
91
92
93
94 Set wantedRowsFound = null;
95 if ( wantedRowNames != null && createEmptyRows ) {
96 wantedRowsFound = new HashSet();
97 }
98
99 colNames = readHeader( dis );
100 numHeadings = colNames.size();
101
102 while ( ( row = dis.readLine() ) != null ) {
103
104 String rowName = parseRow( row, rowNames, MTemp, wantedRowNames );
105
106 if ( wantedRowNames != null ) {
107
108
109 if ( rowNumber >= wantedRowNames.size() ) {
110 return createMatrix( MTemp, rowNumber, numHeadings, rowNames,
111 colNames );
112 }
113
114 else if ( !wantedRowNames.contains( rowName ) ) {
115 continue;
116 } else if ( createEmptyRows ) {
117
118 wantedRowsFound.add( rowName );
119 }
120 }
121 rowNumber++;
122 }
123 stream.close();
124
125
126
127
128 if ( wantedRowNames != null && createEmptyRows ) {
129 Iterator iterator = wantedRowNames.iterator();
130 while ( iterator.hasNext() ) {
131 String s = ( String ) iterator.next();
132 if ( !wantedRowsFound.contains( s ) ) {
133
134 DoubleArrayList emptyRow = createEmptyRow( numHeadings );
135 rowNames.add( s );
136 MTemp.add( emptyRow );
137 rowNumber++;
138 }
139 }
140 }
141
142 return createMatrix( MTemp, rowNumber, numHeadings, rowNames, colNames );
143
144 }
145
146
147
148
149
150
151 public NamedMatrix readOneRow( BufferedReader dis ) throws IOException {
152 String row = dis.readLine();
153 Vector MTemp = new Vector();
154
155 Vector rowNames = new Vector();
156 parseRow( row, rowNames, MTemp, null );
157 return createMatrix( MTemp, 1, numHeadings, rowNames, colNames );
158 }
159
160 /***
161 * @param wantedRowNames
162 * @throws IOException
163 * @param numHeadings
164 * @param MTemp
165 * @param rowNames
166 * @param rowNumber
167 * @param wantedRowNames
168 * @param row
169 * @return name of the row
170 */
171 private String parseRow( String row, List rowNames, List MTemp,
172 Set wantedRowNames ) throws IOException {
173
174 StringTokenizer st = new StringTokenizer( row, "\t", true );
175
176 DoubleArrayList rowTemp = new DoubleArrayList();
177 int columnNumber = 0;
178 String previousToken = "";
179 String s = null;
180
181 while ( st.hasMoreTokens() ) {
182
183
184 s = st.nextToken();
185 boolean missing = false;
186
187 if ( s.compareTo( "\t" ) == 0 ) {
188
189 if ( previousToken.compareTo( "\t" ) == 0 ) {
190 missing = true;
191 } else if ( !st.hasMoreTokens() ) {
192 missing = true;
193 } else {
194 previousToken = s;
195 continue;
196 }
197 } else if ( s.compareTo( " " ) == 0 ) {
198 if ( previousToken.compareTo( "\t" ) == 0 ) {
199 missing = true;
200 } else {
201 throw new IOException( "Spaces not allowed after values" );
202
203 }
204 } else if ( s.compareToIgnoreCase( "NaN" ) == 0 || s.compareToIgnoreCase("NA") == 0) {
205 if ( previousToken.compareTo( "\t" ) == 0 ) {
206 missing = true;
207 } else {
208 throw new IOException( "NaN found where it isn't supposed to be" );
209
210 }
211 }
212
213 if ( columnNumber > 0 ) {
214 if ( missing ) {
215 rowTemp.add( Double.NaN );
216 } else {
217 rowTemp.add( Double.parseDouble( s ) );
218 }
219 } else {
220 if ( missing ) {
221 throw new IOException(
222 "Missing values not allowed for row labels" );
223 }
224 if ( wantedRowNames != null && !wantedRowNames.contains( s ) ) {
225 return s;
226 }
227 rowNames.add( s.intern() );
228 }
229
230 columnNumber++;
231 previousToken = s;
232 }
233
234
235 if ( rowTemp.size() > numHeadings ) {
236 throw new IOException( "Too many values (" + rowTemp.size()
237 + ") in row (based on headings count of " + numHeadings + ")" );
238 }
239
240 MTemp.add( rowTemp );
241 return s;
242
243 }
244
245 /***
246 * Read a matrix from a file, subject to filtering criteria.
247 *
248 * @param filename data file to read from
249 * @param wantedRowNames contains names of rows we want to get
250 * @return NamedMatrix object constructed from the data file
251 * @throws IOException
252 */
253 public NamedMatrix read( String filename, Set wantedRowNames )
254 throws IOException {
255 File infile = new File( filename );
256 if ( !infile.exists() || !infile.canRead() ) {
257 throw new IOException( "Could not read from file " + filename );
258 }
259 FileInputStream stream = new FileInputStream( infile );
260 return read( stream, wantedRowNames );
261 }
262
263
264
265
266
267 protected DenseDoubleMatrix2DNamed createMatrix( List MTemp, int rowCount,
268 int colCount, List rowNames, List colNames ) {
269
270 DenseDoubleMatrix2DNamed matrix = new DenseDoubleMatrix2DNamed( rowCount,
271 colCount );
272
273 for ( int i = 0; i < matrix.rows(); i++ ) {
274 for ( int j = 0; j < matrix.columns(); j++ ) {
275 if ( ( ( DoubleArrayList ) MTemp.get( i ) ).size() < j + 1 ) {
276 matrix.set( i, j, Double.NaN );
277
278
279 } else {
280 matrix.set( i, j, ( ( DoubleArrayList ) MTemp.get( i ) )
281 .elements()[j] );
282 }
283 }
284 }
285 matrix.setRowNames( rowNames );
286 matrix.setColumnNames( colNames );
287 return matrix;
288
289 }
290
291 protected DoubleArrayList createEmptyRow( int numColumns ) {
292
293 DoubleArrayList row = new DoubleArrayList();
294 for ( int i = 0; i < numColumns; i++ ) {
295 row.add( Double.NaN );
296 }
297 return row;
298 }
299
300 }