View Javadoc

1   package baseCode.io.reader;
2   
3   import java.io.BufferedReader;
4   import java.io.File;
5   import java.io.FileInputStream;
6   import java.io.IOException;
7   import java.io.InputStream;
8   import java.io.InputStreamReader;
9   import java.util.HashSet;
10  import java.util.Iterator;
11  import java.util.List;
12  import java.util.Set;
13  import java.util.StringTokenizer;
14  import java.util.Vector;
15  
16  import baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed;
17  import baseCode.dataStructure.matrix.NamedMatrix;
18  import cern.colt.list.DoubleArrayList;
19  
20  /***
21   * Reader for {@link baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed}.
22   * <p>
23   * Copyright (c) 2004
24   * </p>
25   * <p>
26   * Institution: Columbia University
27   * </p>
28   * 
29   * @author Paul Pavlidis
30   * @version $Id: DoubleMatrixReader.java,v 1.7 2005/03/21 18:01:04 pavlidis Exp $
31   */
32  public class DoubleMatrixReader extends AbstractNamedMatrixReader {
33  
34     private int numHeadings;
35     private List colNames;
36  
37     /***
38      * @param filename data file to read from
39      * @return NamedMatrix object constructed from the data file
40      * @throws IOException
41      */
42     public NamedMatrix read( String filename ) throws IOException {
43        return read( filename, null );
44     }
45  
46     /***
47      * @param stream InputStream stream to read from
48      * @return NamedMatrix object constructed from the data file
49      * @throws IOException
50      */
51     public NamedMatrix read( InputStream stream ) throws IOException {
52        return read( stream, null );
53     }
54  
55     /***
56      * @param stream InputStream
57      * @param wantedRowNames Set
58      * @return <code>read( stream, wantedRowNames, createEmptyRows )</code> with <code>createEmptyRows</code> set to
59      *         true.
60      * @throws IOException
61      */
62     public NamedMatrix read( InputStream stream, Set wantedRowNames )
63           throws IOException {
64        return read( stream, wantedRowNames, true );
65     }
66  
67     /***
68      * @param stream InputStream
69      * @param wantedRowNames Set
70      * @param createEmptyRows if a row contained in <code>wantedRowNames</code> is not found in the file, create an
71      *        empty row filled with Double.NaN iff this param is true.
72      * @return matrix
73      * @throws IOException
74      */
75     public NamedMatrix read( InputStream stream, Set wantedRowNames,
76           boolean createEmptyRows ) throws IOException {
77  
78        BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
79  
80        List MTemp = new Vector();
81  
82        List rowNames = new Vector();
83  
84        //BufferedReader dis = new BufferedReader( new FileReader( filename ) );
85        //   int columnNumber = 0;
86        int rowNumber = 0;
87        String row;
88  
89        //
90        // We need to keep track of which row names we actually found in the file
91        // because will want to add empty rows for each row name we didn't find
92        // (if createEmptyRows == true).
93        //
94        Set wantedRowsFound = null;
95        if ( wantedRowNames != null && createEmptyRows ) {
96           wantedRowsFound = new HashSet();
97        }
98  
99        colNames = readHeader( dis );
100       numHeadings = colNames.size();
101 
102       while ( ( row = dis.readLine() ) != null ) {
103 
104          String rowName = parseRow( row, rowNames, MTemp, wantedRowNames );
105 
106          if ( wantedRowNames != null ) {
107 
108             // if we already have all the rows we want, then bail out
109             if ( rowNumber >= wantedRowNames.size() ) {
110                return createMatrix( MTemp, rowNumber, numHeadings, rowNames,
111                      colNames );
112             }
113             // skip this row if it's not in wantedRowNames
114             else if ( !wantedRowNames.contains( rowName ) ) {
115                continue;
116             } else if ( createEmptyRows ) {
117                // we found the row we want in the file
118                wantedRowsFound.add( rowName );
119             }
120          }
121          rowNumber++;
122       }
123       stream.close();
124 
125       //
126       // Add empty rows for each row name we didn't find in the file
127       //
128       if ( wantedRowNames != null && createEmptyRows ) {
129          Iterator iterator = wantedRowNames.iterator();
130          while ( iterator.hasNext() ) {
131             String s = ( String ) iterator.next();
132             if ( !wantedRowsFound.contains( s ) ) {
133                // add an empty row
134                DoubleArrayList emptyRow = createEmptyRow( numHeadings );
135                rowNames.add( s );
136                MTemp.add( emptyRow );
137                rowNumber++;
138             }
139          }
140       }
141 
142       return createMatrix( MTemp, rowNumber, numHeadings, rowNames, colNames );
143 
144    }
145 
146    /*
147     * (non-Javadoc)
148     * 
149     * @see baseCode.io.reader.AbstractNamedMatrixReader#readOneRow(java.io.BufferedReader)
150     */
151    public NamedMatrix readOneRow( BufferedReader dis ) throws IOException {
152       String row = dis.readLine();
153       Vector MTemp = new Vector();
154 
155       Vector rowNames = new Vector();
156       parseRow( row, rowNames, MTemp, null );
157       return createMatrix( MTemp, 1, numHeadings, rowNames, colNames );
158    }
159 
160    /***
161     * @param wantedRowNames
162     * @throws IOException
163     * @param numHeadings
164     * @param MTemp
165     * @param rowNames
166     * @param rowNumber
167     * @param wantedRowNames
168     * @param row
169     * @return name of the row
170     */
171    private String parseRow( String row, List rowNames, List MTemp,
172          Set wantedRowNames ) throws IOException {
173 
174       StringTokenizer st = new StringTokenizer( row, "\t", true );
175 
176       DoubleArrayList rowTemp = new DoubleArrayList();
177       int columnNumber = 0;
178       String previousToken = "";
179       String s = null;
180 
181       while ( st.hasMoreTokens() ) {
182          // Iterate through the row, parsing it into row name and values
183 
184          s = st.nextToken();
185          boolean missing = false;
186 
187          if ( s.compareTo( "\t" ) == 0 ) {
188             /* two tabs in a row */
189             if ( previousToken.compareTo( "\t" ) == 0 ) {
190                missing = true;
191             } else if ( !st.hasMoreTokens() ) { // at end of line.
192                missing = true;
193             } else {
194                previousToken = s;
195                continue;
196             }
197          } else if ( s.compareTo( " " ) == 0 ) {
198             if ( previousToken.compareTo( "\t" ) == 0 ) {
199                missing = true;
200             } else {
201                throw new IOException( "Spaces not allowed after values" );
202                // bad, not allowed.
203             }
204          } else if ( s.compareToIgnoreCase( "NaN" ) == 0 || s.compareToIgnoreCase("NA") == 0) {
205             if ( previousToken.compareTo( "\t" ) == 0 ) {
206                missing = true;
207             } else {
208                throw new IOException( "NaN found where it isn't supposed to be" );
209                // bad, not allowed - missing a tab?
210             }
211          }
212 
213          if ( columnNumber > 0 ) {
214             if ( missing ) {
215                rowTemp.add( Double.NaN );
216             } else {
217                rowTemp.add( Double.parseDouble( s ) );
218             }
219          } else {
220             if ( missing ) {
221                throw new IOException(
222                      "Missing values not allowed for row labels" );
223             }
224             if ( wantedRowNames != null && !wantedRowNames.contains( s ) ) {
225                return s;
226             }
227             rowNames.add( s.intern() );
228          }
229 
230          columnNumber++;
231          previousToken = s;
232       } // end while (st.hasMoreTokens())
233       // done parsing one row -- no more tokens
234 
235       if ( rowTemp.size() > numHeadings ) {
236          throw new IOException( "Too many values (" + rowTemp.size()
237                + ") in row  (based on headings count of " + numHeadings + ")" );
238       }
239 
240       MTemp.add( rowTemp );
241       return s;
242 
243    }
244 
245    /***
246     * Read a matrix from a file, subject to filtering criteria.
247     * 
248     * @param filename data file to read from
249     * @param wantedRowNames contains names of rows we want to get
250     * @return NamedMatrix object constructed from the data file
251     * @throws IOException
252     */
253    public NamedMatrix read( String filename, Set wantedRowNames )
254          throws IOException {
255       File infile = new File( filename );
256       if ( !infile.exists() || !infile.canRead() ) {
257          throw new IOException( "Could not read from file " + filename );
258       }
259       FileInputStream stream = new FileInputStream( infile );
260       return read( stream, wantedRowNames );
261    } // end read
262 
263    //-----------------------------------------------------------------
264    // protected methods
265    // -----------------------------------------------------------------
266 
267    protected DenseDoubleMatrix2DNamed createMatrix( List MTemp, int rowCount,
268          int colCount, List rowNames, List colNames ) {
269 
270       DenseDoubleMatrix2DNamed matrix = new DenseDoubleMatrix2DNamed( rowCount,
271             colCount );
272 
273       for ( int i = 0; i < matrix.rows(); i++ ) {
274          for ( int j = 0; j < matrix.columns(); j++ ) {
275             if ( ( ( DoubleArrayList ) MTemp.get( i ) ).size() < j + 1 ) {
276                matrix.set( i, j, Double.NaN );
277                // this allows the input file to have ragged ends.
278                // todo I'm not sure allowing ragged inputs is a good idea -PP
279             } else {
280                matrix.set( i, j, ( ( DoubleArrayList ) MTemp.get( i ) )
281                      .elements()[j] );
282             }
283          }
284       }
285       matrix.setRowNames( rowNames );
286       matrix.setColumnNames( colNames );
287       return matrix;
288 
289    } // end createMatrix
290 
291    protected DoubleArrayList createEmptyRow( int numColumns ) {
292 
293       DoubleArrayList row = new DoubleArrayList();
294       for ( int i = 0; i < numColumns; i++ ) {
295          row.add( Double.NaN );
296       }
297       return row;
298    }
299 
300 } // end class DoubleMatrixReader