|
|||||||||||||||||||
| 30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover | |||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| DoubleMatrixReader.java | 48.1% | 58.9% | 55.6% | 55.1% |
|
||||||||||||||
| 1 |
package baseCode.io.reader;
|
|
| 2 |
|
|
| 3 |
import java.io.BufferedReader;
|
|
| 4 |
import java.io.File;
|
|
| 5 |
import java.io.FileInputStream;
|
|
| 6 |
import java.io.IOException;
|
|
| 7 |
import java.io.InputStream;
|
|
| 8 |
import java.io.InputStreamReader;
|
|
| 9 |
import java.util.HashSet;
|
|
| 10 |
import java.util.Iterator;
|
|
| 11 |
import java.util.List;
|
|
| 12 |
import java.util.Set;
|
|
| 13 |
import java.util.StringTokenizer;
|
|
| 14 |
import java.util.Vector;
|
|
| 15 |
|
|
| 16 |
import baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed;
|
|
| 17 |
import baseCode.dataStructure.matrix.NamedMatrix;
|
|
| 18 |
import cern.colt.list.DoubleArrayList;
|
|
| 19 |
|
|
| 20 |
/**
|
|
| 21 |
* Reader for {@link baseCode.dataStructure.matrix.DenseDoubleMatrix2DNamed}.
|
|
| 22 |
* <p>
|
|
| 23 |
* Copyright (c) 2004
|
|
| 24 |
* </p>
|
|
| 25 |
* <p>
|
|
| 26 |
* Institution: Columbia University
|
|
| 27 |
* </p>
|
|
| 28 |
*
|
|
| 29 |
* @author Paul Pavlidis
|
|
| 30 |
* @version $Id: DoubleMatrixReader.java,v 1.7 2005/03/21 18:01:04 pavlidis Exp $
|
|
| 31 |
*/
|
|
| 32 |
public class DoubleMatrixReader extends AbstractNamedMatrixReader { |
|
| 33 |
|
|
| 34 |
private int numHeadings; |
|
| 35 |
private List colNames;
|
|
| 36 |
|
|
| 37 |
/**
|
|
| 38 |
* @param filename data file to read from
|
|
| 39 |
* @return NamedMatrix object constructed from the data file
|
|
| 40 |
* @throws IOException
|
|
| 41 |
*/
|
|
| 42 | 0 |
public NamedMatrix read( String filename ) throws IOException { |
| 43 | 0 |
return read( filename, null ); |
| 44 |
} |
|
| 45 |
|
|
| 46 |
/**
|
|
| 47 |
* @param stream InputStream stream to read from
|
|
| 48 |
* @return NamedMatrix object constructed from the data file
|
|
| 49 |
* @throws IOException
|
|
| 50 |
*/
|
|
| 51 | 116 |
public NamedMatrix read( InputStream stream ) throws IOException { |
| 52 | 116 |
return read( stream, null ); |
| 53 |
} |
|
| 54 |
|
|
| 55 |
/**
|
|
| 56 |
* @param stream InputStream
|
|
| 57 |
* @param wantedRowNames Set
|
|
| 58 |
* @return <code>read( stream, wantedRowNames, createEmptyRows )</code> with <code>createEmptyRows</code> set to
|
|
| 59 |
* true.
|
|
| 60 |
* @throws IOException
|
|
| 61 |
*/
|
|
| 62 | 116 |
public NamedMatrix read( InputStream stream, Set wantedRowNames )
|
| 63 |
throws IOException {
|
|
| 64 | 116 |
return read( stream, wantedRowNames, true ); |
| 65 |
} |
|
| 66 |
|
|
| 67 |
/**
|
|
| 68 |
* @param stream InputStream
|
|
| 69 |
* @param wantedRowNames Set
|
|
| 70 |
* @param createEmptyRows if a row contained in <code>wantedRowNames</code> is not found in the file, create an
|
|
| 71 |
* empty row filled with Double.NaN iff this param is true.
|
|
| 72 |
* @return matrix
|
|
| 73 |
* @throws IOException
|
|
| 74 |
*/
|
|
| 75 | 116 |
public NamedMatrix read( InputStream stream, Set wantedRowNames,
|
| 76 |
boolean createEmptyRows ) throws IOException { |
|
| 77 |
|
|
| 78 | 116 |
BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) ); |
| 79 |
|
|
| 80 | 116 |
List MTemp = new Vector();
|
| 81 |
|
|
| 82 | 116 |
List rowNames = new Vector();
|
| 83 |
|
|
| 84 |
//BufferedReader dis = new BufferedReader( new FileReader( filename ) );
|
|
| 85 |
// int columnNumber = 0;
|
|
| 86 | 116 |
int rowNumber = 0;
|
| 87 | 116 |
String row; |
| 88 |
|
|
| 89 |
//
|
|
| 90 |
// We need to keep track of which row names we actually found in the file
|
|
| 91 |
// because will want to add empty rows for each row name we didn't find
|
|
| 92 |
// (if createEmptyRows == true).
|
|
| 93 |
//
|
|
| 94 | 116 |
Set wantedRowsFound = null;
|
| 95 | 116 |
if ( wantedRowNames != null && createEmptyRows ) { |
| 96 | 0 |
wantedRowsFound = new HashSet();
|
| 97 |
} |
|
| 98 |
|
|
| 99 | 116 |
colNames = readHeader( dis ); |
| 100 | 115 |
numHeadings = colNames.size(); |
| 101 |
|
|
| 102 | ? |
while ( ( row = dis.readLine() ) != null ) { |
| 103 |
|
|
| 104 | 3450 |
String rowName = parseRow( row, rowNames, MTemp, wantedRowNames ); |
| 105 |
|
|
| 106 | 3450 |
if ( wantedRowNames != null ) { |
| 107 |
|
|
| 108 |
// if we already have all the rows we want, then bail out
|
|
| 109 | 0 |
if ( rowNumber >= wantedRowNames.size() ) {
|
| 110 | 0 |
return createMatrix( MTemp, rowNumber, numHeadings, rowNames,
|
| 111 |
colNames ); |
|
| 112 |
} |
|
| 113 |
// skip this row if it's not in wantedRowNames
|
|
| 114 | 0 |
else if ( !wantedRowNames.contains( rowName ) ) { |
| 115 | 0 |
continue;
|
| 116 | 0 |
} else if ( createEmptyRows ) { |
| 117 |
// we found the row we want in the file
|
|
| 118 | 0 |
wantedRowsFound.add( rowName ); |
| 119 |
} |
|
| 120 |
} |
|
| 121 | 3450 |
rowNumber++; |
| 122 |
} |
|
| 123 | 115 |
stream.close(); |
| 124 |
|
|
| 125 |
//
|
|
| 126 |
// Add empty rows for each row name we didn't find in the file
|
|
| 127 |
//
|
|
| 128 | 115 |
if ( wantedRowNames != null && createEmptyRows ) { |
| 129 | 0 |
Iterator iterator = wantedRowNames.iterator(); |
| 130 | 0 |
while ( iterator.hasNext() ) {
|
| 131 | 0 |
String s = ( String ) iterator.next(); |
| 132 | 0 |
if ( !wantedRowsFound.contains( s ) ) {
|
| 133 |
// add an empty row
|
|
| 134 | 0 |
DoubleArrayList emptyRow = createEmptyRow( numHeadings ); |
| 135 | 0 |
rowNames.add( s ); |
| 136 | 0 |
MTemp.add( emptyRow ); |
| 137 | 0 |
rowNumber++; |
| 138 |
} |
|
| 139 |
} |
|
| 140 |
} |
|
| 141 |
|
|
| 142 | 115 |
return createMatrix( MTemp, rowNumber, numHeadings, rowNames, colNames );
|
| 143 |
|
|
| 144 |
} |
|
| 145 |
|
|
| 146 |
/*
|
|
| 147 |
* (non-Javadoc)
|
|
| 148 |
*
|
|
| 149 |
* @see baseCode.io.reader.AbstractNamedMatrixReader#readOneRow(java.io.BufferedReader)
|
|
| 150 |
*/
|
|
| 151 | 0 |
public NamedMatrix readOneRow( BufferedReader dis ) throws IOException { |
| 152 | 0 |
String row = dis.readLine(); |
| 153 | 0 |
Vector MTemp = new Vector();
|
| 154 |
|
|
| 155 | 0 |
Vector rowNames = new Vector();
|
| 156 | 0 |
parseRow( row, rowNames, MTemp, null );
|
| 157 | 0 |
return createMatrix( MTemp, 1, numHeadings, rowNames, colNames );
|
| 158 |
} |
|
| 159 |
|
|
| 160 |
/**
|
|
| 161 |
* @param wantedRowNames
|
|
| 162 |
* @throws IOException
|
|
| 163 |
* @param numHeadings
|
|
| 164 |
* @param MTemp
|
|
| 165 |
* @param rowNames
|
|
| 166 |
* @param rowNumber
|
|
| 167 |
* @param wantedRowNames
|
|
| 168 |
* @param row
|
|
| 169 |
* @return name of the row
|
|
| 170 |
*/
|
|
| 171 | 3450 |
private String parseRow( String row, List rowNames, List MTemp,
|
| 172 |
Set wantedRowNames ) throws IOException {
|
|
| 173 |
|
|
| 174 | 3450 |
StringTokenizer st = new StringTokenizer( row, "\t", true ); |
| 175 |
|
|
| 176 | 3450 |
DoubleArrayList rowTemp = new DoubleArrayList();
|
| 177 | 3450 |
int columnNumber = 0;
|
| 178 | 3450 |
String previousToken = "";
|
| 179 | 3450 |
String s = null;
|
| 180 |
|
|
| 181 | 3450 |
while ( st.hasMoreTokens() ) {
|
| 182 |
// Iterate through the row, parsing it into row name and values
|
|
| 183 |
|
|
| 184 | 86565 |
s = st.nextToken(); |
| 185 | 86565 |
boolean missing = false; |
| 186 |
|
|
| 187 | 86565 |
if ( s.compareTo( "\t" ) == 0 ) { |
| 188 |
/* two tabs in a row */
|
|
| 189 | 41940 |
if ( previousToken.compareTo( "\t" ) == 0 ) { |
| 190 | 663 |
missing = true;
|
| 191 | 41277 |
} else if ( !st.hasMoreTokens() ) { // at end of line. |
| 192 | 51 |
missing = true;
|
| 193 |
} else {
|
|
| 194 | 41226 |
previousToken = s; |
| 195 | 41226 |
continue;
|
| 196 |
} |
|
| 197 | 44625 |
} else if ( s.compareTo( " " ) == 0 ) { |
| 198 | 0 |
if ( previousToken.compareTo( "\t" ) == 0 ) { |
| 199 | 0 |
missing = true;
|
| 200 |
} else {
|
|
| 201 | 0 |
throw new IOException( "Spaces not allowed after values" ); |
| 202 |
// bad, not allowed.
|
|
| 203 |
} |
|
| 204 | 44625 |
} else if ( s.compareToIgnoreCase( "NaN" ) == 0 || s.compareToIgnoreCase("NA") == 0) { |
| 205 | 0 |
if ( previousToken.compareTo( "\t" ) == 0 ) { |
| 206 | 0 |
missing = true;
|
| 207 |
} else {
|
|
| 208 | 0 |
throw new IOException( "NaN found where it isn't supposed to be" ); |
| 209 |
// bad, not allowed - missing a tab?
|
|
| 210 |
} |
|
| 211 |
} |
|
| 212 |
|
|
| 213 | 45339 |
if ( columnNumber > 0 ) {
|
| 214 | 41889 |
if ( missing ) {
|
| 215 | 714 |
rowTemp.add( Double.NaN ); |
| 216 |
} else {
|
|
| 217 | 41175 |
rowTemp.add( Double.parseDouble( s ) ); |
| 218 |
} |
|
| 219 |
} else {
|
|
| 220 | 3450 |
if ( missing ) {
|
| 221 | 0 |
throw new IOException( |
| 222 |
"Missing values not allowed for row labels" );
|
|
| 223 |
} |
|
| 224 | 3450 |
if ( wantedRowNames != null && !wantedRowNames.contains( s ) ) { |
| 225 | 0 |
return s;
|
| 226 |
} |
|
| 227 | 3450 |
rowNames.add( s.intern() ); |
| 228 |
} |
|
| 229 |
|
|
| 230 | 45339 |
columnNumber++; |
| 231 | 45339 |
previousToken = s; |
| 232 |
} // end while (st.hasMoreTokens())
|
|
| 233 |
// done parsing one row -- no more tokens
|
|
| 234 |
|
|
| 235 | 3450 |
if ( rowTemp.size() > numHeadings ) {
|
| 236 | 0 |
throw new IOException( "Too many values (" + rowTemp.size() |
| 237 |
+ ") in row (based on headings count of " + numHeadings + ")" ); |
|
| 238 |
} |
|
| 239 |
|
|
| 240 | 3450 |
MTemp.add( rowTemp ); |
| 241 | 3450 |
return s;
|
| 242 |
|
|
| 243 |
} |
|
| 244 |
|
|
| 245 |
/**
|
|
| 246 |
* Read a matrix from a file, subject to filtering criteria.
|
|
| 247 |
*
|
|
| 248 |
* @param filename data file to read from
|
|
| 249 |
* @param wantedRowNames contains names of rows we want to get
|
|
| 250 |
* @return NamedMatrix object constructed from the data file
|
|
| 251 |
* @throws IOException
|
|
| 252 |
*/
|
|
| 253 | 0 |
public NamedMatrix read( String filename, Set wantedRowNames )
|
| 254 |
throws IOException {
|
|
| 255 | 0 |
File infile = new File( filename );
|
| 256 | 0 |
if ( !infile.exists() || !infile.canRead() ) {
|
| 257 | 0 |
throw new IOException( "Could not read from file " + filename ); |
| 258 |
} |
|
| 259 | 0 |
FileInputStream stream = new FileInputStream( infile );
|
| 260 | 0 |
return read( stream, wantedRowNames );
|
| 261 |
} // end read
|
|
| 262 |
|
|
| 263 |
//-----------------------------------------------------------------
|
|
| 264 |
// protected methods
|
|
| 265 |
// -----------------------------------------------------------------
|
|
| 266 |
|
|
| 267 | 115 |
protected DenseDoubleMatrix2DNamed createMatrix( List MTemp, int rowCount, |
| 268 |
int colCount, List rowNames, List colNames ) {
|
|
| 269 |
|
|
| 270 | 115 |
DenseDoubleMatrix2DNamed matrix = new DenseDoubleMatrix2DNamed( rowCount,
|
| 271 |
colCount ); |
|
| 272 |
|
|
| 273 | 115 |
for ( int i = 0; i < matrix.rows(); i++ ) { |
| 274 | 3450 |
for ( int j = 0; j < matrix.columns(); j++ ) { |
| 275 | 41940 |
if ( ( ( DoubleArrayList ) MTemp.get( i ) ).size() < j + 1 ) {
|
| 276 | 51 |
matrix.set( i, j, Double.NaN ); |
| 277 |
// this allows the input file to have ragged ends.
|
|
| 278 |
// todo I'm not sure allowing ragged inputs is a good idea -PP
|
|
| 279 |
} else {
|
|
| 280 | 41889 |
matrix.set( i, j, ( ( DoubleArrayList ) MTemp.get( i ) ) |
| 281 |
.elements()[j] ); |
|
| 282 |
} |
|
| 283 |
} |
|
| 284 |
} |
|
| 285 | 115 |
matrix.setRowNames( rowNames ); |
| 286 | 115 |
matrix.setColumnNames( colNames ); |
| 287 | 115 |
return matrix;
|
| 288 |
|
|
| 289 |
} // end createMatrix
|
|
| 290 |
|
|
| 291 | 0 |
protected DoubleArrayList createEmptyRow( int numColumns ) { |
| 292 |
|
|
| 293 | 0 |
DoubleArrayList row = new DoubleArrayList();
|
| 294 | 0 |
for ( int i = 0; i < numColumns; i++ ) { |
| 295 | 0 |
row.add( Double.NaN ); |
| 296 |
} |
|
| 297 | 0 |
return row;
|
| 298 |
} |
|
| 299 |
|
|
| 300 |
} // end class DoubleMatrixReader
|
|
| 301 |
|
|
||||||||||