|
|||||||||||||||||||
| 30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover | |||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| SparseDoubleMatrixReader.java | 67.9% | 87.1% | 33.3% | 79.8% |
|
||||||||||||||
| 1 |
package baseCode.io.reader;
|
|
| 2 |
|
|
| 3 |
import java.io.BufferedReader;
|
|
| 4 |
import java.io.File;
|
|
| 5 |
import java.io.FileInputStream;
|
|
| 6 |
import java.io.IOException;
|
|
| 7 |
import java.io.InputStream;
|
|
| 8 |
import java.io.InputStreamReader;
|
|
| 9 |
import java.util.Collections;
|
|
| 10 |
import java.util.HashMap;
|
|
| 11 |
import java.util.HashSet;
|
|
| 12 |
import java.util.Iterator;
|
|
| 13 |
import java.util.Map;
|
|
| 14 |
import java.util.Set;
|
|
| 15 |
import java.util.StringTokenizer;
|
|
| 16 |
import java.util.Vector;
|
|
| 17 |
|
|
| 18 |
import baseCode.dataStructure.matrix.NamedMatrix;
|
|
| 19 |
import baseCode.dataStructure.matrix.SparseDoubleMatrix2DNamed;
|
|
| 20 |
|
|
| 21 |
import com.braju.beta.format.Format;
|
|
| 22 |
import com.braju.beta.format.FormatReader;
|
|
| 23 |
import com.braju.beta.format.FormatString;
|
|
| 24 |
import com.braju.beta.format.Parameters;
|
|
| 25 |
import com.braju.beta.lang.DoubleVariable;
|
|
| 26 |
import com.braju.beta.lang.IntegerVariable;
|
|
| 27 |
import com.braju.beta.lang.NumberVariable;
|
|
| 28 |
|
|
| 29 |
/**
|
|
| 30 |
* <hr>
|
|
| 31 |
* <p>
|
|
| 32 |
* Copyright (c) 2004 Columbia University
|
|
| 33 |
*
|
|
| 34 |
* @author pavlidis
|
|
| 35 |
* @version $Id: SparseDoubleMatrixReader.java,v 1.4 2004/08/16 02:06:01 pavlidis Exp $
|
|
| 36 |
*/
|
|
| 37 |
public class SparseDoubleMatrixReader extends AbstractNamedMatrixReader { |
|
| 38 |
|
|
| 39 |
/**
|
|
| 40 |
* @param filename data file to read from
|
|
| 41 |
* @return NamedMatrix object constructed from the data file
|
|
| 42 |
* @throws IOException
|
|
| 43 |
*/
|
|
| 44 | 0 |
public NamedMatrix read( String filename ) throws IOException { |
| 45 | 0 |
return read( filename, null ); |
| 46 |
} |
|
| 47 |
|
|
| 48 |
/**
|
|
| 49 |
* @param stream InputStream stream to read from
|
|
| 50 |
* @return NamedMatrix object constructed from the data file
|
|
| 51 |
* @throws IOException
|
|
| 52 |
*/
|
|
| 53 | 0 |
public NamedMatrix read( InputStream stream ) throws IOException { |
| 54 | 0 |
return read( stream, null ); |
| 55 |
} |
|
| 56 |
|
|
| 57 |
/**
|
|
| 58 |
* Read a matrix from a file, subject to filtering criteria.
|
|
| 59 |
*
|
|
| 60 |
* @param filename data file to read from
|
|
| 61 |
* @param wantedRowNames contains names of rows we want to get
|
|
| 62 |
* @return NamedMatrix object constructed from the data file
|
|
| 63 |
* @throws IOException
|
|
| 64 |
*/
|
|
| 65 | 0 |
public NamedMatrix read( String filename, Set wantedRowNames )
|
| 66 |
throws IOException {
|
|
| 67 | 0 |
File infile = new File( filename );
|
| 68 | 0 |
if ( !infile.exists() || !infile.canRead() ) {
|
| 69 | 0 |
throw new IOException( "Could not read from file " + filename ); |
| 70 |
} |
|
| 71 | 0 |
FileInputStream stream = new FileInputStream( infile );
|
| 72 | 0 |
return read( stream, wantedRowNames );
|
| 73 |
} // end read
|
|
| 74 |
|
|
| 75 |
/**
|
|
| 76 |
* Read a sparse matrix in "JW" (Jason Weston) format. The format is like this:
|
|
| 77 |
*
|
|
| 78 |
* <pre>
|
|
| 79 |
* 2 <--- number of items - the first line of the file only. NOTE - this line is often blank or not present.
|
|
| 80 |
* 1 2 <--- items 1 has 2 edges
|
|
| 81 |
* 1 2 <--- edge indices are to items 1 & 2
|
|
| 82 |
* 0.1 100 <--- with the following weights
|
|
| 83 |
* 2 2 <--- items 2 also has 2 edges
|
|
| 84 |
* 1 2 <--- edge indices are also to items 1 & 2 (fully connected)
|
|
| 85 |
* 100 0.1 <--- with the following weights
|
|
| 86 |
* </pre>
|
|
| 87 |
*
|
|
| 88 |
* <p>
|
|
| 89 |
* Note that the item numbering starts at 1. This is a requirement.
|
|
| 90 |
* <p>
|
|
| 91 |
* Note that this cannot handle very large matrices - the limit to rows x columns is the number Integer.MAX_VALUE.
|
|
| 92 |
* This is an implementation problem for colt's sparse matrix.
|
|
| 93 |
*
|
|
| 94 |
* @param stream
|
|
| 95 |
* @param wantedRowNames
|
|
| 96 |
* @return @throws IOException
|
|
| 97 |
*/
|
|
| 98 | 1 |
public NamedMatrix readJW( InputStream stream ) throws IOException, |
| 99 |
IllegalAccessException, NoSuchFieldException {
|
|
| 100 |
|
|
| 101 | 1 |
BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) ); |
| 102 | 1 |
FormatReader ff = new FormatReader( dis );
|
| 103 | 1 |
Parameters p = new Parameters();
|
| 104 | 1 |
FormatString fmtdd = ff.compileFormatString( "%d %d" );
|
| 105 | 1 |
FormatString fmtd = ff.compileFormatString( "%d" );
|
| 106 | 1 |
FormatString fmtg = ff.compileFormatString( "%g" );
|
| 107 |
|
|
| 108 | 1 |
NumberVariable index = new IntegerVariable();
|
| 109 | 1 |
NumberVariable amount = new IntegerVariable();
|
| 110 | 1 |
DoubleVariable eval = new DoubleVariable();
|
| 111 |
|
|
| 112 | 1 |
int dim = 10;
|
| 113 |
|
|
| 114 | 1 |
dim = new Integer( Integer.parseInt( dis.readLine() ) ).intValue();
|
| 115 | 1 |
SparseDoubleMatrix2DNamed returnVal = new SparseDoubleMatrix2DNamed( dim,
|
| 116 |
dim ); |
|
| 117 |
|
|
| 118 | 1 |
for ( int k = 1; k <= dim; k++ ) { |
| 119 |
|
|
| 120 | 3 |
returnVal.addColumnName( new Integer( k ).toString(), k - 1 );
|
| 121 | 3 |
returnVal.addRowName( new Integer( k ).toString(), k - 1 );
|
| 122 |
|
|
| 123 | 3 |
ff.read( fmtdd, p.add( index ).add( amount ) ); // "item 1 has 2 edges"
|
| 124 |
|
|
| 125 | 3 |
if ( ( index.intValue() % 500 ) == 0 ) {
|
| 126 | 0 |
log.debug( Format.sprintf( |
| 127 |
"loading %2.1f%% complete (%dth entry)... \n", p.add(
|
|
| 128 |
100.0 * ( ( index.intValue() ) / ( ( float ) dim ) ) )
|
|
| 129 |
.add( index ) ) ); |
|
| 130 |
} |
|
| 131 |
|
|
| 132 | 3 |
int[] rowind = new int[amount.intValue()]; |
| 133 | 3 |
for ( int i = 0; i < amount.intValue(); i++ ) { // "edge indices are to 1 and 2" |
| 134 |
|
|
| 135 | 6 |
Format.fscanf( ff, fmtd, p.add( index ) ); |
| 136 | 6 |
int ind = index.intValue();
|
| 137 |
|
|
| 138 | 6 |
if ( ind > dim || ind < 1 ) {
|
| 139 | 0 |
throw new IllegalStateException( "Illegal value " + ind |
| 140 |
+ " found in index list for item " + k );
|
|
| 141 |
} |
|
| 142 | 6 |
rowind[i] = ind; |
| 143 |
} |
|
| 144 |
|
|
| 145 | 3 |
for ( int i = 0; i < amount.intValue(); i++ ) { // "with the following weights" |
| 146 | 6 |
Format.fscanf( ff, fmtg, p.add( eval ) ); |
| 147 | 6 |
returnVal.setQuick( k - 1, rowind[i] - 1, eval.doubleValue() ); |
| 148 |
// returnVal.setQuick( rowind[i] - 1, k - 1, eval.doubleValue() );
|
|
| 149 |
} |
|
| 150 |
|
|
| 151 |
} |
|
| 152 | 1 |
ff.close(); |
| 153 | 1 |
return returnVal;
|
| 154 |
} |
|
| 155 |
|
|
| 156 |
/**
|
|
| 157 |
* Read a sparse matrix that is expressed as an adjacency list in a tab-delimited file:
|
|
| 158 |
*
|
|
| 159 |
* <pre>
|
|
| 160 |
* item1 item2 weight
|
|
| 161 |
* item1 item5 weight
|
|
| 162 |
* </pre>
|
|
| 163 |
*
|
|
| 164 |
* <p>
|
|
| 165 |
* By definition the resulting matrix is square and symmetric.
|
|
| 166 |
* </p>
|
|
| 167 |
* <p>
|
|
| 168 |
* Note that the ordering of the items will be as they are encountered in the file.
|
|
| 169 |
*
|
|
| 170 |
* @param stream InputStream
|
|
| 171 |
* @param wantedRowNames Set
|
|
| 172 |
* @return NamedMatrix
|
|
| 173 |
* @throws IOException
|
|
| 174 |
*/
|
|
| 175 | 1 |
public NamedMatrix read( InputStream stream, Set wantedRowNames )
|
| 176 |
throws IOException {
|
|
| 177 |
|
|
| 178 | 1 |
Set itemNames = new HashSet();
|
| 179 | 1 |
Map rows = new HashMap();
|
| 180 |
|
|
| 181 | 1 |
BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) ); |
| 182 |
|
|
| 183 | 1 |
String row; |
| 184 | 1 |
int index = 0;
|
| 185 | 1 |
Map nameIndexMap = new HashMap(); // name --> eventual row index |
| 186 | ? |
while ( ( row = dis.readLine() ) != null ) { |
| 187 | 6 |
StringTokenizer st = new StringTokenizer( row, " \t", false ); |
| 188 |
|
|
| 189 | 6 |
String itemA = "";
|
| 190 |
|
|
| 191 | 6 |
if ( st.hasMoreTokens() ) {
|
| 192 | 6 |
itemA = st.nextToken(); |
| 193 |
|
|
| 194 | 6 |
if ( !itemNames.contains( itemA ) ) {
|
| 195 | 2 |
rows.put( itemA, new HashSet() );
|
| 196 | 2 |
itemNames.add( itemA ); |
| 197 | 2 |
nameIndexMap.put( itemA, new Integer( index ) );
|
| 198 | 2 |
index++; |
| 199 |
} |
|
| 200 |
} else {
|
|
| 201 |
// continue;
|
|
| 202 |
} |
|
| 203 |
|
|
| 204 | 6 |
String itemB = "";
|
| 205 | 6 |
if ( st.hasMoreTokens() ) {
|
| 206 | 6 |
itemB = st.nextToken(); |
| 207 | 6 |
if ( !itemNames.contains( itemB ) ) {
|
| 208 | 1 |
rows.put( itemB, new HashSet() );
|
| 209 | 1 |
itemNames.add( itemB ); |
| 210 | 1 |
nameIndexMap.put( itemB, new Integer( index ) );
|
| 211 | 1 |
index++; |
| 212 |
} |
|
| 213 |
} else {
|
|
| 214 |
// continue;
|
|
| 215 |
} |
|
| 216 |
|
|
| 217 | 6 |
double weight;
|
| 218 | 6 |
if ( st.hasMoreTokens() ) {
|
| 219 | 6 |
weight = Double.parseDouble( st.nextToken() ); |
| 220 |
} else {
|
|
| 221 | 0 |
weight = 1.0; // just make it a binary matrix.
|
| 222 |
} |
|
| 223 |
|
|
| 224 | 6 |
( ( Set ) rows.get( itemA ) ).add( new IndexScoreDyad(
|
| 225 |
( ( Integer ) nameIndexMap.get( itemB ) ).intValue(), weight ) ); |
|
| 226 | 6 |
( ( Set ) rows.get( itemB ) ).add( new IndexScoreDyad(
|
| 227 |
( ( Integer ) nameIndexMap.get( itemA ) ).intValue(), weight ) ); |
|
| 228 |
} |
|
| 229 |
|
|
| 230 | 1 |
SparseDoubleMatrix2DNamed matrix = new SparseDoubleMatrix2DNamed(
|
| 231 |
itemNames.size(), itemNames.size() ); |
|
| 232 |
|
|
| 233 | 1 |
Vector itemVec = new Vector( itemNames );
|
| 234 | 1 |
Collections.sort(itemVec); |
| 235 |
|
|
| 236 | 1 |
matrix.setColumnNames( itemVec ); |
| 237 | 1 |
matrix.setRowNames( itemVec ); |
| 238 | 1 |
for ( Iterator iter = itemNames.iterator(); iter.hasNext(); ) {
|
| 239 | 3 |
String itemA = ( String ) iter.next(); |
| 240 | 3 |
int rowIndex = matrix.getRowIndexByName( itemA );
|
| 241 | 3 |
Set arow = ( Set ) rows.get( itemA ); |
| 242 | 3 |
for ( Iterator iterator = arow.iterator(); iterator.hasNext(); ) {
|
| 243 | 12 |
IndexScoreDyad element = ( IndexScoreDyad ) iterator.next(); |
| 244 | 12 |
int ind = element.getKey();
|
| 245 | 12 |
double weight = element.getValue();
|
| 246 |
|
|
| 247 | 12 |
matrix.setQuick( rowIndex, ind, weight ); |
| 248 | 12 |
matrix.setQuick( ind, rowIndex, weight ); |
| 249 |
} |
|
| 250 |
|
|
| 251 |
} |
|
| 252 |
|
|
| 253 | 1 |
dis.close(); |
| 254 | 1 |
return matrix;
|
| 255 |
} |
|
| 256 |
|
|
| 257 |
/* (non-Javadoc)
|
|
| 258 |
* @see baseCode.io.reader.AbstractNamedMatrixReader#readOneRow(java.io.BufferedReader)
|
|
| 259 |
*/
|
|
| 260 | 0 |
public NamedMatrix readOneRow( BufferedReader dis ) throws IOException { |
| 261 |
// TODO Auto-generated method stub
|
|
| 262 |
// this is impossible for the pair method.
|
|
| 263 | 0 |
throw new UnsupportedOperationException(); |
| 264 |
} |
|
| 265 |
|
|
| 266 |
} |
|
| 267 |
|
|
| 268 |
|
|
| 269 |
|
|
||||||||||