Clover coverage report - baseCode - 0.2.5
Coverage timestamp: Tue Apr 12 2005 11:31:58 EDT
file stats: LOC: 414   Methods: 8
NCLOC: 185   Classes: 1
30 day Evaluation Version distributed via the Maven Jar Repository. Clover is not free. You have 30 days to evaluate it. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover
 
 Source file Conditionals Statements Methods TOTAL
SparseRaggedDouble2DNamedMatrixReader.java 65.8% 80.8% 37.5% 75.3%
coverage coverage
 1   
 package baseCode.io.reader;
 2   
 
 3   
 import java.io.BufferedReader;
 4   
 import java.io.FileInputStream;
 5   
 import java.io.IOException;
 6   
 import java.io.InputStream;
 7   
 import java.io.InputStreamReader;
 8   
 import java.util.HashMap;
 9   
 import java.util.HashSet;
 10   
 import java.util.Map;
 11   
 import java.util.Set;
 12   
 import java.util.StringTokenizer;
 13   
 
 14   
 import baseCode.dataStructure.matrix.NamedMatrix;
 15   
 import baseCode.dataStructure.matrix.RCDoubleMatrix1D;
 16   
 import baseCode.dataStructure.matrix.SparseRaggedDoubleMatrix2DNamed;
 17   
 import baseCode.util.FileTools;
 18   
 import cern.colt.list.DoubleArrayList;
 19   
 import cern.colt.list.IntArrayList;
 20   
 import cern.colt.map.OpenIntDoubleHashMap;
 21   
 import cern.colt.map.OpenIntIntHashMap;
 22   
 import cern.colt.map.OpenIntObjectHashMap;
 23   
 import cern.colt.matrix.DoubleMatrix1D;
 24   
 
 25   
 /**
 26   
  * Best data structure for reading really big, really sparse matrices when a matrix representation is needed.
 27   
  * <p>
 28   
  * The standard format looks like this:
 29   
  * 
 30   
  * <pre>
 31   
  * 
 32   
  *  
 33   
  *   
 34   
  *    
 35   
  *     
 36   
  *      
 37   
  *       
 38   
  *        
 39   
  *         
 40   
  *          
 41   
  *           
 42   
  *            
 43   
  *             
 44   
  *              
 45   
  *               
 46   
  *                
 47   
  *                                         2          &lt;--- number of items - the first line of the file only. NOTE - this line is often blank or not present.
 48   
  *                                         1 2        &lt;--- item 1 has 2 edges
 49   
  *                                         1 2        &lt;--- edge indices are to items 1 &amp; 2
 50   
  *                                         0.1 100    &lt;--- with the following weights
 51   
  *                                         2 2        &lt;--- item 2 also has 2 edges
 52   
  *                                         1 2        &lt;--- edge indices are also to items 1 &amp; 2 (fully connected)
 53   
  *                                         100 0.1    &lt;--- with the following weights
 54   
  *                
 55   
  *               
 56   
  *              
 57   
  *             
 58   
  *            
 59   
  *           
 60   
  *          
 61   
  *         
 62   
  *        
 63   
  *       
 64   
  *      
 65   
  *     
 66   
  *    
 67   
  *   
 68   
  *  
 69   
  * </pre>
 70   
  * 
 71   
  * <hr>
 72   
  * <p>
 73   
  * Copyright (c) 2004 Columbia University
 74   
  * 
 75   
  * @author pavlidis
 76   
  * @version $Id: SparseRaggedDouble2DNamedMatrixReader.java,v 1.22 2005/01/05 02:01:02 pavlidis Exp $
 77   
  */
 78   
 public class SparseRaggedDouble2DNamedMatrixReader extends
 79   
       AbstractNamedMatrixReader {
 80   
 
 81   
    /**
 82   
     * Read a sparse symmetric square matrix that is expressed as an adjacency list in a tab-delimited file:
 83   
     * 
 84   
     * <pre>
 85   
     * 
 86   
     *  
 87   
     *   
 88   
     *    
 89   
     *     
 90   
     *      
 91   
     *       
 92   
     *        
 93   
     *         
 94   
     *          
 95   
     *           
 96   
     *            
 97   
     *                           item1 item2 weight
 98   
     *                           item1 item5 weight
 99   
     *             
 100   
     *            
 101   
     *           
 102   
     *          
 103   
     *         
 104   
     *        
 105   
     *       
 106   
     *      
 107   
     *     
 108   
     *    
 109   
     *   
 110   
     *  
 111   
     * </pre>
 112   
     * 
 113   
     * <p>
 114   
     * IMPORTANT: By definition the resulting matrix is square and symmetric, even if the symmetric edges are not
 115   
     * explicitly listed.
 116   
     * 
 117   
     * @param name of file
 118   
     * @return
 119   
     */
 120  0
    public NamedMatrix readFromAdjList( String fileName ) throws IOException {
 121  0
       if ( !FileTools.testFile( fileName ) ) {
 122  0
          throw new IOException( "Could not read from file " + fileName );
 123   
       }
 124  0
       FileInputStream stream = new FileInputStream( fileName );
 125  0
       return readFromAdjList( stream );
 126   
    }
 127   
 
 128   
    /**
 129   
     * @throws IOException
 130   
     * @throws NumberFormatException Read a sparse symmetric square matrix that is expressed as an adjacency list in a
 131   
     *         tab-delimited file:
 132   
     * 
 133   
     * <pre>
 134   
     * 
 135   
     *  
 136   
     *   
 137   
     *    
 138   
     *     
 139   
     *      
 140   
     *       
 141   
     *        
 142   
     *         
 143   
     *          
 144   
     *           
 145   
     *            
 146   
     *              item1 item2 weight
 147   
     *              item1 item5 weight
 148   
     *             
 149   
     *            
 150   
     *           
 151   
     *          
 152   
     *         
 153   
     *        
 154   
     *       
 155   
     *      
 156   
     *     
 157   
     *    
 158   
     *   
 159   
     *  
 160   
     * </pre>
 161   
     * 
 162   
     * <p>
 163   
     *         IMPORTANT: By definition the resulting matrix is square and symmetric, even if the symmetric edges are not
 164   
     *         explicitly listed.
 165   
     * @param stream
 166   
     * @return
 167   
     */
 168  2
    public NamedMatrix readFromAdjList( InputStream stream )
 169   
          throws NumberFormatException, IOException {
 170  2
       Set itemNames = new HashSet();
 171  2
       Map rows = new HashMap();
 172   
 
 173  2
       BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
 174   
 
 175  2
       OpenIntObjectHashMap indexNameMap = new OpenIntObjectHashMap(); // eventual row index --> name
 176  2
       Map nameIndexMap = new HashMap(); // name --> eventual row index
 177   
 
 178   
       /*
 179   
        * Store the information about the matrix in a temporary set of data structures, the most important of which is a
 180   
        * map of nodes to edge information. Each edge information object contains the index and the weight of the edge.
 181   
        */
 182  2
       String row;
 183  2
       int index = 0;
 184  ?
       while ( ( row = dis.readLine() ) != null ) {
 185  11742
          StringTokenizer st = new StringTokenizer( row, " \t", false );
 186   
 
 187  11742
          String itemA = "";
 188  11742
          if ( st.hasMoreTokens() ) {
 189  11742
             itemA = st.nextToken();
 190  11742
             if ( !itemNames.contains( itemA ) ) {
 191  490
                rows.put( itemA, new OpenIntDoubleHashMap() );
 192  490
                itemNames.add( itemA );
 193  490
                indexNameMap.put( index, itemA );
 194  490
                nameIndexMap.put( itemA, new Integer( index ) );
 195  490
                ( ( OpenIntDoubleHashMap ) rows.get( itemA ) ).put( index, 0 ); // to itself. - in case it isn't there.
 196  490
                index++;
 197   
             }
 198   
          } else
 199  0
             continue;
 200   
 
 201  11742
          String itemB = "";
 202  11742
          if ( st.hasMoreTokens() ) {
 203  11742
             itemB = st.nextToken();
 204  11742
             if ( !itemNames.contains( itemB ) ) {
 205  1271
                rows.put( itemB, new OpenIntDoubleHashMap() );
 206  1271
                itemNames.add( itemB );
 207  1271
                indexNameMap.put( index, itemB );
 208  1271
                nameIndexMap.put( itemB, new Integer( index ) );
 209  1271
                ( ( OpenIntDoubleHashMap ) rows.get( itemB ) ).put( index, 0 ); // to itself. - in case it isn't there.
 210  1271
                index++;
 211   
             }
 212   
          } else
 213  0
             continue;
 214   
 
 215  11742
          double weight;
 216  11742
          if ( st.hasMoreTokens() ) {
 217  11742
             weight = Double.parseDouble( st.nextToken() );
 218   
          } else {
 219  0
             weight = 1.0; // just make it a binary matrix.
 220   
          }
 221   
 
 222  11742
          int aind = ( ( Integer ) nameIndexMap.get( itemA ) ).intValue();
 223  11742
          int bind = ( ( Integer ) nameIndexMap.get( itemB ) ).intValue();
 224   
 
 225   
      //    if (itemA.equals("CYP4A11") || itemB.equals("CYP4A11")) 
 226   
    //      System.err.println( itemA + " " + itemB + " " + aind + " " + bind );
 227   
 
 228  11742
          ( ( OpenIntDoubleHashMap ) rows.get( itemA ) ).put( bind, weight ); // link a to b.
 229  11742
          ( ( OpenIntDoubleHashMap ) rows.get( itemB ) ).put( aind, weight ); // link b to a.
 230   
          
 231  11742
          if ( ( rows.size() % 500 ) == 0 ) {
 232  3
             log.info( new String( "loading  " + index + "th pair" ) );
 233   
          }
 234   
       }
 235  2
       dis.close();
 236   
 
 237  2
       SparseRaggedDoubleMatrix2DNamed matrix = new SparseRaggedDoubleMatrix2DNamed();
 238   
 
 239  2
       for ( int i = 0; i < indexNameMap.size(); i++ ) {
 240  1761
          String itemName = ( String ) indexNameMap.get( i );
 241   
 
 242  1761
          OpenIntDoubleHashMap arow = ( OpenIntDoubleHashMap ) rows
 243   
                .get( itemName );
 244   
 
 245  1761
          DoubleArrayList finalValues = new DoubleArrayList( arow.size() );
 246   
 
 247   
     //     System.err.println( itemName + " has " + arow.size() + " links" );
 248  1761
          IntArrayList inB = arow.keys();
 249  1761
          inB.sort();
 250  1761
          int[] rowMemberIndexes = inB.elements();
 251   
        //  System.err.println( itemName + " " + i + " " + inB );
 252   
          
 253  1761
          for ( int j = 0; j < rowMemberIndexes.length; j++ ) {
 254  25237
             int itemNumber = rowMemberIndexes[j]; // keys
 255  25237
             double weight = arow.get( itemNumber );
 256  25237
             finalValues.add( weight );
 257   
          }
 258   
 
 259  1761
          DoubleMatrix1D rowMatrix = new RCDoubleMatrix1D( inB, finalValues );
 260  1761
          matrix.addRow( itemName, rowMatrix );
 261   
 
 262  1761
          if ( i > 0 && ( i % 500 ) == 0 ) {
 263  3
             log.info( new String( "Adding  " + i + "th row" ) );
 264   
          }
 265   
       }
 266  2
       return matrix;
 267   
    }
 268   
 
 269   
    /*
 270   
     * (non-Javadoc)
 271   
     * 
 272   
     * @see baseCode.io.reader.AbstractNamedMatrixReader#read(java.lang.String)
 273   
     */
 274  0
    public NamedMatrix read( String fileName ) throws IOException {
 275  0
       if ( !FileTools.testFile( fileName ) ) {
 276  0
          throw new IOException( "Could not read from file " + fileName );
 277   
       }
 278  0
       FileInputStream stream = new FileInputStream( fileName );
 279  0
       return read( stream );
 280   
    }
 281   
 
 282  0
    public NamedMatrix readOneRow( BufferedReader dis ) throws IOException {
 283  0
       return this.readOneRow( dis, 0 );
 284   
    }
 285   
 
 286   
    /**
 287   
     * Use this to read one row from a matrix (JW format). It does not close the reader. (this actually has to read
 288   
     * several lines to get the data for one matrix row)
 289   
     * 
 290   
     * @param stream
 291   
     * @param offset A value indicating the lowest value for the indexes listed. This is here in case the indexes in the
 292   
     *        stream are numbered starting from 1 instead of zero.
 293   
     * @return @throws IOException
 294   
     */
 295  0
    public NamedMatrix readOneRow( BufferedReader dis, int offset )
 296   
          throws IOException {
 297  0
       SparseRaggedDoubleMatrix2DNamed returnVal = new SparseRaggedDoubleMatrix2DNamed();
 298   
 
 299  0
       String row = dis.readLine(); // line containing the id and the number of edges.
 300  0
       StringTokenizer tok = new StringTokenizer( row, " \t" );
 301   
 
 302  0
       int index = Integer.parseInt( tok.nextToken() );
 303  0
       int amount = Integer.parseInt( tok.nextToken() );
 304  0
       String rowName = new Integer( index ).toString();
 305  0
       returnVal.addRow( rowName, readOneRow( dis, amount, offset ) );
 306  0
       return returnVal;
 307   
    }
 308   
 
 309   
    /**
 310   
     * Read an entire sparse matrix from a stream (JW format).
 311   
     * 
 312   
     * @param stream
 313   
     * @return @throws IOException
 314   
     */
 315  0
    public NamedMatrix read( InputStream stream ) throws IOException {
 316  0
       return this.read( stream, 0 );
 317   
    }
 318   
 
 319   
    /**
 320   
     * Read an entire sparse matrix from a stream (JW format).
 321   
     * 
 322   
     * @param stream
 323   
     * @param offset A value indicating the lowest value for the indexes listed. This is here in case the indexes in the
 324   
     *        stream are numbered starting from 1 instead of zero.
 325   
     * @return @throws IOException
 326   
     */
 327  6
    public NamedMatrix read( InputStream stream, int offset ) throws IOException {
 328  6
       BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
 329  6
       SparseRaggedDoubleMatrix2DNamed returnVal = new SparseRaggedDoubleMatrix2DNamed();
 330   
 
 331  6
       String row;
 332  6
       int k = 1;
 333   
 
 334  ?
       while ( ( row = dis.readLine() ) != null ) {
 335   
 
 336  24
          if ( row.equals( "" ) ) { // in case there is a blank line at the top.
 337  0
             continue;
 338   
          }
 339   
 
 340  24
          StringTokenizer tok = new StringTokenizer( row, " \t" );
 341  24
          if ( tok.countTokens() != 2 ) { // in case the row count is there.
 342  6
             continue;
 343   
          }
 344   
 
 345  18
          int index = Integer.parseInt( tok.nextToken() ) - offset;
 346  18
          int amount = Integer.parseInt( tok.nextToken() );
 347   
 
 348  18
          if ( ( index % 500 ) == 0 ) {
 349  6
             log.info( new String( "loading  " + index + "th entry" ) );
 350   
          }
 351   
 
 352  18
          returnVal.addRow( new Integer( k ).toString(), readOneRow( dis,
 353   
                amount, offset ) );
 354   
 
 355  18
          k++;
 356   
       }
 357   
 
 358  6
       dis.close();
 359  6
       return returnVal;
 360   
    }
 361   
 
 362  18
    private DoubleMatrix1D readOneRow( BufferedReader dis, int amount, int offset )
 363   
          throws IOException {
 364   
 
 365   
       /*
 366   
        * we have to be careful to skip any lines that invalid. Each line should have at least two characters. In the
 367   
        * files JW provided there are some lines that are just " ".
 368   
        */
 369  18
       String rowInd = "";
 370  18
       String rowWei = "";
 371   
 
 372   
       //     while ( rowInd.length() < 2 ) {
 373  18
       rowInd = dis.readLine(); // row with indices.
 374   
       //    }
 375   
 
 376   
       //    while ( rowWei.length() < 2 ) {
 377  18
       rowWei = dis.readLine(); // row with weights.
 378   
       //    }
 379   
 
 380  18
       StringTokenizer tokw = new StringTokenizer( rowWei, " \t" );
 381  18
       StringTokenizer toki = new StringTokenizer( rowInd, " \t" );
 382   
 
 383  18
       OpenIntIntHashMap map = new OpenIntIntHashMap( amount, 0.4, 0.8 );
 384  18
       DoubleArrayList values = new DoubleArrayList( amount );
 385  18
       DoubleArrayList finalValues = new DoubleArrayList( amount );
 386   
 
 387  18
       int i = 0;
 388  18
       while ( toki.hasMoreTokens() ) {
 389   
 
 390  36
          double weight = Double.parseDouble( tokw.nextToken() );
 391  36
          int ind = Integer.parseInt( toki.nextToken() ) - offset;
 392   
 
 393  36
          if ( ind < 0 ) {
 394  0
             throw new IllegalStateException(
 395   
                   "Can't have negative index - check offset." );
 396   
          }
 397   
 
 398  36
          map.put( ind, i );
 399  36
          values.add( weight );
 400  36
          i++;
 401   
       }
 402   
 
 403  18
       IntArrayList indexes = map.keys();
 404  18
       indexes.sort();
 405  18
       int[] ix = indexes.elements();
 406  18
       int size = ix.length;
 407  18
       for ( int j = 0; j < size; j++ ) {
 408  36
          finalValues.add( values.get( map.get( ix[j] ) ) );
 409   
       }
 410   
 
 411  18
       return new RCDoubleMatrix1D( indexes, finalValues );
 412   
    }
 413   
 
 414   
 }