com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java Source code

Java tutorial

Introduction

Here is the source code for com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

Source

/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
*/

package com.ibm.bi.dml.runtime.io;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;

import com.ibm.bi.dml.conf.ConfigurationManager;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException;
import com.ibm.bi.dml.runtime.matrix.data.IJV;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.matrix.data.SparseRowsIterator;
import com.ibm.bi.dml.runtime.util.MapReduceTool;

/**
 * Matrix writer that stores a {@link MatrixBlock} on HDFS as a single text
 * file in MatrixMarket coordinate format: a
 * {@code %%MatrixMarket matrix coordinate real general} banner, a
 * "rows cols nnz" size line, and one "row col value" line per non-zero cell
 * using 1-based indexes.
 */
public class WriterMatrixMarket extends MatrixWriter {
    /** Banner line that opens every MatrixMarket file with a header written by this class. */
    private static final String MM_BANNER = "%%MatrixMarket matrix coordinate real general\n";

    /** Placeholder cell emitted for matrices without any non-zero entry. */
    private static final String EMPTY_CELL = "1 1 0";

    /**
     * Writes {@code src} to {@code fname} in MatrixMarket format, replacing
     * any file that already exists at that location.
     * 
     * @param src   matrix block to write
     * @param fname target file name
     * @param rlen  expected number of rows; must equal the rows of {@code src}
     * @param clen  expected number of columns; must equal the columns of {@code src}
     * @param brlen not used by this writer
     * @param bclen not used by this writer
     * @param nnz   number of non-zeros recorded in the size line
     * @throws IOException if the dimensions of {@code src} differ from
     *                     {@code rlen}/{@code clen} or the write fails
     */
    @Override
    public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen,
            long nnz) throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
        //validity check matrix dimensions
        if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
            throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                    + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
        }

        //prepare file access
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);

        //if the file already exists on HDFS, remove it.
        MapReduceTool.deleteFileIfExistOnHDFS(fname);

        //core write
        writeMatrixMarketMatrixToHDFS(path, job, src, rlen, clen, nnz);
    }

    /**
     * Creates {@code fname} containing only the placeholder cell
     * {@code "1 1 0"} (no banner, no size line, no trailing newline).
     * 
     * @param fname target file name
     * @param rlen  not used by this writer
     * @param clen  not used by this writer
     * @param brlen not used by this writer
     * @param bclen not used by this writer
     * @throws IOException if the file cannot be created or written
     */
    @Override
    public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
            throws IOException, DMLRuntimeException {
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());

        FSDataOutputStream writer = fs.create(path);
        try {
            writer.writeBytes(EMPTY_CELL);
            //explicit close so that a failure while flushing is reported
            writer.close();
        } finally {
            //releases the stream if the write above failed
            IOUtilFunctions.closeSilently(writer);
        }
    }

    /**
     * Writes the banner, the size line and all non-zero cells of {@code src}
     * to {@code path}, overwriting an existing file. If no cell is written,
     * the placeholder line {@code "1 1 0"} is emitted instead.
     * 
     * @param path target file
     * @param job  job configuration used to obtain the file system
     * @param src  matrix block to write
     * @param rlen number of rows recorded in the size line
     * @param clen number of columns recorded in the size line
     * @param nnz  number of non-zeros recorded in the size line
     * @throws IOException if {@code src} exceeds {@code rlen} x {@code clen}
     *                     or the write fails
     */
    protected void writeMatrixMarketMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen,
            long nnz) throws IOException {
        int rows = src.getNumRows();
        int cols = src.getNumColumns();

        //bound check per block; performed before the file is created so that
        //a rejected block neither leaks a stream nor leaves an empty file
        if (rows > rlen || cols > clen) {
            throw new IOException("Matrix block [1:" + rows + ",1:" + cols + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }

        boolean sparse = src.isInSparseFormat();
        boolean entriesWritten = false;
        FileSystem fs = FileSystem.get(job);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        try {
            // First output MM banner and size line
            br.write(createHeader(rlen, clen, nnz));

            //for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();

            // output matrix cells
            if (sparse) //SPARSE
            {
                SparseRowsIterator iter = src.getSparseRowsIterator();
                while (iter.hasNext()) {
                    IJV cell = iter.next();

                    sb.append(cell.i + 1);
                    sb.append(' ');
                    sb.append(cell.j + 1);
                    sb.append(' ');
                    sb.append(cell.v);
                    sb.append('\n');
                    br.write(sb.toString()); //same as append
                    sb.setLength(0);
                    entriesWritten = true;
                }
            } else //DENSE
            {
                for (int i = 0; i < rows; i++) {
                    String rowIndex = Integer.toString(i + 1);
                    for (int j = 0; j < cols; j++) {
                        double lvalue = src.getValueDenseUnsafe(i, j);
                        if (lvalue != 0) //for nnz
                        {
                            sb.append(rowIndex);
                            sb.append(' ');
                            sb.append(j + 1);
                            sb.append(' ');
                            sb.append(lvalue);
                            sb.append('\n');
                            br.write(sb.toString()); //same as append
                            sb.setLength(0);
                            entriesWritten = true;
                        }
                    }
                }
            }

            //handle empty result
            if (!entriesWritten) {
                br.write(EMPTY_CELL + "\n");
            }

            //explicit close: flushes the buffered tail and reports a failure
            //instead of leaving a silently truncated file
            br.close();
        } finally {
            //releases the stream on the error path; closing an already
            //closed BufferedWriter has no effect
            IOUtilFunctions.closeSilently(br);
        }
    }

    /**
     * Creates {@code fileName} consisting of a MatrixMarket banner and size
     * line followed by the raw bytes of {@code srcFileName}. If the source is
     * a directory, all of its direct non-directory children are appended in
     * the order returned by the file system; nested directories are skipped.
     * The source is checked before the target is deleted or created, so an
     * invalid source leaves an existing target untouched.
     * 
     * @param srcFileName source file or directory
     * @param fileName    target file; replaced if it already exists
     * @param rlen        number of rows recorded in the size line
     * @param clen        number of columns recorded in the size line
     * @param nnz         number of non-zeros recorded in the size line
     * @throws IOException if the source does not exist, is neither a file
     *                     nor a directory, or reading/writing fails
     */
    public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
            throws IOException {
        Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

        Path src = new Path(srcFileName);
        Path merge = new Path(fileName);
        FileSystem hdfs = FileSystem.get(conf);

        //validate the source first (getFileStatus fails for a missing path)
        FileStatus srcStatus = hdfs.getFileStatus(src);
        boolean srcIsDirectory = srcStatus.isDirectory();
        if (!srcIsDirectory && !srcStatus.isFile()) {
            throw new IOException(src.toString() + ": No such file or directory");
        }

        if (hdfs.exists(merge)) {
            hdfs.delete(merge, true);
        }

        OutputStream out = hdfs.create(merge, true);
        try {
            // write out the banner and size line first
            out.write(createHeader(rlen, clen, nnz).getBytes());

            if (srcIsDirectory) {
                FileStatus[] contents = hdfs.listStatus(src);
                for (int i = 0; i < contents.length; i++) {
                    if (!contents[i].isDirectory()) {
                        appendFile(hdfs, contents[i].getPath(), out, conf);
                    }
                }
            } else {
                appendFile(hdfs, src, out, conf);
            }

            //explicit close so that a failure while flushing is reported
            out.close();
        } finally {
            //releases the stream on the error path
            IOUtilFunctions.closeSilently(out);
        }
    }

    /**
     * Builds the banner line plus the "rows cols nnz" size line.
     */
    private static String createHeader(long rlen, long clen, long nnz) {
        return MM_BANNER + rlen + " " + clen + " " + nnz + "\n";
    }

    /**
     * Copies the full content of {@code file} to {@code out}, leaving
     * {@code out} open and always closing the input stream.
     */
    private static void appendFile(FileSystem fs, Path file, OutputStream out, Configuration conf)
            throws IOException {
        InputStream in = fs.open(file);
        try {
            IOUtils.copyBytes(in, out, conf, false);
        } finally {
            IOUtilFunctions.closeSilently(in);
        }
    }
}