com.ricemap.spateDB.core.GridRecordWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.ricemap.spateDB.core.GridRecordWriter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
 * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */
package com.ricemap.spateDB.core;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;

import com.ricemap.spateDB.shape.Prism;
import com.ricemap.spateDB.shape.Shape;

/**
 * Writes a spatial file where objects are of type S.
 *
 * <p>Every record is serialized as one line of text and appended to the data
 * file of each grid cell it belongs to. When the writer is created with a list
 * of cells, a master file is also written; it receives one line per closed
 * cell file. When the writer is created without cells, all records go to a
 * single data file and no master file is created by this class.
 *
 * <p>Cell files are finalized by background threads which are run one at a
 * time. {@link #close(Progressable)} waits for all of them and reports the
 * first I/O failure any of them encountered.
 *
 * @author tonyren, Ahmed Eldawy
 *
 * @param <S> type of the shapes written by this writer
 */
public class GridRecordWriter<S extends Shape> implements ShapeRecordWriter<S> {
    public static final Log LOG = LogFactory.getLog(GridRecordWriter.class);

    /**The spatial boundaries for each cell, indexed by cell ID (entries may be null)*/
    protected CellInfo[] cells;

    /**Paths of intermediate files*/
    protected Path[] intermediateCellPath;

    /**An output stream for each grid cell*/
    protected OutputStream[] intermediateCellStreams;

    /**MBR of the records written so far to each cell*/
    protected Prism[] cellsMbr;

    /**Job configuration if part of a MapReduce job*/
    protected JobConf jobConf;

    /**Path of the output directory if not part of a MapReduce job*/
    protected Path outDir;

    /**File system for output path*/
    protected final FileSystem fileSystem;

    /**Temporary text to serialize one object*/
    protected Text text;

    /**Block size for grid file written*/
    protected long blockSize;

    /**A stock object used for serialization/deserialization*/
    protected S stockObject;

    /**An output stream to the master file*/
    protected OutputStream masterFile;

    /**A list of threads closing cells in background*/
    protected ArrayList<Thread> closingThreads;

    /**New line marker to separate records*/
    protected static byte[] NEW_LINE;

    static {
        try {
            NEW_LINE = System.getProperty("line.separator").getBytes("utf-8");
        } catch (UnsupportedEncodingException e) {
            // Fail loudly instead of leaving NEW_LINE null, which would only
            // surface later as a NullPointerException on the first write.
            throw new IllegalStateException("UTF-8 encoding is not supported", e);
        }
    }

    /**A unique prefix to all files written by this writer*/
    protected String prefix;

    /**Pack MBR of each cell around its content after it's written to disk*/
    protected boolean pack;

    /**Expand MBR of each cell to totally cover all of its contents*/
    private boolean expand;

    /**Suffix counter used by {@link #getFinalCellPath(int)} to find an unused file name*/
    private int counter;

    /**
     * First I/O failure raised by a background closing thread, or null if none.
     * Volatile because it is written by a closing thread and read by the thread
     * that calls {@link #close(Progressable)}.
     */
    private volatile IOException backgroundFailure;

    /**
     * Creates a new GridRecordWriter that will write all data files to the
     * given directory
     * @param outDir - The directory in which all files will be stored. If null,
     *   the output path of the given job is used instead.
     * @param job - The MapReduce job associated with this output
     * @param prefix - A unique prefix to be associated with files of this writer
     * @param cells - Cells to partition the file. If null, all records are
     *   written to a single file and no master file is created.
     * @param pack - After writing each cell, pack its MBR around contents
     * @param expand - After writing each cell, report the MBR of its contents
     *   instead of the cell boundaries
     * @throws IOException if the file system cannot be obtained or the master
     *   file cannot be created
     */
    public GridRecordWriter(Path outDir, JobConf job, String prefix, CellInfo[] cells, boolean pack, boolean expand)
            throws IOException {
        this.pack = pack;
        this.expand = expand;
        this.prefix = prefix;
        this.fileSystem = outDir == null ? FileOutputFormat.getOutputPath(job).getFileSystem(job)
                : outDir.getFileSystem(job != null ? job : new Configuration());
        this.outDir = outDir;
        this.jobConf = job;

        if (cells != null) {
            // Make sure cellIndex maps to array index. This is necessary for calls that
            // call directly write(int, Text)
            int highestIndex = 0;
            for (CellInfo cell : cells) {
                if (cell.cellId > highestIndex)
                    highestIndex = (int) cell.cellId;
            }

            // Create a master file that contains meta information about partitions
            masterFile = fileSystem.create(getMasterFilePath());

            this.cells = new CellInfo[highestIndex + 1];
            for (CellInfo cell : cells)
                this.cells[(int) cell.cellId] = cell;
        }

        // Prepare arrays that hold cells information; a single slot is used
        // when the writer has no cells
        int slots = this.cells != null ? this.cells.length : 1;
        intermediateCellStreams = new OutputStream[slots];
        intermediateCellPath = new Path[slots];
        cellsMbr = new Prism[slots];
        for (int i = 0; i < cellsMbr.length; i++) {
            cellsMbr[i] = newInitialMbr();
        }

        this.blockSize = job == null ? fileSystem.getDefaultBlockSize(this.outDir)
                : job.getLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, fileSystem.getDefaultBlockSize(this.outDir));

        closingThreads = new ArrayList<Thread>();
        text = new Text();
    }

    /**
     * Creates the Prism every cell MBR starts from (and is reset to after the
     * cell is closed): Double.MAX_VALUE for the first three constructor
     * arguments and -Double.MAX_VALUE for the last three.
     */
    private static Prism newInitialMbr() {
        return new Prism(Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE,
                -Double.MAX_VALUE, -Double.MAX_VALUE);
    }

    /**
     * Returns the path of the master file. The extension is ".r+tree" when
     * packing, ".rtree" when expanding and ".grid" otherwise.
     * @return path of the master file in the output directory
     * @throws IOException
     */
    protected Path getMasterFilePath() throws IOException {
        String extension;
        if (pack)
            extension = ".r+tree";
        else if (expand)
            extension = ".rtree";
        else
            extension = ".grid";
        return getFilePath("_master" + extension);
    }

    /**
     * Returns a path to a file with the given name in the output directory
     * of the record writer. The writer prefix, if any, is prepended to the name.
     * @param filename - name of the file without the prefix
     * @return the path under the output directory, or the task output path of
     *   the job when no output directory was given
     * @throws IOException
     */
    protected Path getFilePath(String filename) throws IOException {
        if (prefix != null)
            filename = prefix + "_" + filename;
        return outDir != null ? new Path(outDir, filename) : FileOutputFormat.getTaskOutputPath(jobConf, filename);
    }

    /**
     * Overrides the block size used for cell files created from now on.
     * @param _block_size - block size in bytes
     */
    public void setBlockSize(long _block_size) {
        this.blockSize = _block_size;
    }

    /**
     * Sets the stock object used for serialization/deserialization.
     * @param stockObject
     */
    public void setStockObject(S stockObject) {
        this.stockObject = stockObject;
    }

    /**
     * Writes the given shape to every cell whose boundaries intersect the MBR
     * of the shape. If the writer has no cells, the shape is written to the
     * single output file.
     * @param dummy - ignored
     * @param shape - the shape to write
     * @throws IOException
     */
    @Override
    public synchronized void write(NullWritable dummy, S shape) throws IOException {
        if (cells == null) {
            // No cells. Write to the only stream open to this file
            writeInternal(0, shape);
            return;
        }
        // Check which cells should contain the given shape
        Prism mbr = shape.getMBR();
        for (int cellIndex = 0; cellIndex < cells.length; cellIndex++) {
            if (cells[cellIndex] != null && mbr.isIntersected(cells[cellIndex])) {
                writeInternal(cellIndex, shape);
            }
        }
    }

    /**
     * Write the given shape to a specific cell. The shape is not replicated to any other cells.
     * It's just written to the given cell. This is useful when shapes are already assigned
     * and replicated to grid cells another way, e.g. from a map phase that partitions.
     * The writer must have been created with a list of cells; otherwise this
     * method fails with a NullPointerException.
     * @param cellInfo - the cell to write to, matched against known cells using equals
     * @param shape - the shape to write
     * @throws IOException
     */
    @Override
    public synchronized void write(CellInfo cellInfo, S shape) throws IOException {
        for (int i_cell = 0; i_cell < cells.length; i_cell++) {
            if (cellInfo.equals(cells[i_cell]))
                write(i_cell, shape);
        }
    }

    /**
     * Writes the given shape to the cell with the given ID. A negative ID is
     * a marker that closes the cell whose ID is the absolute value.
     * @param cellId
     * @param shape
     * @throws IOException
     */
    @Override
    public void write(int cellId, S shape) throws IOException {
        writeInternal(cellId, shape);
    }

    /**
     * Write the given shape to the cellIndex indicated. A negative index is a
     * special marker that closes the cell at index -cellIndex; the shape is
     * ignored in this case. Note that cell 0 cannot be closed this way because
     * -0 is not negative.
     * @param cellIndex
     * @param shape
     * @throws IOException
     */
    protected synchronized void writeInternal(int cellIndex, S shape) throws IOException {
        if (cellIndex < 0) {
            // A special marker to close a cell
            closeCell(-cellIndex);
            return;
        }
        cellsMbr[cellIndex].expand(shape.getMBR());
        // Convert shape to text
        text.clear();
        shape.toText(text);
        // Write text representation to the file
        OutputStream cellStream = getIntermediateCellStream(cellIndex);
        cellStream.write(text.getBytes(), 0, text.getLength());
        cellStream.write(NEW_LINE);
    }

    /**
     * Returns an output stream in which records are written as they come before
     * they are finally flushed to the cell file. The stream is created on first
     * use.
     * @param cellIndex
     * @return the stream open for the given cell
     * @throws IOException
     */
    protected OutputStream getIntermediateCellStream(int cellIndex) throws IOException {
        if (intermediateCellStreams[cellIndex] == null) {
            // For grid file, we write directly to the final file
            intermediateCellPath[cellIndex] = getFinalCellPath(cellIndex);
            intermediateCellStreams[cellIndex] = createFinalCellStream(intermediateCellPath[cellIndex]);
        }
        return intermediateCellStreams[cellIndex];
    }

    /**
     * Instantiates the compression codec configured for the output of the job
     * (Gzip if the job names none).
     * @return the codec, or null if there is no job or its output is not compressed
     */
    private CompressionCodec createOutputCodec() {
        if (jobConf == null || !FileOutputFormat.getCompressOutput(jobConf))
            return null;
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                GzipCodec.class);
        return ReflectionUtils.newInstance(codecClass, jobConf);
    }

    /**
     * Creates an output stream that will be used to write the final cell file.
     * An existing file at the same path is overwritten. The stream is wrapped
     * with the output compression codec if the job asks for compressed output.
     * @param cellFilePath
     * @return a stream to the newly created file
     * @throws IOException 
     */
    protected OutputStream createFinalCellStream(Path cellFilePath) throws IOException {
        // Instantiate the codec first so that no file is created if this fails
        CompressionCodec codec = createOutputCodec();

        OutputStream cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(outDir), this.blockSize);

        if (codec != null) {
            // Encode the output stream using the codec
            cellStream = new DataOutputStream(codec.createOutputStream(cellStream));
        }
        return cellStream;
    }

    /**
     * Closes (or initiates a close command) for the cell with the given index.
     * Once this method returns, it should be safe to reuse the same cell index
     * to write more data in a new file. Nothing happens if no stream is open
     * for the cell.
     * @param cellIndex
     * @throws IOException
     */
    protected void closeCell(int cellIndex) throws IOException {
        if (intermediateCellStreams[cellIndex] == null) {
            // Nothing was written since the cell was last closed, so there is
            // no stream to flush and no file to report in the master file
            return;
        }

        // A writer without cells has no boundaries to report; use the MBR of
        // the records written instead of dereferencing the null cells array
        Prism cell = cells != null ? cells[cellIndex] : cellsMbr[cellIndex];
        if (expand)
            cell = cellsMbr[cellIndex];
        else if (pack)
            cell = cell.getIntersection(cellsMbr[cellIndex]);

        closeCellBackground(intermediateCellPath[cellIndex], getFinalCellPath(cellIndex),
                intermediateCellStreams[cellIndex], masterFile, cell);
        // Reset the slot so that the same index can be written again
        cellsMbr[cellIndex] = newInitialMbr();
        intermediateCellPath[cellIndex] = null;
        intermediateCellStreams[cellIndex] = null;
    }

    /**
     * Schedules a background thread that flushes and closes the given cell
     * stream and then appends a line describing the resulting file to the
     * master file (if there is one). Threads are queued and only the thread at
     * the head of the queue is started, so they run one at a time. An
     * IOException raised by the thread is logged and reported later by
     * {@link #close(Progressable)}.
     * @param intermediateCellPath - path of the file the stream writes to
     * @param finalCellPath - path of the final cell file
     * @param intermediateCellStream - the stream to flush and close
     * @param masterFile - stream to the master file, or null to skip the entry
     * @param cellMbr - the boundaries to report for this cell in the master file
     * @throws IOException
     */
    protected void closeCellBackground(final Path intermediateCellPath, final Path finalCellPath,
            final OutputStream intermediateCellStream, final OutputStream masterFile, final Prism cellMbr)
            throws IOException {

        Thread closingThread = new Thread() {
            @Override
            public void run() {
                try {
                    Path writtenPath = flushAllEntries(intermediateCellPath, intermediateCellStream,
                            finalCellPath);

                    // Write a line to the master file including file name and cellInfo
                    if (masterFile != null) {
                        Partition partition = new Partition(writtenPath.getName(), cellMbr);
                        Text line = partition.toText(new Text());
                        masterFile.write(line.getBytes(), 0, line.getLength());
                        masterFile.write(NEW_LINE);
                    }
                } catch (IOException e) {
                    // An exception thrown here would die with the thread.
                    // Keep the first one so that close() can report it.
                    LOG.error("Error closing cell file " + intermediateCellPath, e);
                    if (backgroundFailure == null)
                        backgroundFailure = e;
                }
            }
        };

        closingThreads.add(closingThread);
        // Remove previously terminated threads
        while (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.TERMINATED) {
            closingThreads.remove(0);
        }
        // Start first thread (if exists)
        if (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.NEW)
            closingThreads.get(0).start();
    }

    /**
     * Flushes all shapes that were written to one cell to the final file.
     * It returns a path to a (closed) file that contains all entries written.
     * @param intermediateCellPath - path of the file the stream writes to
     * @param intermediateCellStream - the stream to close
     * @param finalCellPath - path of the final file; not used by this implementation
     * @return path of the closed file
     * @throws IOException
     */
    protected Path flushAllEntries(Path intermediateCellPath, OutputStream intermediateCellStream,
            Path finalCellPath) throws IOException {
        // For global-only indexed file, the intermediate file is the final file
        intermediateCellStream.close();
        return intermediateCellPath;
    }

    /**
     * Close the whole writer. Closes every open cell, waits for all background
     * closing threads to finish and then closes the master file.
     * If the calling thread is interrupted while waiting, the wait continues
     * and the interrupt status is restored before returning.
     * @param progressable - notified of progress while closing; may be null
     * @throws IOException if closing a cell or the master file fails, or if
     *   any background closing thread failed with an IOException
     */
    public synchronized void close(Progressable progressable) throws IOException {
        // Close all output files
        for (int cellIndex = 0; cellIndex < intermediateCellStreams.length; cellIndex++) {
            if (intermediateCellStreams[cellIndex] != null) {
                closeCell(cellIndex);
            }
            // Indicate progress. Useful if closing a single cell takes a long time
            if (progressable != null)
                progressable.progress();
        }

        boolean interrupted = false;
        try {
            while (!closingThreads.isEmpty()) {
                Thread t = closingThreads.get(0);
                switch (t.getState()) {
                case NEW:
                    t.start();
                    break;
                case TERMINATED:
                    closingThreads.remove(0);
                    break;
                default:
                    try {
                        t.join(10000);
                    } catch (InterruptedException e) {
                        // Keep waiting so that no cell file is left half written.
                        // Restoring the flag here would make every following
                        // join fail immediately, so it is restored at the end.
                        interrupted = true;
                    }
                    // Keep signalling progress while a slow cell is being closed
                    if (progressable != null)
                        progressable.progress();
                }
            }

            if (masterFile != null)
                masterFile.close();
        } finally {
            if (interrupted)
                Thread.currentThread().interrupt();
        }

        IOException failure = backgroundFailure;
        if (failure != null)
            throw new IOException("Error closing cell file", failure);
    }

    /**
     * Returns path to a file in which the final cell will be written. File
     * names are of the form data_XXXXX or data_XXXXX_N, where N comes from a
     * counter shared by all cells of this writer, followed by the default
     * extension of the compression codec if the output is compressed. Names
     * are tried until one that does not exist in the file system is found.
     * @param cellIndex
     * @return path of a file that does not exist yet
     * @throws IOException 
     */
    protected Path getFinalCellPath(int cellIndex) throws IOException {
        // The extension does not depend on the counter; compute it only once
        CompressionCodec codec = createOutputCodec();
        String extension = codec != null ? codec.getDefaultExtension() : "";

        Path path;
        do {
            String filename = counter == 0 ? String.format("data_%05d", cellIndex)
                    : String.format("data_%05d_%d", cellIndex, counter);
            path = getFilePath(filename + extension);
            counter++;
        } while (fileSystem.exists(path));
        return path;
    }
}