com.ricemap.spateDB.mapred.IndexedPrism.java Source code

Introduction

Here is the source code for com.ricemap.spateDB.mapred.IndexedPrism.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
 * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */
package com.ricemap.spateDB.mapred;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
import org.apache.hadoop.net.NetworkTopology;

import com.ricemap.spateDB.core.GlobalIndex;
import com.ricemap.spateDB.core.Partition;
import com.ricemap.spateDB.core.ResultCollector2;
import com.ricemap.spateDB.core.SpatialSite;
import com.ricemap.spateDB.shape.Prism;

/**
 * A {@link Prism} that carries an integer {@code index} alongside the shape
 * passed to the superclass constructor.
 *
 * <p>Equality and hashing are based solely on {@link #index}; the geometry
 * inherited from {@link Prism} is not compared.
 */
class IndexedPrism extends Prism {
    /** Identifier attached to this prism; the only field used by equals/hashCode. */
    int index;

    /**
     * Creates an indexed prism.
     *
     * @param index the identifier to attach
     * @param r the prism handed to the {@code Prism(Prism)} superclass constructor
     *          (presumably copied by it; confirm against {@code Prism})
     */
    public IndexedPrism(int index, Prism r) {
        super(r);
        this.index = index;
    }

    /**
     * Compares by {@link #index} only.
     *
     * @param obj the object to compare with; may be {@code null} or of any type
     * @return {@code true} if {@code obj} is an {@code IndexedPrism} with the same
     *         index, {@code false} otherwise (including for {@code null})
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof is false for null, so this also covers the null case
        // instead of failing with NullPointerException/ClassCastException.
        if (!(obj instanceof IndexedPrism)) {
            return false;
        }
        return index == ((IndexedPrism) obj).index;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: equal indexes produce
     * equal hash codes.
     *
     * @return the index
     */
    @Override
    public int hashCode() {
        return index;
    }
}

/**
 * An input format that reads a pair of files simultaneously. Both the key and
 * the value types exposed to the job are {@link PairWritable}s (see the type
 * arguments passed to {@link FileInputFormat}).
 *
 * <p>{@link #getSplits(JobConf, int)} generates one {@link CombineFileSplit}
 * for each pair of blocks selected by the configured {@link BlockFilter}. When
 * no filter is configured, or a global index is missing for either of the first
 * two input files, every block of the first file is paired with every block of
 * the second file instead.
 *
 * @author tonyren, Ahmed Eldawy
 *
 */
public abstract class BinarySpatialInputFormat<K extends Writable, V extends Writable>
        extends FileInputFormat<PairWritable<K>, PairWritable<V>> {

    private static final Log LOG = LogFactory.getLog(BinarySpatialInputFormat.class);

    /** A trailing chunk may exceed the split size by this factor before it is split again. */
    private static final double SPLIT_SLOP = 1.1; // 10% slop

    /**
     * Computes the combined splits for the two input files of the job.
     *
     * @param job the job configuration; its first two input paths are joined
     *            (any further input paths are not paired)
     * @param numSplits ignored; the number of splits is determined by the block pairs
     * @return one {@link CombineFileSplit} per matched pair of splits
     * @throws IOException if fewer than two input paths are configured, or if file
     *             metadata cannot be read while computing the splits
     */
    @Override
    public InputSplit[] getSplits(final JobConf job, int numSplits) throws IOException {
        // Get a list of all input files. There should be exactly two files.
        final Path[] inputFiles = getInputPaths(job);
        if (inputFiles.length < 2) {
            // Fail with a clear message instead of an ArrayIndexOutOfBoundsException below.
            throw new IOException("BinarySpatialInputFormat requires two input files but found "
                    + inputFiles.length);
        }

        // Generic array creation is unavoidable here; only GlobalIndex<Partition> is stored.
        @SuppressWarnings("unchecked")
        GlobalIndex<Partition>[] gIndexes = new GlobalIndex[inputFiles.length];

        BlockFilter blockFilter = createBlockFilter(job);
        if (blockFilter != null) {
            // Extract global indexes from input files
            for (int i_file = 0; i_file < inputFiles.length; i_file++) {
                FileSystem fs = inputFiles[i_file].getFileSystem(job);
                gIndexes[i_file] = SpatialSite.getGlobalIndex(fs, inputFiles[i_file]);
            }
        }

        final Vector<CombineFileSplit> matchedSplits = new Vector<CombineFileSplit>();
        if (gIndexes[0] == null || gIndexes[1] == null) {
            // No filter or no index: join every possible pair (Cartesian product)
            addCartesianSplits(job, inputFiles, matchedSplits);
        } else {
            // Both indexes exist, which implies blockFilter is non-null here.
            addFilteredSplits(job, inputFiles, blockFilter, gIndexes[0], gIndexes[1], matchedSplits);
        }

        LOG.info("Matched " + matchedSplits.size() + " combine splits");

        // Return all matched splits
        return matchedSplits.toArray(new InputSplit[matchedSplits.size()]);
    }

    /**
     * Instantiates and configures the block filter named by
     * {@code SpatialSite.FilterClass}, if any.
     *
     * @return the configured filter, or {@code null} when none is configured or it
     *         cannot be instantiated (the caller then falls back to the Cartesian product)
     */
    private BlockFilter createBlockFilter(JobConf job) {
        Class<? extends BlockFilter> blockFilterClass = job.getClass(SpatialSite.FilterClass, null,
                BlockFilter.class);
        if (blockFilterClass == null) {
            return null;
        }
        try {
            // Get all blocks the user wants to process
            BlockFilter blockFilter = blockFilterClass.newInstance();
            blockFilter.configure(job);
            return blockFilter;
        } catch (InstantiationException e) {
            LOG.warn("Could not instantiate block filter " + blockFilterClass.getName()
                    + "; falling back to a Cartesian product of blocks", e);
        } catch (IllegalAccessException e) {
            LOG.warn("Could not access block filter " + blockFilterClass.getName()
                    + "; falling back to a Cartesian product of blocks", e);
        }
        return null;
    }

    /** Pairs every block of the first input file with every block of the second one. */
    private void addCartesianSplits(JobConf job, Path[] inputFiles, List<CombineFileSplit> matchedSplits)
            throws IOException {
        BlockLocation[][] fileBlockLocations = new BlockLocation[inputFiles.length][];
        for (int i_file = 0; i_file < inputFiles.length; i_file++) {
            FileSystem fs = inputFiles[i_file].getFileSystem(job);
            FileStatus fileStatus = fs.getFileStatus(inputFiles[i_file]);
            fileBlockLocations[i_file] = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        }
        LOG.info("Doing a Cartesian product of blocks: " + fileBlockLocations[0].length + "x"
                + fileBlockLocations[1].length);
        for (BlockLocation block1 : fileBlockLocations[0]) {
            for (BlockLocation block2 : fileBlockLocations[1]) {
                FileSplit fsplit1 = new FileSplit(inputFiles[0], block1.getOffset(), block1.getLength(),
                        block1.getHosts());
                FileSplit fsplit2 = new FileSplit(inputFiles[1], block2.getOffset(), block2.getLength(),
                        block2.getHosts());
                matchedSplits.add((CombineFileSplit) FileSplitUtil.combineFileSplits(job, fsplit1, fsplit2));
            }
        }
    }

    /**
     * Lets the block filter choose partition pairs from the two global indexes and
     * adds a combined split for every pair of file splits of each chosen pair.
     *
     * @throws IOException the first I/O failure raised while splitting a partition file;
     *             previously such failures were only printed and the pair was dropped
     */
    private void addFilteredSplits(final JobConf job, final Path[] inputFiles, BlockFilter blockFilter,
            GlobalIndex<Partition> gIndex1, GlobalIndex<Partition> gIndex2,
            final List<CombineFileSplit> matchedSplits) throws IOException {
        // The callback cannot throw a checked exception, so remember the first one here.
        final IOException[] firstFailure = new IOException[1];

        blockFilter.selectCellPairs(gIndex1, gIndex2, new ResultCollector2<Partition, Partition>() {
            @Override
            public void collect(Partition p1, Partition p2) {
                if (firstFailure[0] != null) {
                    // The result is going to be discarded anyway; skip further work.
                    return;
                }
                try {
                    List<FileSplit> splits1 = new ArrayList<FileSplit>();
                    splitFile(job, new Path(inputFiles[0], p1.filename), splits1);

                    List<FileSplit> splits2 = new ArrayList<FileSplit>();
                    splitFile(job, new Path(inputFiles[1], p2.filename), splits2);

                    for (FileSplit split1 : splits1) {
                        for (FileSplit split2 : splits2) {
                            matchedSplits.add(
                                    (CombineFileSplit) FileSplitUtil.combineFileSplits(job, split1, split2));
                        }
                    }
                } catch (IOException e) {
                    firstFailure[0] = e;
                }
            }
        });

        if (firstFailure[0] != null) {
            // Silently returning a partial list of splits would lose input data.
            throw firstFailure[0];
        }
    }

    /**
     * Appends the file splits that cover {@code path} to {@code splits}.
     *
     * <p>A non-empty file is cut into chunks of the file's block size; the final chunk
     * may be up to {@code SPLIT_SLOP} times the block size and uses the hosts of the
     * file's last block location. An empty file yields a single zero-length split with
     * no hosts.
     *
     * @param job the job configuration used to resolve the file system
     * @param path the file to split
     * @param splits the list that receives the generated splits
     * @throws IOException if the file status or block locations cannot be read
     */
    public void splitFile(JobConf job, Path path, List<FileSplit> splits) throws IOException {
        FileSystem fs = path.getFileSystem(job);
        FileStatus file = fs.getFileStatus(path);
        long length = file.getLen();
        if (length == 0) {
            //Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
            return;
        }

        NetworkTopology clusterMap = new NetworkTopology();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);

        long blockSize = file.getBlockSize();
        // A reported block size of 0 would never shrink bytesRemaining and loop forever;
        // treat the whole file as one split in that case.
        long splitSize = blockSize > 0 ? blockSize : length;

        long bytesRemaining = length;
        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
            long offset = length - bytesRemaining;
            String[] splitHosts = getSplitHosts(blkLocations, offset, splitSize, clusterMap);
            splits.add(new FileSplit(path, offset, splitSize, splitHosts));
            bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
            splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts()));
        }
    }

}