edu.umn.cs.spatialHadoop.operations.ConvexHull.java Source code


Introduction

Here is the source code for edu.umn.cs.spatialHadoop.operations.ConvexHull.java, the SpatialHadoop operation that computes the convex hull of a set of shapes.

Source

/***********************************************************************
* Copyright (c) 2015 by Regents of the University of Minnesota.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Apache License, Version 2.0 which 
* accompanies this distribution and is available at
* http://www.opensource.org/licenses/apache2.0.php.
*
*************************************************************************/
package edu.umn.cs.spatialHadoop.operations;

import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.util.GenericOptionsParser;

import edu.umn.cs.spatialHadoop.OperationsParams;
import edu.umn.cs.spatialHadoop.core.GridRecordWriter;
import edu.umn.cs.spatialHadoop.core.Point;
import edu.umn.cs.spatialHadoop.core.Rectangle;
import edu.umn.cs.spatialHadoop.core.ResultCollector;
import edu.umn.cs.spatialHadoop.core.Shape;
import edu.umn.cs.spatialHadoop.core.SpatialSite;
import edu.umn.cs.spatialHadoop.indexing.GlobalIndex;
import edu.umn.cs.spatialHadoop.indexing.Partition;
import edu.umn.cs.spatialHadoop.mapred.BlockFilter;
import edu.umn.cs.spatialHadoop.mapred.DefaultBlockFilter;
import edu.umn.cs.spatialHadoop.mapred.GridOutputFormat2;
import edu.umn.cs.spatialHadoop.mapred.ShapeInputFormat;
import edu.umn.cs.spatialHadoop.mapreduce.RTreeRecordReader3;
import edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3;
import edu.umn.cs.spatialHadoop.mapreduce.SpatialRecordReader3;
import edu.umn.cs.spatialHadoop.nasa.HDFRecordReader;
import edu.umn.cs.spatialHadoop.util.MemoryReporter;
import edu.umn.cs.spatialHadoop.util.Parallel;
import edu.umn.cs.spatialHadoop.util.Parallel.RunnableRange;

/**
 * Computes the convex hull for a set of shapes
 * @author Ahmed Eldawy
 *
 */
public class ConvexHull {

    private static final Log LOG = LogFactory.getLog(ConvexHull.class);

    /**
     * Computes the convex hull of a set of points in memory. This function
     * implements Andrew's monotone chain modification of the Graham scan
     * algorithm: the points are sorted, then the lower and upper chains of
     * the hull are built with two linear scans.
     * 
     * @param points the points to compute the hull for; the array is sorted in place
     * @return the points of the convex hull
     */
    public static <P extends Point> P[] convexHullInMemory(P[] points) {
        Stack<P> s1 = new Stack<P>();
        Stack<P> s2 = new Stack<P>();

        Arrays.sort(points);

        // Lower chain
        for (int i = 0; i < points.length; i++) {
            while (s1.size() > 1) {
                P p1 = s1.get(s1.size() - 2);
                P p2 = s1.get(s1.size() - 1);
                P p3 = points[i];
                double crossProduct = (p2.x - p1.x) * (p3.y - p1.y) - (p2.y - p1.y) * (p3.x - p1.x);
                if (crossProduct <= 0)
                    s1.pop();
                else
                    break;
            }
            s1.push(points[i]);
        }

        // Upper chain
        for (int i = points.length - 1; i >= 0; i--) {
            while (s2.size() > 1) {
                P p1 = s2.get(s2.size() - 2);
                P p2 = s2.get(s2.size() - 1);
                P p3 = points[i];
                double crossProduct = (p2.x - p1.x) * (p3.y - p1.y) - (p2.y - p1.y) * (p3.x - p1.x);
                if (crossProduct <= 0)
                    s2.pop();
                else
                    break;
            }
            s2.push(points[i]);
        }

        s1.pop();
        s2.pop();
        s1.addAll(s2);
        return s1.toArray((P[]) Array.newInstance(s1.firstElement().getClass(), s1.size()));
    }
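
    /**
     * A minimal usage sketch, not part of the original ConvexHull.java: it
     * builds a handful of in-memory points and calls convexHullInMemory
     * directly. The Point(x, y) constructor and public x/y fields come from
     * SpatialHadoop's core Point class.
     */
    static Point[] exampleInMemoryHull() {
        Point[] pts = { new Point(0, 0), new Point(4, 0), new Point(2, 1),
                new Point(4, 4), new Point(0, 4) };
        // (2, 1) lies inside the square, so only the four corner points are
        // returned by the lower/upper chain scans above.
        return convexHullInMemory(pts);
    }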

    /**
     * Computes the convex hull of an input file using a single-machine algorithm.
     * The result is written to the output file. If the output file is null, the
     * result is simply discarded.
     * @param inFile the input file of points
     * @param outFile the output file, or null to discard the result
     * @param params additional parameters for the operation
     * @throws IOException
     * @throws InterruptedException
     */
    public static void convexHullLocal(Path inFile, Path outFile, final OperationsParams params)
            throws IOException, InterruptedException {
        if (params.getBoolean("mem", false))
            MemoryReporter.startReporting();
        // 1- Split the input path/file to get splits that can be processed
        // independently
        final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
        Job job = Job.getInstance(params);
        SpatialInputFormat3.setInputPaths(job, inFile);
        final List<InputSplit> splits = inputFormat.getSplits(job);

        // 2- Read all input points in memory
        LOG.info("Reading points from " + splits.size() + " splits");
        List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
            @Override
            public Point[] run(int i1, int i2) {
                try {
                    List<Point> finalPoints = new ArrayList<Point>();
                    final int MaxSize = 100000;
                    Point[] points = new Point[MaxSize];
                    int size = 0;
                    for (int i = i1; i < i2; i++) {
                        org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits
                                .get(i);
                        final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                                .createRecordReader(fsplit, null);
                        if (reader instanceof SpatialRecordReader3) {
                            ((SpatialRecordReader3) reader).initialize(fsplit, params);
                        } else if (reader instanceof RTreeRecordReader3) {
                            ((RTreeRecordReader3) reader).initialize(fsplit, params);
                        } else if (reader instanceof HDFRecordReader) {
                            ((HDFRecordReader) reader).initialize(fsplit, params);
                        } else {
                            throw new RuntimeException("Unknown record reader");
                        }
                        while (reader.nextKeyValue()) {
                            Iterable<Point> pts = reader.getCurrentValue();
                            for (Point p : pts) {
                                points[size++] = p.clone();
                                if (size >= points.length) {
                                    // Perform convex hull and write the result to finalPoints
                                    Point[] chPoints = convexHullInMemory(points);
                                    for (Point hullPoint : chPoints)
                                        finalPoints.add(hullPoint);
                                    size = 0; // reset
                                }
                            }
                        }
                        reader.close();
                    }
                    while (size-- > 0)
                        finalPoints.add(points[size]);
                    return finalPoints.toArray(new Point[finalPoints.size()]);
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
                return null;
            }
        }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

        int totalNumPoints = 0;
        for (Point[] list : allLists)
            totalNumPoints += list.length;

        LOG.info("Read " + totalNumPoints + " points and merging into one list");
        Point[] allPoints = new Point[totalNumPoints];
        int pointer = 0;

        for (Point[] list : allLists) {
            System.arraycopy(list, 0, allPoints, pointer, list.length);
            pointer += list.length;
        }
        allLists.clear(); // To let the GC collect it

        Point[] ch = convexHullInMemory(allPoints);

        if (outFile != null) {
            if (params.getBoolean("overwrite", false)) {
                FileSystem outFs = outFile.getFileSystem(new Configuration());
                outFs.delete(outFile, true);
            }
            GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
            for (Point pt : ch) {
                out.write(NullWritable.get(), pt);
            }
            out.close(null);
        }
    }
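
    /**
     * A hedged sketch, not part of the original file, of driving the local
     * algorithm above programmatically. The "shape:point" argument is an
     * assumption about how the input shape is declared to OperationsParams,
     * and the file names are placeholders.
     */
    static void exampleLocalRun() throws IOException, InterruptedException {
        OperationsParams params = new OperationsParams(
                new GenericOptionsParser(new String[] { "shape:point" }));
        convexHullLocal(new Path("points.txt"), new Path("points.hull"), params);
    }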

    /**
     * Filters partitions to remove the ones that do not contribute to the answer.
     * A partition is pruned if it does not have any points on any of the four
     * skylines.
     * @author Ahmed Eldawy
     *
     */
    public static class ConvexHullFilter extends DefaultBlockFilter {

        @Override
        public void selectCells(GlobalIndex<Partition> gIndex, ResultCollector<Partition> output) {
            Set<Partition> non_dominated_partitions_all = new HashSet<Partition>();
            for (OperationsParams.Direction dir : OperationsParams.Direction.values()) {
                Vector<Partition> non_dominated_partitions = new Vector<Partition>();
                for (Partition p : gIndex) {
                    boolean dominated = false;
                    int i = 0;
                    while (!dominated && i < non_dominated_partitions.size()) {
                        Partition p2 = non_dominated_partitions.get(i);
                        dominated = Skyline.skylineDominate(p2, p, dir, gIndex.isCompact());

                        // Check if the new partition dominates the previously selected one
                        if (Skyline.skylineDominate(p, p2, dir, gIndex.isCompact())) {
                            // p2 is no longer non-dominated
                            non_dominated_partitions.remove(i);
                        } else {
                            // Skip to next non-dominated partition
                            i++;
                        }
                    }
                    if (!dominated) {
                        non_dominated_partitions.add(p);
                    }
                }
                non_dominated_partitions_all.addAll(non_dominated_partitions);
            }

            LOG.info("Processing " + non_dominated_partitions_all.size() + " out of " + gIndex.size()
                    + " partition");
            System.out.println("Processing " + non_dominated_partitions_all.size() + " out of " + gIndex.size()
                    + " partition");
            // Output all non-dominated partitions
            for (Partition p : non_dominated_partitions_all) {
                output.collect(p);
            }
        }
    }
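
    /**
     * Illustrative sketch only, not part of the original file: the pruning idea
     * behind ConvexHullFilter, specialized to the max-x/max-y direction and
     * written against plain Rectangles (public x1, y1, x2, y2 fields). If every
     * point of one partition's MBR is dominated by a point of another partition,
     * the first partition cannot contribute a hull point on that side. The real
     * filter delegates this test to Skyline.skylineDominate for all four
     * directions, taking the compactness of the index into account.
     */
    static boolean exampleDominatedMaxMax(Rectangle pruned, Rectangle by) {
        // The lowest corner of 'by' already dominates the highest corner of
        // 'pruned', so no point of 'pruned' can be on the max-x/max-y skyline.
        return by.x1 >= pruned.x2 && by.y1 >= pruned.y2;
    }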

    /**
     * An identity map function that returns values as-is with a null key. This
     * ensures that all values are reduced in one reducer.
     * @author Ahmed Eldawy
     */
    public static class IdentityMapper extends MapReduceBase
            implements Mapper<Rectangle, Point, NullWritable, Point> {
        @Override
        public void map(Rectangle dummy, Point point, OutputCollector<NullWritable, Point> output,
                Reporter reporter) throws IOException {
            output.collect(NullWritable.get(), point);
        }

    }

    public static class ConvexHullReducer extends MapReduceBase
            implements Reducer<NullWritable, Point, NullWritable, Point> {

        @Override
        public void reduce(NullWritable dummy, Iterator<Point> points, OutputCollector<NullWritable, Point> output,
                Reporter reporter) throws IOException {
            Vector<Point> vpoints = new Vector<Point>();
            while (points.hasNext()) {
                vpoints.add(points.next().clone());
            }
            Point[] convex_hull = convexHullInMemory(vpoints.toArray(new Point[vpoints.size()]));
            for (Point pt : convex_hull) {
                output.collect(dummy, pt);
            }
        }
    }

    public static void convexHullMapReduce(Path inFile, Path userOutPath, OperationsParams params)
            throws IOException {
        JobConf job = new JobConf(params, ConvexHull.class);
        Path outPath = userOutPath;
        FileSystem outFs = (userOutPath == null ? inFile : userOutPath).getFileSystem(job);
        Shape shape = params.getShape("shape");

        if (outPath == null) {
            do {
                outPath = new Path(inFile.toUri().getPath() + ".convex_hull_" + (int) (Math.random() * 1000000));
            } while (outFs.exists(outPath));
        } else {
            if (outFs.exists(outPath)) {
                if (params.getBoolean("overwrite", false)) {
                    outFs.delete(outPath, true);
                } else {
                    throw new RuntimeException("Output path already exists and -overwrite flag is not set");
                }
            }
        }

        job.setJobName("ConvexHull");
        job.setClass(SpatialSite.FilterClass, ConvexHullFilter.class, BlockFilter.class);
        job.setMapperClass(IdentityMapper.class);
        job.setCombinerClass(ConvexHullReducer.class);
        job.setReducerClass(ConvexHullReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(shape.getClass());
        job.setInputFormat(ShapeInputFormat.class);
        ShapeInputFormat.addInputPath(job, inFile);
        job.setOutputFormat(GridOutputFormat2.class);
        GridOutputFormat2.setOutputPath(job, outPath);

        JobClient.runJob(job);

        // If the output path was not set by the user, delete the temporary output
        if (userOutPath == null)
            outFs.delete(outPath, true);
    }

    private static void printUsage() {
        System.out.println("Computes the convex hull of an input file of shapes");
        System.out.println("Parameters: (* marks required parameters)");
        System.out.println("<input file>: (*) Path to input file");
        System.out.println("<output file>: Path to output file");
        System.out.println("-overwrite: Overwrite output file without notice");

        GenericOptionsParser.printGenericCommandUsage(System.out);
    }

    public static void convexHull(Path inFile, Path outFile, OperationsParams params)
            throws IOException, InterruptedException {
        if (OperationsParams.isLocal(params, inFile)) {
            // Process without MapReduce
            convexHullLocal(inFile, outFile, params);
        } else {
            // Process with MapReduce
            convexHullMapReduce(inFile, outFile, params);
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException {
        OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
        Path[] paths = params.getPaths();
        if (paths.length <= 1 && !params.checkInput()) {
            printUsage();
            System.exit(1);
        }
        if (paths.length >= 2 && !params.checkInputOutput()) {
            printUsage();
            System.exit(1);
        }
        Path inFile = params.getInputPath();
        Path outFile = params.getOutputPath();

        long t1 = System.currentTimeMillis();
        convexHull(inFile, outFile, params);
        long t2 = System.currentTimeMillis();
        System.out.println("Total time: " + (t2 - t1) + " millis");
    }

}
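
For reference, a convex hull job is normally launched through main() above from the SpatialHadoop command line. A typical invocation might look like the following; the launcher script name (shadoop), the operation name (convexhull), and the shape:point parameter are assumptions about the surrounding SpatialHadoop distribution rather than something defined in this file, while the input path, output path, and -overwrite flag mirror the options listed in printUsage():

    shadoop convexhull points.txt points.hull shape:point -overwrite

With -overwrite set, an existing output path is deleted before the job runs; without it, convexHullMapReduce throws a RuntimeException when the output path already exists, as shown in the code above.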