edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java Source code

Java tutorial

Introduction

Here is the source code for edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java

Source

/***********************************************************************
* Copyright (c) 2015 by Regents of the University of Minnesota.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Apache License, Version 2.0 which 
* accompanies this distribution and is available at
* http://www.opensource.org/licenses/apache2.0.php.
*
*************************************************************************/
package edu.umn.cs.spatialHadoop;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.GenericOptionsParser;

import edu.umn.cs.spatialHadoop.core.CellInfo;
import edu.umn.cs.spatialHadoop.core.GridInfo;
import edu.umn.cs.spatialHadoop.core.GridRecordWriter;
import edu.umn.cs.spatialHadoop.core.Rectangle;
import edu.umn.cs.spatialHadoop.core.Shape;
import edu.umn.cs.spatialHadoop.core.ShapeRecordWriter;
import edu.umn.cs.spatialHadoop.core.SpatialSite;
import edu.umn.cs.spatialHadoop.mapred.GridOutputFormat;
import edu.umn.cs.spatialHadoop.mapred.RandomInputFormat;
import edu.umn.cs.spatialHadoop.mapred.RandomShapeGenerator;
import edu.umn.cs.spatialHadoop.mapred.RandomShapeGenerator.DistributionType;
import edu.umn.cs.spatialHadoop.operations.Repartition;
import edu.umn.cs.spatialHadoop.operations.Repartition.RepartitionReduce;

/**
 * Generates a random file of rectangles or points based on some user
 * parameters
 * @author Ahmed Eldawy
 *
 */
public class RandomSpatialGenerator {

    /**
     * Generates the random file with a MapReduce job, then merges the master
     * files found in the output directory into a single master file.
     *
     * @param outFile path of the job output directory
     * @param params user parameters; this method reads "shape", "mbr",
     *   "sindex" and, when a grid index is requested, "size"
     * @throws IOException if the job or a file system operation fails
     * @throws RuntimeException if "sindex" is set to anything other than "grid"
     */
    private static void generateMapReduce(Path outFile, OperationsParams params) throws IOException {
        JobConf job = new JobConf(params, RandomSpatialGenerator.class);
        job.setJobName("Generator");
        Shape shape = params.getShape("shape");

        FileSystem outFs = outFile.getFileSystem(job);

        ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
        // Set input format and map class
        job.setInputFormat(RandomInputFormat.class);
        job.setMapperClass(Repartition.RepartitionMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(shape.getClass());
        // Ask for ten map tasks per available map slot (at least ten in total)
        job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

        String sindex = params.get("sindex");
        Rectangle mbr = params.getShape("mbr").getMBR();

        // Decide how the generated space is partitioned. Any unsupported index
        // name is rejected here, so no later check on sindex is required.
        CellInfo[] cells;
        if (sindex == null) {
            cells = new CellInfo[] { new CellInfo(1, mbr) };
        } else if (sindex.equals("grid")) {
            GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
            long blocksize = outFs.getDefaultBlockSize(outFile);
            long size = params.getSize("size");
            int numOfCells = Repartition.calculateNumberOfPartitions(job, size, outFs, outFile, blocksize);
            gridInfo.calculateCellDimensions(numOfCells);
            cells = gridInfo.getAllCells();
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }

        SpatialSite.setCells(job, cells);

        if (cells.length == 1) {
            // All objects are in one partition. No need for a reduce phase
            job.setNumReduceTasks(0);
        } else {
            // More than one partition. Need a reduce phase to group shapes of the
            // same partition together
            job.setReducerClass(RepartitionReduce.class);
            // Roughly 90% of the reduce slots (rounded), never more than the
            // number of cells and never fewer than one
            job.setNumReduceTasks(
                    Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
        }

        // Set output path and format
        FileOutputFormat.setOutputPath(job, outFile);
        job.setOutputFormat(GridOutputFormat.class);

        JobClient.runJob(job);

        // TODO move the following part to OutputCommitter
        mergeMasterFiles(outFs, outFile);
    }

    /**
     * Concatenates every file in {@code dir} whose name contains "_master"
     * into one file named "_master" followed by the extension of the first
     * such file, deleting each source file once it has been copied. Does
     * nothing when the directory holds no such file.
     *
     * @param fs the file system that holds {@code dir}
     * @param dir the directory to scan and to write the merged file into
     * @throws IOException if listing, reading, writing or deleting fails
     */
    private static void mergeMasterFiles(FileSystem fs, Path dir) throws IOException {
        FileStatus[] masterFiles = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains("_master");
            }
        });
        if (masterFiles == null || masterFiles.length == 0) {
            // Nothing to merge
            return;
        }

        String firstName = masterFiles[0].getPath().getName();
        int dot = firstName.lastIndexOf('.');
        String ext = dot < 0 ? "" : firstName.substring(dot);
        Path masterPath = new Path(dir, "_master" + ext);

        if (masterFiles.length == 1 && firstName.equals(masterPath.getName())) {
            // The only master file already has the final name. Creating the
            // destination would truncate it and the loop below would then
            // delete it, so leave it untouched.
            return;
        }

        OutputStream destOut = fs.create(masterPath);
        try {
            byte[] buffer = new byte[4096];
            for (FileStatus part : masterFiles) {
                InputStream in = fs.open(part.getPath());
                try {
                    int bytesRead;
                    while ((bytesRead = in.read(buffer)) > 0) {
                        destOut.write(buffer, 0, bytesRead);
                    }
                } finally {
                    in.close();
                }
                // Only reached when the copy succeeded
                fs.delete(part.getPath(), false);
            }
        } finally {
            destOut.close();
        }
    }

    /**
     * Generates random shapes in the current process and writes them to the
     * given output path through a {@link GridRecordWriter}. The elapsed
     * generation time is printed to standard output.
     *
     * @param outFile path of the output directory; it is created with
     *   {@code mkdirs} before writing
     * @param params user parameters; this method reads "sindex", "mbr",
     *   "size", "shape", "rectsize" (default 100), "seed" (default: current
     *   time in milliseconds), "thickness" (default 1) and "type" (default
     *   {@code DistributionType.UNIFORM})
     * @throws IOException if a file system operation or the writer fails
     * @throws RuntimeException if "sindex" is set to anything other than "grid"
     */
    private static void generateFileLocal(Path outFile, OperationsParams params) throws IOException {
        JobConf job = new JobConf(params, RandomSpatialGenerator.class);
        FileSystem outFS = outFile.getFileSystem(params);
        long blocksize = outFS.getDefaultBlockSize(outFile);
        String sindex = params.get("sindex");
        Rectangle mbr = params.getShape("mbr").getMBR();
        long totalSize = params.getSize("size");

        // Calculate the dimensions of each partition based on the index type.
        // Any unsupported index name is rejected here, so no later check on
        // sindex is required.
        CellInfo[] cells;
        if (sindex == null) {
            cells = new CellInfo[] { new CellInfo(1, mbr) };
        } else if (sindex.equals("grid")) {
            int numPartitions = Repartition.calculateNumberOfPartitions(params, totalSize, outFS, outFile,
                    blocksize);

            GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
            gridInfo.calculateCellDimensions(numPartitions);
            cells = gridInfo.getAllCells();
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }

        // Read all generator options before opening the writer so that a bad
        // option cannot leave an open writer behind
        int rectSize = params.getInt("rectsize", 100);
        long seed = params.getLong("seed", System.currentTimeMillis());
        float circleThickness = params.getFloat("thickness", 1);
        DistributionType type = SpatialSite.getDistributionType(params, "type", DistributionType.UNIFORM);
        Shape shape = params.getShape("shape");

        outFS.mkdirs(outFile);

        ShapeRecordWriter<Shape> writer = new GridRecordWriter<Shape>(outFile, job, null, cells);
        long t1 = System.currentTimeMillis();
        try {
            RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type,
                    rectSize, seed, circleThickness);

            Rectangle key = generator.createKey();

            // The same shape instance is refilled by the generator on each call
            while (generator.next(key, shape)) {
                writer.write(NullWritable.get(), shape);
            }
        } finally {
            // Close the writer even when generation fails
            writer.close(null);
        }
        long t2 = System.currentTimeMillis();

        System.out.println("Generation time: " + (t2 - t1) + " millis");
    }

    /**
     * Prints the command line usage of this tool to standard output, followed
     * by the generic Hadoop command usage.
     */
    private static void printUsage() {
        System.out.println("Generates a file with random shapes");
        System.out.println("Parameters (* marks required parameters):");
        // The output path is dereferenced unconditionally by both generation
        // methods, so it is listed as required
        System.out.println("<output file> - (*) Path to the file to generate");
        System.out.println(
                "mbr:<x1,y1,x2,y2> - (*) The MBR of the generated data, given by its corners (x1,y1) and (x2,y2)");
        System.out.println("shape:<point|rectangle> - (*) Type of shapes in generated file");
        System.out.println("size:<size> - Total size of the generated file");
        System.out.println(
                "sindex:<grid> - Type of global index in generated file. The only supported index is 'grid'");
        System.out.println("type:<distribution> - Distribution of the generated shapes (default: uniform)");
        System.out.println("seed:<s> - Use a specific seed to generate the file");
        System.out.println("rectsize:<rs> - Maximum edge size for generated rectangles (default: 100)");
        System.out.println("thickness:<t> - Circle thickness passed to the shape generator (default: 1)");
        System.out.println("-overwrite - Overwrite output file without notice");
        GenericOptionsParser.printGenericCommandUsage(System.out);
    }

    /**
     * Command line entry point. Validates that "mbr", "shape" and the output
     * are specified, then generates the file locally when the requested size
     * is below 100 MB and with a MapReduce job otherwise. Exits with status 1
     * after printing the usage when validation fails.
     *
     * @param args Command line arguments
     * @throws IOException If an exception happens during the underlying MapReduce job 
     */
    public static void main(String[] args) throws IOException {
        OperationsParams params = new OperationsParams(new GenericOptionsParser(args), false);
        if (params.get("mbr") == null) {
            // The parameter is named "mbr"; point the user at the right name
            System.err.println("Set MBR of the generated file using mbr:<x1,y1,x2,y2>");
            printUsage();
            System.exit(1);
        }

        if (params.get("shape") == null) {
            System.err.println("Shape should be specified");
            printUsage();
            System.exit(1);
        }

        if (!params.checkOutput()) {
            printUsage();
            System.exit(1);
        }

        Path outputFile = params.getPath();

        long totalSize = params.getSize("size");

        // Files smaller than this many bytes (100 MB) are generated in-process
        final long localGenerationLimit = 100L * 1024 * 1024;
        if (totalSize < localGenerationLimit) {
            generateFileLocal(outputFile, params);
        } else {
            generateMapReduce(outputFile, params);
        }
    }

}