Java tutorial: range queries over a spatial file with spateDB's RangeQuery

The walkthrough below reproduces RangeQuery.java from com.ricemap.spateDB.operations, which runs a range query over a spatial file either as a MapReduce job or locally on a single machine.
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
 * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */
package com.ricemap.spateDB.operations;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Iterator;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.Task;

import com.ricemap.spateDB.core.GlobalIndex;
import com.ricemap.spateDB.core.Partition;
import com.ricemap.spateDB.core.RTree;
import com.ricemap.spateDB.core.ResultCollector;
import com.ricemap.spateDB.core.SpatialSite;
import com.ricemap.spateDB.mapred.BlockFilter;
import com.ricemap.spateDB.mapred.DefaultBlockFilter;
import com.ricemap.spateDB.mapred.RTreeInputFormat;
import com.ricemap.spateDB.mapred.ShapeInputFormat;
import com.ricemap.spateDB.mapred.ShapeRecordReader;
import com.ricemap.spateDB.mapred.TextOutputFormat;
import com.ricemap.spateDB.shape.Prism;
import com.ricemap.spateDB.shape.Shape;
import com.ricemap.spateDB.util.CommandLineArguments;
import com.ricemap.spateDB.util.QueryInput;

/**
 * Performs a range query over a spatial file.
 *
 * @author tonyren, Ahmed Eldawy
 */
public class RangeQuery {
    /** Logger for RangeQuery */
    private static final Log LOG = LogFactory.getLog(RangeQuery.class);

    /** Name of the config line that stores the class name of the query shape */
    public static final String QUERY_SHAPE_CLASS = "com.ricemap.spateDB.operations.RangeQuery.QueryShapeClass";

    /** Name of the config line that stores the query shape */
    public static final String QUERY_SHAPE = "com.ricemap.spateDB.operations.RangeQuery.QueryShape";

    /** Name of the config line that stores the query field */
    public static final String QUERY_FIELD = "com.ricemap.spateDB.operations.RangeQuery.QueryField";

    /** Reference to the last range query job submitted */
    public static RunningJob lastRunningJob;
    /**
     * A filter function that selects partitions overlapping with a query range.
     *
     * @author tonyren, Ahmed Eldawy
     */
    public static class RangeFilter extends DefaultBlockFilter {
        /** Name of the config line that stores the query shape */
        private static final String QUERY_SHAPE = "RangeFilter.QueryShape";

        /** A shape that is used to filter input */
        private Shape queryRange;

        /**
         * Sets the query range in the given job.
         *
         * @param job the job to configure
         * @param shape the query range
         */
        public static void setQueryRange(JobConf job, Shape shape) {
            SpatialSite.setShape(job, QUERY_SHAPE, shape);
        }

        @Override
        public void configure(JobConf job) {
            this.queryRange = SpatialSite.getShape(job, QUERY_SHAPE);
        }

        @Override
        public void selectCells(GlobalIndex<Partition> gIndex, ResultCollector<Partition> output) {
            int numPartitions;
            if (gIndex.isReplicated()) {
                // Need to process all partitions to perform duplicate avoidance
                numPartitions = gIndex.rangeQuery(queryRange, output);
                LOG.info("Selected " + numPartitions + " partitions overlapping " + queryRange);
            } else {
                Prism queryRange = this.queryRange.getMBR();
                // Need to process only partitions on the perimeter of the query range.
                // Partitions that are totally contained in the query range should not
                // be processed and should be copied to output directly.
                numPartitions = 0;
                for (Partition p : gIndex) {
                    if (queryRange.contains(p)) {
                        // TODO partitions totally contained in the query range should
                        // be copied to output directly.
                        // XXX Until hard links are supported, R-tree blocks are
                        // processed similar to R+-tree.
                        output.collect(p);
                        numPartitions++;
                    } else if (p.isIntersected(queryRange)) {
                        output.collect(p);
                        numPartitions++;
                    }
                }
                LOG.info("Selected " + numPartitions + " partitions on the perimeter of " + queryRange);
            }
        }
    }

    /**
     * The reduce function used for distinct count. Each key is emitted exactly
     * once, no matter how many times the mappers produced it.
     *
     * @author tonyren
     */
    public static class DistinctQueryReduce extends MapReduceBase
            implements Reducer<Writable, NullWritable, Writable, NullWritable> {

        @Override
        public void reduce(Writable key, Iterator<NullWritable> value,
                OutputCollector<Writable, NullWritable> output, Reporter reporter) {
            try {
                output.collect(key, NullWritable.get());
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * The reduce function used for distribution. Sums the per-mapper counts of
     * each key.
     *
     * @author tonyren
     */
    public static class DistributionQueryReduce extends MapReduceBase
            implements Reducer<Writable, IntWritable, Writable, IntWritable> {

        @Override
        public void reduce(Writable key, Iterator<IntWritable> value,
                OutputCollector<Writable, IntWritable> output, Reporter reporter) {
            try {
                int count = 0;
                while (value.hasNext()) {
                    count += value.next().get();
                }
                output.collect(key, new IntWritable(count));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
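    // ------------------------------------------------------------------
    // Illustrative sketch (not part of the original file): the two reducers
    // above implement COUNT DISTINCT and a value histogram, respectively.
    // Outside MapReduce the same semantics fit in a HashMap; the method
    // below is a hypothetical helper that exists only for illustration.
    // ------------------------------------------------------------------
    static java.util.Map<Integer, Integer> sketchDistribution(int[] mappedValues) {
        // Equivalent of DistributionQueryReduce: sum a count of 1 per occurrence
        java.util.Map<Integer, Integer> histogram = new java.util.HashMap<Integer, Integer>();
        for (int v : mappedValues) {
            Integer old = histogram.get(v);
            histogram.put(v, old == null ? 1 : old + 1);
        }
        // The equivalent of DistinctQueryReduce would be histogram.keySet():
        // each key reported once, however many times it was emitted.
        return histogram;
    }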
    /**
     * The map function used for distinct count.
     *
     * @author tonyren
     */
    public static class DistinctQueryMap extends MapReduceBase
            implements Mapper<Prism, Writable, Writable, NullWritable> {

        /** A shape that is used to filter input */
        private Shape queryShape;
        private Prism queryMbr;
        private String queryField;

        @Override
        public void configure(JobConf job) {
            super.configure(job);
            try {
                queryField = job.get(QUERY_FIELD);
                String queryShapeClassName = job.get(QUERY_SHAPE_CLASS);
                Class<? extends Shape> queryShapeClass =
                        Class.forName(queryShapeClassName).asSubclass(Shape.class);
                queryShape = queryShapeClass.newInstance();
                queryShape.fromText(new Text(job.get(QUERY_SHAPE)));
                queryMbr = queryShape.getMBR();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            } catch (InstantiationException e) {
                e.printStackTrace();
            } catch (IllegalAccessException e) {
                e.printStackTrace();
            }
        }

        private final NullWritable dummy = NullWritable.get();

        /**
         * Map function for non-indexed blocks.
         */
        @Override
        public void map(final Prism cellMbr, final Writable value,
                final OutputCollector<Writable, NullWritable> output, Reporter reporter)
                throws IOException {
            if (value instanceof Shape) {
                Shape shape = (Shape) value;
                try {
                    Class<?> c = shape.getClass();
                    Field f = c.getDeclaredField(queryField);
                    f.setAccessible(true);
                    if (shape.isIntersected(queryShape)) {
                        boolean report_result = false;
                        if (cellMbr.isValid()) {
                            // Check for duplicate avoidance using the reference
                            // point technique
                            double reference_t = Math.max(queryMbr.t1, shape.getMBR().t1);
                            double reference_x = Math.max(queryMbr.x1, shape.getMBR().x1);
                            double reference_y = Math.max(queryMbr.y1, shape.getMBR().y1);
                            report_result = cellMbr.contains(reference_t, reference_x, reference_y);
                        } else {
                            // A heap block, report right away
                            report_result = true;
                        }
                        if (report_result) {
                            Writable result = null;
                            if (f.getType().equals(Integer.TYPE)) {
                                result = new IntWritable((int) f.get(shape));
                            } else if (f.getType().equals(Double.TYPE)) {
                                // Fixed: the original cast to int, which truncated
                                // (and would fail on) double fields
                                result = new DoubleWritable((double) f.get(shape));
                            } else if (f.getType().equals(Long.TYPE)) {
                                // Fixed: the original cast to int, truncating long fields
                                result = new LongWritable((long) f.get(shape));
                            }
                            output.collect(result, dummy);
                        }
                    }
                } catch (NoSuchFieldException | SecurityException
                        | IllegalArgumentException | IllegalAccessException e1) {
                    e1.printStackTrace();
                }
            } else if (value instanceof RTree) {
                @SuppressWarnings("unchecked")
                RTree<Shape> shapes = (RTree<Shape>) value;
                if (shapes.columnar) {
                    shapes.searchColumnar(queryMbr, new ResultCollector<Writable>() {
                        @Override
                        public void collect(Writable shape) {
                            try {
                                output.collect(shape, dummy);
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }, queryField);
                } else {
                    shapes.search(queryMbr, new ResultCollector<Shape>() {
                        @Override
                        public void collect(Shape shape) {
                            try {
                                Class<?> c = shape.getClass();
                                Field f = c.getDeclaredField(queryField);
                                f.setAccessible(true);
                                Writable result = null;
                                if (f.getType().equals(Integer.TYPE)) {
                                    result = new IntWritable((int) f.get(shape));
                                } else if (f.getType().equals(Double.TYPE)) {
                                    result = new DoubleWritable((double) f.get(shape));
                                } else if (f.getType().equals(Long.TYPE)) {
                                    result = new LongWritable((long) f.get(shape));
                                }
                                output.collect(result, dummy);
                            } catch (IOException | SecurityException | IllegalArgumentException
                                    | NoSuchFieldException | IllegalAccessException e) {
                                e.printStackTrace();
                            }
                        }
                    }, queryField);
                }
            }
        }
    }
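    // ------------------------------------------------------------------
    // Illustrative sketch (not part of the original file): the duplicate
    // avoidance above is the "reference point" technique. A shape that
    // overlaps several partitions is stored in all of them; to report it
    // exactly once, each mapper computes a canonical point (the lower
    // corner of the intersection of the query MBR and the shape MBR) and
    // reports the shape only if that point falls inside its own cell.
    // The hypothetical helper below shows the one-dimensional idea with
    // plain doubles.
    // ------------------------------------------------------------------
    static boolean sketchReferencePoint1D(double cellLo, double cellHi,
            double queryLo, double shapeLo) {
        // The reference coordinate is the larger of the two lower bounds,
        // i.e. the low edge of the query/shape intersection.
        double reference = Math.max(queryLo, shapeLo);
        // Exactly one cell of a disjoint partitioning contains this point,
        // so exactly one mapper reports the shape.
        return cellLo <= reference && reference < cellHi;
    }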
    /**
     * The map function used for distribution queries.
     *
     * @author tonyren, eldawy
     */
    public static class DistributionQueryMap extends MapReduceBase
            implements Mapper<Prism, Writable, Writable, IntWritable> {

        /** A shape that is used to filter input */
        private Shape queryShape;
        private Prism queryMbr;
        private String queryField;

        @Override
        public void configure(JobConf job) {
            super.configure(job);
            try {
                String queryShapeClassName = job.get(QUERY_SHAPE_CLASS);
                Class<? extends Shape> queryShapeClass =
                        Class.forName(queryShapeClassName).asSubclass(Shape.class);
                queryShape = queryShapeClass.newInstance();
                queryShape.fromText(new Text(job.get(QUERY_SHAPE)));
                queryMbr = queryShape.getMBR();
                queryField = job.get(QUERY_FIELD);
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            } catch (InstantiationException e) {
                e.printStackTrace();
            } catch (IllegalAccessException e) {
                e.printStackTrace();
            }
        }

        private final IntWritable one = new IntWritable(1);

        /**
         * Map function for non-indexed blocks.
         */
        @Override
        public void map(final Prism cellMbr, final Writable value,
                final OutputCollector<Writable, IntWritable> output, Reporter reporter)
                throws IOException {
            if (value instanceof Shape) {
                try {
                    Shape shape = (Shape) value;
                    Class<?> c = shape.getClass();
                    Field f = c.getDeclaredField(queryField);
                    f.setAccessible(true);
                    if (shape.isIntersected(queryShape)) {
                        boolean report_result = false;
                        if (cellMbr.isValid()) {
                            // Check for duplicate avoidance using the reference
                            // point technique
                            double reference_t = Math.max(queryMbr.t1, shape.getMBR().t1);
                            double reference_x = Math.max(queryMbr.x1, shape.getMBR().x1);
                            double reference_y = Math.max(queryMbr.y1, shape.getMBR().y1);
                            report_result = cellMbr.contains(reference_t, reference_x, reference_y);
                        } else {
                            // A heap block, report right away
                            report_result = true;
                        }
                        if (report_result) {
                            Writable result = null;
                            try {
                                if (f.getType().equals(Integer.TYPE)) {
                                    result = new IntWritable((int) f.get(shape));
                                } else if (f.getType().equals(Long.TYPE)) {
                                    // Fixed: the original cast to int, truncating long fields
                                    result = new LongWritable((long) f.get(shape));
                                } else if (f.getType().equals(Double.TYPE)) {
                                    // Fixed: the original cast to int, truncating double fields
                                    result = new DoubleWritable((double) f.get(shape));
                                }
                            } catch (IllegalArgumentException | IllegalAccessException e) {
                                e.printStackTrace();
                            }
                            output.collect(result, one);
                        }
                    }
                } catch (IllegalArgumentException | NoSuchFieldException | SecurityException e) {
                    e.printStackTrace();
                }
            } else if (value instanceof RTree) {
                @SuppressWarnings("unchecked")
                RTree<Shape> shapes = (RTree<Shape>) value;
                if (shapes.columnar) {
                    shapes.searchColumnar(queryMbr, new ResultCollector<Writable>() {
                        @Override
                        public void collect(Writable shape) {
                            try {
                                output.collect(shape, one);
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }, queryField);
                } else {
                    shapes.search(queryMbr, new ResultCollector<Shape>() {
                        @Override
                        public void collect(Shape shape) {
                            try {
                                Class<?> c = shape.getClass();
                                Field f = c.getDeclaredField(queryField);
                                f.setAccessible(true);
                                Writable result = null;
                                // Fixed: the original tested Integer.class in all
                                // three branches; dispatch on the primitive type of
                                // the field, as in DistinctQueryMap above.
                                if (f.getType().equals(Integer.TYPE)) {
                                    result = new IntWritable((int) f.get(shape));
                                } else if (f.getType().equals(Double.TYPE)) {
                                    result = new DoubleWritable((double) f.get(shape));
                                } else if (f.getType().equals(Long.TYPE)) {
                                    result = new LongWritable((long) f.get(shape));
                                }
                                output.collect(result, one);
                            } catch (IOException | SecurityException | IllegalArgumentException
                                    | NoSuchFieldException | IllegalAccessException e) {
                                e.printStackTrace();
                            }
                        }
                    }, queryField);
                }
            }
        }
    }
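    // ------------------------------------------------------------------
    // Illustrative sketch (not part of the original file): both mappers
    // repeat the same reflection idiom, reading a declared primitive field
    // by name and boxing it into the matching Hadoop Writable. A shared
    // helper could look like this hypothetical method, which uses
    // Field.getInt/getLong/getDouble to avoid the cast-and-unbox dance.
    // ------------------------------------------------------------------
    static Writable sketchFieldToWritable(Object shape, String fieldName)
            throws ReflectiveOperationException {
        Field f = shape.getClass().getDeclaredField(fieldName);
        f.setAccessible(true); // the field may be private
        Class<?> t = f.getType();
        if (t == Integer.TYPE) {
            return new IntWritable(f.getInt(shape));
        } else if (t == Long.TYPE) {
            return new LongWritable(f.getLong(shape));
        } else if (t == Double.TYPE) {
            return new DoubleWritable(f.getDouble(shape));
        }
        return null; // unsupported field type, mirroring the mappers above
    }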
    /**
     * Performs a range query using MapReduce.
     *
     * @param fs the file system that contains the input file
     * @param inputFile path to the input file
     * @param userOutputPath path to the output file; if <code>null</code>, a
     *            temporary path is generated and deleted after the job finishes
     * @param queryShape the query range
     * @param shape an instance of the shape stored in the input file
     * @param overwrite whether to overwrite an existing output path
     * @param background if <code>true</code>, the job is submitted without
     *            waiting for completion and -1 is returned
     * @param query the query type (distinct or distribution) and query field
     * @return number of results, or -1 for a background job
     * @throws IOException
     */
    public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath,
            Shape queryShape, Shape shape, boolean overwrite, boolean background, QueryInput query)
            throws IOException {
        JobConf job = new JobConf(FileMBR.class);

        FileSystem outFs = inputFile.getFileSystem(job);
        Path outputPath = userOutputPath;
        if (outputPath == null) {
            do {
                outputPath = new Path(inputFile.toUri().getPath() + ".rangequery_"
                        + (int) (Math.random() * 1000000));
            } while (outFs.exists(outputPath));
        } else {
            if (outFs.exists(outputPath)) {
                if (overwrite) {
                    outFs.delete(outputPath, true);
                } else {
                    throw new RuntimeException("Output path already exists and -overwrite flag is not set");
                }
            }
        }

        job.setJobName("RangeQuery");
        job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
        // Set query range for the filter
        RangeFilter.setQueryRange(job, queryShape);

        ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
        job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
        job.setNumReduceTasks(3);

        // Decide which map function to use depending on how blocks are indexed,
        // and also which input format to use
        if (SpatialSite.isRTree(fs, inputFile)) {
            // R-tree indexed file
            LOG.info("Searching an RTree indexed file");
            job.setInputFormat(RTreeInputFormat.class);
        } else {
            // A file with no local index
            LOG.info("Searching a non local-indexed file");
            job.setInputFormat(ShapeInputFormat.class);
        }

        GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
        // if (gIndex != null && gIndex.isReplicated()){
        // job.setMapperClass(RangeQueryMap.class);

        Class<?> OutputKey = NullWritable.class;
        try {
            Class<?> c = shape.getClass();
            Field f = c.getDeclaredField(query.field);
            f.setAccessible(true);
            if (f.getType().equals(Integer.TYPE)) {
                OutputKey = IntWritable.class;
            } else if (f.getType().equals(Double.TYPE)) {
                OutputKey = DoubleWritable.class;
            } else if (f.getType().equals(Long.TYPE)) {
                OutputKey = LongWritable.class;
            }
        } catch (SecurityException e) {
            e.printStackTrace();
        } catch (NoSuchFieldException e) {
            e.printStackTrace();
        }
        job.setMapOutputKeyClass(OutputKey);

        switch (query.type) {
        case Distinct:
            job.setMapperClass(DistinctQueryMap.class);
            job.setReducerClass(DistinctQueryReduce.class);
            job.setMapOutputValueClass(NullWritable.class);
            break;
        case Distribution:
            job.setMapperClass(DistributionQueryMap.class);
            job.setReducerClass(DistributionQueryReduce.class);
            job.setMapOutputValueClass(IntWritable.class);
            break;
        default:
            break;
        }
        // }
        // else
        // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

        // Set query range for the map function
        job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
        job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
        job.set(QUERY_FIELD, query.field);

        // Set shape class for the SpatialInputFormat
        SpatialSite.setShapeClass(job, shape.getClass());

        job.setOutputFormat(TextOutputFormat.class);

        ShapeInputFormat.setInputPaths(job, inputFile);
        TextOutputFormat.setOutputPath(job, outputPath);

        // Submit the job
        if (!background) {
            RunningJob runningJob = JobClient.runJob(job);
            Counters counters = runningJob.getCounters();
            Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
            final long resultCount = outputRecordCounter.getValue();

            // If outputPath was not set by the user, automatically delete it
            if (userOutputPath == null)
                outFs.delete(outputPath, true);

            return resultCount;
        } else {
            JobClient jc = new JobClient(job);
            lastRunningJob = jc.submitJob(job);
            return -1;
        }
    }
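    // ------------------------------------------------------------------
    // Illustrative sketch (not part of the original file): a minimal
    // invocation of rangeQueryMapReduce. The input path and prism string
    // are hypothetical; the query and shapes are obtained through
    // CommandLineArguments, exactly as main() does below, because their
    // construction is project specific. The "prism:" argument format is
    // taken from printUsage().
    // ------------------------------------------------------------------
    static void sketchSubmitRangeQuery() throws IOException {
        String[] args = { "/data/trajectories.rtree",  // hypothetical input file
                "prism:0,0,0,100,100,100" };           // hypothetical query prism
        CommandLineArguments cla = new CommandLineArguments(args);
        QueryInput query = cla.getQuery();
        Path inputFile = cla.getPaths()[0];
        FileSystem fs = inputFile.getFileSystem(new JobConf(FileMBR.class));
        long results = rangeQueryMapReduce(fs, inputFile, null /* temp output */,
                cla.getPrism(), cla.getShape(true), false /* overwrite */,
                false /* background */, query);
        System.out.println("Found " + results + " matching records");
    }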
    /**
     * Runs a range query on the local machine by iterating over the whole file.
     *
     * @param fs
     *            - FileSystem that contains input file
     * @param file
     *            - path to the input file
     * @param queryRange
     *            - The range to look in
     * @param shape
     *            - An instance of the shape stored in file
     * @param output
     *            - Output is sent to this collector. If <code>null</code>,
     *            output is not collected and only the number of results is
     *            returned.
     * @return number of results found
     * @throws IOException
     */
    public static <S extends Shape> long rangeQueryLocal(FileSystem fs, Path file,
            Shape queryRange, S shape, ResultCollector<S> output) throws IOException {
        long file_size = fs.getFileStatus(file).getLen();
        ShapeRecordReader<S> shapeReader = new ShapeRecordReader<S>(fs.open(file), 0, file_size);

        long resultCount = 0;
        Prism cell = shapeReader.createKey();

        while (shapeReader.next(cell, shape)) {
            if (shape.isIntersected(queryRange)) {
                boolean report_result;
                if (cell.isValid()) {
                    // Check for duplicate avoidance
                    Prism intersection_mbr = queryRange.getMBR().getIntersection(shape.getMBR());
                    report_result = cell.contains(intersection_mbr.t1, intersection_mbr.x1,
                            intersection_mbr.y1);
                } else {
                    report_result = true;
                }
                if (report_result) {
                    resultCount++;
                    if (output != null) {
                        output.collect(shape);
                    }
                }
            }
        }
        shapeReader.close();
        return resultCount;
    }

    private static void printUsage() {
        System.out.println("Performs a range query on an input file");
        System.out.println("Parameters: (* marks required parameters)");
        System.out.println("<input file> - (*) Path to input file");
        System.out.println("<output file> - Path to output file");
        System.out.println("prism:<t1,x1,y1,t2,x2,y2> - (*) Query Prism");
        System.out.println("-overwrite - Overwrite output file without notice");
    }
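    // ------------------------------------------------------------------
    // Illustrative sketch (not part of the original file): calling
    // rangeQueryLocal with a collector that prints each match. Paths and
    // the prism string are hypothetical, parsed the same way as in the
    // sketch above.
    // ------------------------------------------------------------------
    static void sketchLocalQuery() throws IOException {
        CommandLineArguments cla = new CommandLineArguments(new String[] {
                "/data/trajectories.heap",        // hypothetical input file
                "prism:0,0,0,10,10,10" });        // hypothetical query prism
        Path file = cla.getPaths()[0];
        FileSystem fs = file.getFileSystem(new JobConf(FileMBR.class));
        long found = rangeQueryLocal(fs, file, cla.getPrism(), cla.getShape(true),
                new ResultCollector<Shape>() {
                    @Override
                    public void collect(Shape s) {
                        System.out.println(s); // report each matching shape
                    }
                });
        System.out.println(found + " results");
    }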
    public static void main(String[] args) throws IOException {
        CommandLineArguments cla = new CommandLineArguments(args);
        final QueryInput query = cla.getQuery();
        final Path[] paths = cla.getPaths();
        if (paths.length == 0 || (cla.getPrism() == null && cla.getSelectionRatio() < 0.0f)) {
            printUsage();
            throw new RuntimeException("Illegal parameters");
        }
        JobConf conf = new JobConf(FileMBR.class);
        final Path inputFile = paths[0];
        final FileSystem fs = inputFile.getFileSystem(conf);
        if (!fs.exists(inputFile)) {
            printUsage();
            throw new RuntimeException("Input file does not exist");
        }
        final Path outputPath = paths.length > 1 ? paths[1] : null;
        final Prism[] queryRanges = cla.getPrisms();
        int concurrency = cla.getConcurrency();
        final Shape stockShape = cla.getShape(true);
        final boolean overwrite = cla.isOverwrite();

        final long[] results = new long[queryRanges.length];
        final Vector<Thread> threads = new Vector<Thread>();

        final BooleanWritable exceptionHappened = new BooleanWritable();

        Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() {
            public void uncaughtException(Thread th, Throwable ex) {
                ex.printStackTrace();
                exceptionHappened.set(true);
            }
        };

        for (int i = 0; i < queryRanges.length; i++) {
            Thread t = new Thread() {
                @Override
                public void run() {
                    try {
                        int thread_i = threads.indexOf(this);
                        long result_count = rangeQueryMapReduce(fs, inputFile, outputPath,
                                queryRanges[thread_i], stockShape, overwrite, false, query);
                        results[thread_i] = result_count;
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            };
            t.setUncaughtExceptionHandler(h);
            threads.add(t);
        }

        long t1 = System.currentTimeMillis();
        do {
            // Ensure that at most `concurrency` threads are running: start new
            // threads until the cap is reached, and drop terminated ones
            int i = 0;
            while (i < concurrency && i < threads.size()) {
                Thread.State state = threads.elementAt(i).getState();
                if (state == Thread.State.TERMINATED) {
                    // Thread already terminated, remove from the queue
                    threads.remove(i);
                } else if (state == Thread.State.NEW) {
                    // Start the thread and move to the next one
                    threads.elementAt(i++).start();
                } else {
                    // Thread is still running, skip over it
                    i++;
                }
            }
            if (!threads.isEmpty()) {
                try {
                    // Sleep for 10 seconds or until the first thread terminates
                    threads.firstElement().join(10000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        } while (!threads.isEmpty());
        long t2 = System.currentTimeMillis();

        if (exceptionHappened.get())
            throw new RuntimeException("Not all jobs finished correctly");

        System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
        System.out.print("Result size: [");
        for (long result : results) {
            System.out.print(result + ", ");
        }
        System.out.println("]");
    }
}
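The hand-rolled thread throttling in main() starts queries lazily and caps the number of live threads at the configured concurrency. A minimal sketch of the same behavior with a fixed-size thread pool from java.util.concurrent, assuming the jobs are wrapped as Runnables (names below are hypothetical, not part of the original file):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

class ConcurrentQueriesSketch {
    // Run one job per query range, with at most `concurrency` running at once
    static void runAll(Runnable[] jobs, int concurrency) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(concurrency);
        for (Runnable job : jobs) {
            pool.submit(job); // queued until a worker thread is free
        }
        pool.shutdown(); // accept no new jobs and let the queue drain
        pool.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
    }
}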