List of usage examples for org.apache.hadoop.conf.Configuration#getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Gets the value of the name property as a boolean. If no such property exists, or if the stored value cannot be parsed as a boolean, defaultValue is returned.
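Before the real-world examples, a minimal runnable sketch of the call itself. The property name my.feature.enabled is only an illustrative placeholder; the point is that an unset or unparsable value falls back to the supplied default.

import org.apache.hadoop.conf.Configuration;

public class GetBooleanDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Nothing set yet, so the default is returned
    boolean enabled = conf.getBoolean("my.feature.enabled", false); // false

    conf.setBoolean("my.feature.enabled", true);
    enabled = conf.getBoolean("my.feature.enabled", false);         // true

    // A value that cannot be parsed as a boolean also yields the default
    conf.set("my.feature.enabled", "not-a-boolean");
    enabled = conf.getBoolean("my.feature.enabled", false);         // false

    System.out.println(enabled);
  }
}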
From source file:edu.umd.cloud9.webgraph.TrecExtractLinks.java
License:Apache License
@Override
public int runTool() throws Exception {
  Configuration conf = getConf();
  conf.set("mapred.child.java.opts", "-Xmx3072m");
  conf.setInt("mapred.task.timeout", 60000000);
  Job job = new Job(conf);
  int numReducers = conf.getInt("Cloud9.Reducers", 200);
  String inputPath = conf.get("Cloud9.InputPath");
  String outputPath = conf.get("Cloud9.OutputPath");
  String mappingFile = conf.get("Cloud9.DocnoMappingFile");
  FileSystem fs = FileSystem.get(conf);
  if (!fs.exists(new Path(mappingFile))) {
    throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
  }
  DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration());
  job.setJobName("ExtractLinks");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(TrecExtractLinks.class);
  job.setMapperClass(TrecExtractLinks.Map.class);
  job.setCombinerClass(TrecExtractLinks.Reduce.class);
  job.setReducerClass(TrecExtractLinks.Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(ArrayListWritable.class);
  configer.applyJobConfig(job);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
  recursivelyAddInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  LOG.info("ExtractLinks");
  LOG.info(" - input path: " + inputPath);
  LOG.info(" - output path: " + outputPath);
  LOG.info(" - mapping file: " + mappingFile);
  LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));
  job.waitForCompletion(true);
  return 0;
}
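For context, a hedged sketch of how a caller might toggle the flag this tool logs. Cloud9.IncludeInternalLinks is the key read above, but this driver fragment is illustrative and not taken from Cloud9.

// Illustrative fragment: set the flag before runTool() reads it.
Configuration conf = new Configuration();
conf.setBoolean("Cloud9.IncludeInternalLinks", true);
// getBoolean("Cloud9.IncludeInternalLinks", false) in runTool() now returns true;
// left unset, the default of false would be logged instead.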
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
private static Job indexMapReduce(Path inPath, Path outPath, OperationsParams paramss)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(paramss, "Indexer");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(Indexer.class);
  // Set input file MBR if not already set
  Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
  if (inputMBR == null) {
    inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
    OperationsParams.setShape(conf, "mbr", inputMBR);
  }
  // Set the correct partitioner according to index type
  String index = conf.get("sindex");
  if (index == null)
    throw new RuntimeException("Index type is not set");
  long t1 = System.currentTimeMillis();
  setLocalIndexer(conf, index);
  Partitioner partitioner = createPartitioner(inPath, outPath, conf, index);
  Partitioner.setPartitioner(conf, partitioner);
  long t2 = System.currentTimeMillis();
  System.out.println("Total time for space subdivision in millis: " + (t2 - t1));
  // Set mapper and reducer
  Shape shape = OperationsParams.getShape(conf, "shape");
  job.setMapperClass(PartitionerMap.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(shape.getClass());
  job.setReducerClass(PartitionerReduce.class);
  // Set input and output
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inPath);
  job.setOutputFormatClass(IndexOutputFormat.class);
  IndexOutputFormat.setOutputPath(job, outPath);
  // Set number of reduce tasks according to cluster status
  ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
  job.setNumReduceTasks(Math.max(1,
      Math.min(partitioner.getPartitionCount(), (clusterStatus.getMaxReduceTasks() * 9) / 10)));
  // Use multithreading in case the job is running locally
  conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
  // Start the job
  if (conf.getBoolean("background", false)) {
    // Run in background
    job.submit();
  } else {
    job.waitForCompletion(conf.getBoolean("verbose", false));
  }
  return job;
}
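The last block above illustrates a common pattern: two boolean properties decide whether the job is submitted asynchronously and how chatty it is. An isolated, hedged restatement of that pattern follows; it assumes a configured org.apache.hadoop.mapreduce.Job named job, and only the keys background and verbose come from the example.

// Assumes a configured Job named job.
Configuration conf = job.getConfiguration();
if (conf.getBoolean("background", false)) {
  job.submit();                                              // submit asynchronously and return
} else {
  job.waitForCompletion(conf.getBoolean("verbose", false));  // block; print progress only if verbose
}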
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException {
  this.conf = conf;
  String datasetName = conf.get("dataset");
  if (datasetName == null)
    throw new RuntimeException("Dataset name should be provided");
  if (split instanceof CombineFileSplit) {
    CombineFileSplit csplits = (CombineFileSplit) split;
    splits = new Vector<FileSplit>(csplits.getNumPaths());
    for (int i = 0; i < csplits.getNumPaths(); i++) {
      FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
          csplits.getLocations());
      splits.add(fsplit);
    }
    this.initialize(splits.remove(splits.size() - 1), conf);
    return;
  }
  inFile = ((FileSplit) split).getPath();
  fs = inFile.getFileSystem(conf);
  if (fs instanceof HTTPFileSystem) {
    // For performance reasons, we don't open HDF files from HTTP
    inFile = new Path(FileUtil.copyFile(conf, inFile));
    fs = FileSystem.getLocal(conf);
    this.deleteOnEnd = true;
  }
  hdfFile = new HDFFile(fs.open(inFile));
  // Retrieve meta data
  String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
  String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
  nasaDataset = new NASADataset(coreMetadata, archiveMetadata);
  // Retrieve the data array
  DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
  boolean fillValueFound = false;
  int resolution = 0;
  // Retrieve metadata
  int fillValuee = 0;
  for (DataDescriptor dd : dataGroup.getContents()) {
    if (dd instanceof DDVDataHeader) {
      DDVDataHeader vheader = (DDVDataHeader) dd;
      if (vheader.getName().equals("_FillValue")) {
        Object fillValue = vheader.getEntryAt(0);
        if (fillValue instanceof Integer)
          fillValuee = (Integer) fillValue;
        else if (fillValue instanceof Short)
          fillValuee = (Short) fillValue;
        else if (fillValue instanceof Byte)
          fillValuee = (Byte) fillValue;
        else
          throw new RuntimeException("Unsupported type: " + fillValue.getClass());
        fillValueFound = true;
      } else if (vheader.getName().equals("valid_range")) {
        Object minValue = vheader.getEntryAt(0);
        if (minValue instanceof Integer)
          nasaDataset.minValue = (Integer) minValue;
        else if (minValue instanceof Byte)
          nasaDataset.minValue = (Byte) minValue;
        Object maxValue = vheader.getEntryAt(1);
        if (maxValue instanceof Integer)
          nasaDataset.maxValue = (Integer) maxValue;
        else if (maxValue instanceof Byte)
          nasaDataset.maxValue = (Byte) maxValue;
      }
    }
  }
  // Retrieve data
  for (DataDescriptor dd : dataGroup.getContents()) {
    if (dd instanceof DDNumericDataGroup) {
      DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
      valueSize = numericDataGroup.getDataSize();
      resolution = numericDataGroup.getDimensions()[0];
      unparsedDataArray = new byte[valueSize * resolution * resolution];
      if (fillValueFound) {
        fillValueBytes = new byte[valueSize];
        HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
        for (int i = 0; i < unparsedDataArray.length; i++)
          unparsedDataArray[i] = fillValueBytes[i % valueSize];
      }
      numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
    }
  }
  nasaDataset.resolution = resolution;
  if (!fillValueFound) {
    skipFillValue = false;
  } else {
    skipFillValue = conf.getBoolean("skipfill", true);
    // Whether we need to recover fill values or not
    boolean recoverFillValues = conf.getBoolean("recoverholes", true);
    if (recoverFillValues)
      recoverFillValues(conf);
  }
  this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
  this.nasaShape.setTimestamp(nasaDataset.time);
  this.value = new NASAIterator();
}
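Note that both flags read above default to true, so callers opt out rather than opt in. A small runnable sketch of that defaulting behaviour, using only the keys skipfill and recoverholes from the example:

import org.apache.hadoop.conf.Configuration;

public class FillValueFlags {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Both flags default to true unless explicitly disabled
    boolean skipFill = conf.getBoolean("skipfill", true);          // true
    boolean recoverHoles = conf.getBoolean("recoverholes", true);  // true

    conf.setBoolean("recoverholes", false);                        // explicit opt-out
    recoverHoles = conf.getBoolean("recoverholes", true);          // now false
    System.out.println(skipFill + " " + recoverHoles);
  }
}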
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader3.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException {
  String datasetName = conf.get("dataset");
  if (datasetName == null)
    throw new RuntimeException("Dataset name should be provided");
  FileSplit fsplit = (FileSplit) split;
  FileSystem fs = fsplit.getPath().getFileSystem(conf);
  FSDataInputStream input = fs.open(fsplit.getPath());
  HDFFile hdfFile = new HDFFile(input);
  // Retrieve meta data
  nasaDataset = new NASADataset((String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0));
  // Retrieve the data array
  DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
  boolean fillValueFound = false;
  int resolution = 0;
  for (DataDescriptor dd : dataGroup.getContents()) {
    if (dd instanceof DDNumericDataGroup) {
      DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
      dataArray = (short[]) numericDataGroup.getAsAnArray();
      resolution = numericDataGroup.getDimensions()[0];
    } else if (dd instanceof DDVDataHeader) {
      DDVDataHeader vheader = (DDVDataHeader) dd;
      if (vheader.getName().equals("_FillValue")) {
        this.fillValue = (Integer) vheader.getEntryAt(0);
        fillValueFound = true;
      } else if (vheader.getName().equals("valid_range")) {
        nasaDataset.minValue = (Integer) vheader.getEntryAt(0);
        nasaDataset.maxValue = (Integer) vheader.getEntryAt(1);
      }
    }
  }
  nasaDataset.resolution = resolution;
  if (!fillValueFound) {
    skipFillValue = false;
  } else {
    // Whether we need to recover fill values or not
    boolean recoverFillValues = conf.getBoolean("recoverholes", true);
    if (recoverFillValues)
      recoverFillValues(conf);
  }
  this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
  this.value = new NASAIterator();
  hdfFile.close();
}
From source file:edu.umn.cs.spatialHadoop.nasa.HDFToText.java
License:Open Source License
/**
 * Performs an HDF to text operation as a MapReduce job and returns total
 * number of points generated.
 * @param inPath
 * @param outPath
 * @param datasetName
 * @param skipFillValue
 * @return
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(Path inPath, Path outPath, String datasetName, boolean skipFillValue,
    OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "HDFToText");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(HDFToText.class);
  job.setJobName("HDFToText");
  // Set Map function details
  job.setMapperClass(HDFToTextMap.class);
  job.setNumReduceTasks(0);
  // Set input information
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inPath);
  if (conf.get("shape") == null)
    conf.setClass("shape", NASAPoint.class, Shape.class);
  conf.set("dataset", datasetName);
  conf.setBoolean("skipfillvalue", skipFillValue);
  // Set output information
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outPath);
  // Run the job
  boolean verbose = conf.getBoolean("verbose", false);
  job.waitForCompletion(verbose);
  Counters counters = job.getCounters();
  Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
  final long resultCount = outputRecordCounter.getValue();
  return resultCount;
}
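This example also shows the write side: setBoolean stores a flag that downstream code reading the same Configuration can retrieve with getBoolean. A short runnable sketch of that round trip, reusing only the keys skipfillvalue and verbose from the example:

import org.apache.hadoop.conf.Configuration;

public class SkipFillValueRoundTrip {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setBoolean("skipfillvalue", true);   // driver side, as in HDFToTextMapReduce

    // Consumer side, e.g. a record reader handed the same Configuration
    boolean skipFillValue = conf.getBoolean("skipfillvalue", false); // true
    boolean verbose = conf.getBoolean("verbose", false);             // false unless set elsewhere
    System.out.println(skipFillValue + " " + verbose);
  }
}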
From source file:edu.umn.cs.spatialHadoop.OperationsParams.java
License:Open Source License
/**
 * Checks whether the operation should work in local or MapReduce mode. If
 * the job explicitly specifies whether to run in local or MapReduce mode,
 * the specified option is returned. Otherwise, it automatically detects
 * whether to use local or MapReduce based on the input size.
 *
 * @return <code>true</code> to run in local mode, <code>false</code> to run
 * in MapReduce mode.
 * @throws IOException If the underlying job fails with an IOException
 * @throws InterruptedException If the underlying job was interrupted
 */
public static boolean isLocal(Configuration jobConf, Path... input) throws IOException, InterruptedException {
  final boolean LocalProcessing = true;
  final boolean MapReduceProcessing = false;
  // Whatever is explicitly set has the highest priority
  if (jobConf.get("local") != null)
    return jobConf.getBoolean("local", false);
  // If any of the input files are hidden, use local processing
  for (Path inputFile : input) {
    if (!SpatialSite.NonHiddenFileFilter.accept(inputFile))
      return LocalProcessing;
  }
  if (input.length > MaxSplitsForLocalProcessing) {
    LOG.info("Too many files. Using MapReduce");
    return MapReduceProcessing;
  }
  Job job = new Job(jobConf); // To ensure we don't change the original
  SpatialInputFormat3.setInputPaths(job, input);
  SpatialInputFormat3<Partition, Shape> inputFormat = new SpatialInputFormat3<Partition, Shape>();
  try {
    List<InputSplit> splits = inputFormat.getSplits(job);
    if (splits.size() > MaxSplitsForLocalProcessing)
      return MapReduceProcessing;
    long totalSize = 0;
    for (InputSplit split : splits)
      totalSize += split.getLength();
    if (totalSize > MaxSizeForLocalProcessing) {
      LOG.info("Input size is too large. Using MapReduce");
      return MapReduceProcessing;
    }
    LOG.info("Input size is small enough to use local machine");
    return LocalProcessing;
  } catch (IOException e) {
    LOG.warn("Cannot get splits for input");
    return MapReduceProcessing;
  }
}
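The get(...) != null guard followed by getBoolean is how this method distinguishes "explicitly set to false" from "not set at all"; getBoolean alone cannot tell the two apart because it returns the default in both cases. A runnable sketch of that idiom, where autoDetectBySize() is a hypothetical stand-in for the size-based detection and not SpatialHadoop code:

import org.apache.hadoop.conf.Configuration;

public class LocalModeCheck {
  // Hypothetical stand-in for the size-based detection performed by isLocal().
  static boolean autoDetectBySize() {
    return true;
  }

  static boolean isLocal(Configuration conf) {
    // An explicit "local" setting wins; the get(...) != null guard separates
    // "explicitly false" from "not set at all".
    if (conf.get("local") != null)
      return conf.getBoolean("local", false);
    return autoDetectBySize();
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    System.out.println(isLocal(conf));   // auto-detected (true in this stub)
    conf.setBoolean("local", false);
    System.out.println(isLocal(conf));   // false, because the key is now explicitly set
  }
}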
From source file:edu.umn.cs.spatialHadoop.visualization.CanvasOutputFormat.java
License:Open Source License
protected static void mergeImages(final Configuration conf, final Path outPath)
    throws IOException, InterruptedException {
  final int width = conf.getInt("width", 1000);
  final int height = conf.getInt("height", 1000);
  final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR);
  final boolean vflip = conf.getBoolean("vflip", true);
  // List all output files resulting from reducers
  final FileSystem outFs = outPath.getFileSystem(conf);
  final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() {
    @Override
    public boolean accept(Path path) {
      return path.toUri().getPath().contains("part-");
    }
  });
  if (resultFiles.length == 0) {
    System.err.println("Error! Couldn't find any partial output. Exiting!");
    return;
  }
  System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one");
  List<Canvas> intermediateLayers = Parallel.forEach(resultFiles.length, new Parallel.RunnableRange<Canvas>() {
    @Override
    public Canvas run(int i1, int i2) {
      Plotter plotter = Plotter.getPlotter(conf);
      // The canvas that contains the merge of all assigned layers
      Canvas finalLayer = null;
      Canvas tempLayer = plotter.createCanvas(1, 1, new Rectangle());
      for (int i = i1; i < i2; i++) {
        FileStatus resultFile = resultFiles[i];
        try {
          FSDataInputStream inputStream = outFs.open(resultFile.getPath());
          while (inputStream.getPos() < resultFile.getLen()) {
            if (tempLayer == finalLayer) {
              // More than one layer. Create a separate final layer to merge
              finalLayer = plotter.createCanvas(width, height, inputMBR);
              plotter.merge(finalLayer, tempLayer);
            }
            tempLayer.readFields(inputStream);
            if (finalLayer == null) {
              // First layer. Treat it as a final layer to avoid merging
              // if it is the only layer
              finalLayer = tempLayer;
            } else {
              // More than only layer. Merge into the final layer
              plotter.merge(finalLayer, tempLayer);
            }
          }
          inputStream.close();
        } catch (IOException e) {
          System.err.println("Error reading " + resultFile);
          e.printStackTrace();
        }
      }
      return finalLayer;
    }
  }, conf.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  // Merge all intermediate layers into one final layer
  Plotter plotter = Plotter.getPlotter(conf);
  Canvas finalLayer;
  if (intermediateLayers.size() == 1) {
    finalLayer = intermediateLayers.get(0);
  } else {
    finalLayer = plotter.createCanvas(width, height, inputMBR);
    for (Canvas intermediateLayer : intermediateLayers) {
      plotter.merge(finalLayer, intermediateLayer);
    }
  }
  // Finally, write the resulting image to the given output path
  System.out.println(System.currentTimeMillis() + ": Writing final image");
  outFs.delete(outPath, true); // Delete old (non-combined) images
  FSDataOutputStream outputFile = outFs.create(outPath);
  plotter.writeImage(finalLayer, outputFile, vflip);
  outputFile.close();
}
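Here getBoolean("vflip", true) decides whether the merged image is flipped vertically when written, and it defaults to true. A minimal runnable sketch of reading and overriding that flag (only the key vflip comes from the example):

import org.apache.hadoop.conf.Configuration;

public class VflipFlag {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    boolean vflip = conf.getBoolean("vflip", true);   // true by default
    conf.setBoolean("vflip", false);                  // explicit opt-out of the vertical flip
    vflip = conf.getBoolean("vflip", true);           // now false
    System.out.println(vflip);
  }
}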
From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java
License:Open Source License
private static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
    OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
  Plotter plotter;
  try {
    plotter = plotterClass.newInstance();
  } catch (InstantiationException e) {
    throw new RuntimeException("Error creating rastierizer", e);
  } catch (IllegalAccessException e) {
    throw new RuntimeException("Error creating rastierizer", e);
  }
  Job job = new Job(params, "MultilevelPlot");
  job.setJarByClass(SingleLevelPlot.class);
  // Set plotter
  Configuration conf = job.getConfiguration();
  Plotter.setPlotter(conf, plotterClass);
  // Set input file MBR
  Rectangle inputMBR = (Rectangle) params.getShape("mbr");
  if (inputMBR == null)
    inputMBR = FileMBR.fileMBR(inFiles, params);
  // Adjust width and height if aspect ratio is to be kept
  if (params.getBoolean("keepratio", true)) {
    // Expand input file to a rectangle for compatibility with the pyramid structure
    if (inputMBR.getWidth() > inputMBR.getHeight()) {
      inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
      inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
    } else {
      inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
      inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
    }
  }
  OperationsParams.setShape(conf, InputMBR, inputMBR);
  // Set input and output
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inFiles);
  if (conf.getBoolean("output", true)) {
    job.setOutputFormatClass(PyramidOutputFormat2.class);
    PyramidOutputFormat2.setOutputPath(job, outFile);
  } else {
    job.setOutputFormatClass(NullOutputFormat.class);
  }
  // Set mapper, reducer and committer
  String partitionTechnique = params.get("partition", "flat");
  if (partitionTechnique.equalsIgnoreCase("flat")) {
    // Use flat partitioning
    job.setMapperClass(FlatPartitionMap.class);
    job.setMapOutputKeyClass(TileIndex.class);
    job.setMapOutputValueClass(plotter.getCanvasClass());
    job.setReducerClass(FlatPartitionReduce.class);
  } else if (partitionTechnique.equalsIgnoreCase("pyramid")) {
    // Use pyramid partitioning
    Shape shape = params.getShape("shape");
    job.setMapperClass(PyramidPartitionMap.class);
    job.setMapOutputKeyClass(TileIndex.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(PyramidPartitionReduce.class);
  } else {
    throw new RuntimeException("Unknown partitioning technique '" + partitionTechnique + "'");
  }
  // Set number of reducers
  job.setNumReduceTasks(
      Math.max(1, new JobClient(new JobConf()).getClusterStatus().getMaxReduceTasks() * 7 / 8));
  // Use multithreading in case the job is running locally
  conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
  // Start the job
  if (params.getBoolean("background", false)) {
    job.submit();
  } else {
    job.waitForCompletion(false);
  }
  return job;
}
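Notice that getBoolean is called both on the job's Configuration (output) and on the OperationsParams object (keepratio, background); since params is passed to the Job constructor as a Configuration, the same defaulting semantics apply in both cases. A brief runnable sketch, shown on a plain Configuration for simplicity:

import org.apache.hadoop.conf.Configuration;

public class PlotFlags {
  public static void main(String[] args) {
    // Plain Configuration used here for brevity; the OperationsParams object in the
    // example is handed to Job as a Configuration and its getBoolean behaves the same.
    Configuration params = new Configuration();
    params.setBoolean("keepratio", false);   // skip the aspect-ratio adjustment
    params.setBoolean("output", false);      // use NullOutputFormat instead of writing tiles
    System.out.println(params.getBoolean("keepratio", true));   // false
    System.out.println(params.getBoolean("output", true));      // false
    System.out.println(params.getBoolean("background", false)); // false: run in the foreground
  }
}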
From source file:edu.umn.cs.spatialHadoop.visualization.RasterOutputFormat.java
License:Open Source License
protected static void mergeImages(final Configuration conf, final Path outPath)
    throws IOException, InterruptedException {
  final int width = conf.getInt("width", 1000);
  final int height = conf.getInt("height", 1000);
  final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR);
  final boolean vflip = conf.getBoolean("vflip", true);
  // List all output files resulting from reducers
  final FileSystem outFs = outPath.getFileSystem(conf);
  final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() {
    @Override
    public boolean accept(Path path) {
      return path.toUri().getPath().contains("part-");
    }
  });
  if (resultFiles.length == 0) {
    System.err.println("Error! Couldn't find any partial output. Exiting!");
    return;
  }
  System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one");
  Vector<RasterLayer> intermediateLayers = Parallel.forEach(resultFiles.length,
      new Parallel.RunnableRange<RasterLayer>() {
        @Override
        public RasterLayer run(int i1, int i2) {
          Rasterizer rasterizer = Rasterizer.getRasterizer(conf);
          // The raster layer that contains the merge of all assigned layers
          RasterLayer finalLayer = null;
          RasterLayer tempLayer = rasterizer.createRaster(1, 1, new Rectangle());
          for (int i = i1; i < i2; i++) {
            FileStatus resultFile = resultFiles[i];
            try {
              FSDataInputStream inputStream = outFs.open(resultFile.getPath());
              while (inputStream.getPos() < resultFile.getLen()) {
                if (tempLayer == finalLayer) {
                  // More than one layer. Create a separate final layer to merge
                  finalLayer = rasterizer.createRaster(width, height, inputMBR);
                  rasterizer.merge(finalLayer, tempLayer);
                }
                tempLayer.readFields(inputStream);
                if (finalLayer == null) {
                  // First layer. Treat it as a final layer to avoid merging
                  // if it is the only layer
                  finalLayer = tempLayer;
                } else {
                  // More than only layer. Merge into the final layer
                  rasterizer.merge(finalLayer, tempLayer);
                }
              }
              inputStream.close();
            } catch (IOException e) {
              System.err.println("Error reading " + resultFile);
              e.printStackTrace();
            }
          }
          return finalLayer;
        }
      });
  // Merge all intermediate layers into one final layer
  Rasterizer rasterizer = Rasterizer.getRasterizer(conf);
  RasterLayer finalLayer;
  if (intermediateLayers.size() == 1) {
    finalLayer = intermediateLayers.elementAt(0);
  } else {
    finalLayer = rasterizer.createRaster(width, height, inputMBR);
    for (RasterLayer intermediateLayer : intermediateLayers) {
      rasterizer.merge(finalLayer, intermediateLayer);
    }
  }
  // Finally, write the resulting image to the given output path
  System.out.println(System.currentTimeMillis() + ": Writing final image");
  outFs.delete(outPath, true); // Delete old (non-combined) images
  FSDataOutputStream outputFile = outFs.create(outPath);
  rasterizer.writeImage(finalLayer, outputFile, vflip);
  outputFile.close();
}
From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java
License:Open Source License
/**
 * Generates a single level using a MapReduce job and returns the created job.
 * @param inFiles
 * @param outFile
 * @param plotterClass
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
    OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
  Plotter plotter;
  try {
    plotter = plotterClass.newInstance();
  } catch (InstantiationException e) {
    throw new RuntimeException("Error creating rastierizer", e);
  } catch (IllegalAccessException e) {
    throw new RuntimeException("Error creating rastierizer", e);
  }
  Job job = new Job(params, "SingleLevelPlot");
  job.setJarByClass(SingleLevelPlot.class);
  job.setJobName("SingleLevelPlot");
  // Set plotter
  Configuration conf = job.getConfiguration();
  Plotter.setPlotter(conf, plotterClass);
  // Set input file MBR
  Rectangle inputMBR = (Rectangle) params.getShape("mbr");
  Rectangle drawRect = (Rectangle) params.getShape("rect");
  if (inputMBR == null)
    inputMBR = drawRect != null ? drawRect : FileMBR.fileMBR(inFiles, params);
  OperationsParams.setShape(conf, InputMBR, inputMBR);
  if (drawRect != null)
    OperationsParams.setShape(conf, SpatialInputFormat3.InputQueryRange, drawRect);
  // Adjust width and height if aspect ratio is to be kept
  int imageWidth = conf.getInt("width", 1000);
  int imageHeight = conf.getInt("height", 1000);
  if (params.getBoolean("keepratio", true)) {
    // Adjust width and height to maintain aspect ratio
    if (inputMBR.getWidth() / inputMBR.getHeight() > (double) imageWidth / imageHeight) {
      // Fix width and change height
      imageHeight = (int) (inputMBR.getHeight() * imageWidth / inputMBR.getWidth());
      // Make divisible by two for compatibility with ffmpeg
      if (imageHeight % 2 == 1)
        imageHeight--;
      conf.setInt("height", imageHeight);
    } else {
      imageWidth = (int) (inputMBR.getWidth() * imageHeight / inputMBR.getHeight());
      conf.setInt("width", imageWidth);
    }
  }
  boolean merge = conf.getBoolean("merge", true);
  // Set input and output
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inFiles);
  if (conf.getBoolean("output", true)) {
    if (merge) {
      job.setOutputFormatClass(CanvasOutputFormat.class);
      conf.setClass("mapred.output.committer.class", CanvasOutputFormat.ImageWriterOld.class,
          org.apache.hadoop.mapred.OutputCommitter.class);
    } else {
      job.setOutputFormatClass(ImageOutputFormat.class);
    }
    CanvasOutputFormat.setOutputPath(job, outFile);
  } else {
    job.setOutputFormatClass(NullOutputFormat.class);
  }
  // Set mapper and reducer based on the partitioning scheme
  String partition = conf.get("partition", "none");
  ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
  if (partition.equalsIgnoreCase("none")) {
    LOG.info("Using no-partition plot");
    job.setMapperClass(NoPartitionPlotMap.class);
    job.setCombinerClass(NoPartitionPlotCombine.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(plotter.getCanvasClass());
    if (merge) {
      int numSplits = new SpatialInputFormat3().getSplits(job).size();
      job.setReducerClass(NoPartitionPlotReduce.class);
      // Set number of reduce tasks according to cluster status
      int maxReduce = Math.max(1, clusterStatus.getMaxReduceTasks() * 7 / 8);
      job.setNumReduceTasks(Math.max(1, Math.min(maxReduce, numSplits / maxReduce)));
    } else {
      job.setNumReduceTasks(0);
    }
  } else {
    LOG.info("Using repartition plot");
    Partitioner partitioner;
    if (partition.equals("pixel")) {
      // Special case for pixel level partitioning as it depends on the
      // visualization parameters
      partitioner = new GridPartitioner(inputMBR, imageWidth, imageHeight);
    } else if (partition.equals("grid")) {
      int numBlocks = 0;
      for (Path in : inFiles) {
        FileSystem fs = in.getFileSystem(params);
        long size = FileUtil.getPathSize(fs, in);
        long blockSize = fs.getDefaultBlockSize(in);
        numBlocks += Math.ceil(size / (double) blockSize);
      }
      int numPartitions = numBlocks * 1000;
      int gridSize = (int) Math.ceil(Math.sqrt(numPartitions));
      partitioner = new GridPartitioner(inputMBR, gridSize, gridSize);
    } else {
      // Use a standard partitioner as created by the indexer
      partitioner = Indexer.createPartitioner(inFiles, outFile, conf, partition);
    }
    Shape shape = params.getShape("shape");
    job.setMapperClass(RepartitionPlotMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(RepartitionPlotReduce.class);
    // Set number of reducers according to cluster size
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks() * 9 / 10));
    Partitioner.setPartitioner(conf, partitioner);
  }
  // Use multithreading in case the job is running locally
  conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
  // Start the job
  if (params.getBoolean("background", false)) {
    // Run in background
    job.submit();
  } else {
    job.waitForCompletion(params.getBoolean("verbose", false));
  }
  return job;
}
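The merge flag read near the top of this method later selects the output format and whether a merging reduce phase is used. The following hedged fragment restates only that branch; it simplifies away the committer and output-path calls and assumes the method's conf and job variables.

// Simplified restatement of the output-format selection above
// (committer and output-path configuration omitted); assumes conf and job.
boolean merge = conf.getBoolean("merge", true);
if (conf.getBoolean("output", true)) {
  job.setOutputFormatClass(merge ? CanvasOutputFormat.class : ImageOutputFormat.class);
} else {
  job.setOutputFormatClass(NullOutputFormat.class);
}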