List of usage examples for org.apache.hadoop.fs.FileSystem.getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
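getFileStatus() resolves a path to a single FileStatus object carrying the file's metadata (length, block size, modification time, directory flag) and throws FileNotFoundException if the path does not exist. Before the real-world examples below, here is a minimal self-contained sketch of the call; the class name and the command-line path are hypothetical and not taken from any of the sources that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileStatusDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);               // path supplied by the caller
        FileSystem fs = path.getFileSystem(conf);    // resolve the file system that owns the path
        FileStatus status = fs.getFileStatus(path);  // one round trip returns all the metadata
        System.out.println("length      = " + status.getLen());
        System.out.println("block size  = " + status.getBlockSize());
        System.out.println("directory?  = " + status.isDirectory());
        System.out.println("modified at = " + status.getModificationTime());
    }
}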
From source file: edu.umn.cs.spatialHadoop.operations.CatUnion.java
License: Open Source License
/**
 * Read all categories from the category file
 * @param categoryFile
 * @param idToCategory
 * @throws IOException
 */
private static void readCategories(Path categoryFile, Map<Integer, Integer> idToCategory) throws IOException {
    Map<Integer, String> idToCatName = new HashMap<Integer, String>();
    FileSystem fsCategory = FileSystem.getLocal(new Configuration());
    long categoryFileSize = fsCategory.getFileStatus(categoryFile).getLen();
    if (categoryFileSize > 1024 * 1024)
        LOG.warn("Category file size is big: " + categoryFileSize);
    InputStream inCategory = fsCategory.open(categoryFile);
    LineRecordReader lineReader = new LineRecordReader(inCategory, 0, categoryFileSize, new Configuration());
    LongWritable lineOffset = lineReader.createKey();
    Text line = lineReader.createValue();

    Set<String> catNames = new TreeSet<String>();
    while (lineReader.next(lineOffset, line)) {
        int shape_id = TextSerializerHelper.consumeInt(line, ',');
        String cat_name = line.toString();
        catNames.add(cat_name);
        idToCatName.put(shape_id, cat_name);
    }
    lineReader.close();

    // Change category names to numbers
    Map<String, Integer> cat_name_to_id = new HashMap<String, Integer>();
    int cat_id = 0;
    for (String cat_name : catNames) {
        cat_name_to_id.put(cat_name, cat_id++);
    }

    for (Map.Entry<Integer, String> entry : idToCatName.entrySet()) {
        idToCategory.put(entry.getKey(), cat_name_to_id.get(entry.getValue()));
    }
}
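The only use of getFileStatus() in this example is the getLen() call, which bounds the LineRecordReader to the full byte range of the category file. A reduced sketch of that pattern, assuming a hypothetical local file name and the same imports as the method above:

Configuration conf = new Configuration();
FileSystem localFs = FileSystem.getLocal(conf);
Path categoryFile = new Path("categories.txt");               // hypothetical input file
long length = localFs.getFileStatus(categoryFile).getLen();   // total number of bytes to read
LineRecordReader lineReader = new LineRecordReader(localFs.open(categoryFile), 0, length, conf);
LongWritable offset = lineReader.createKey();
Text line = lineReader.createValue();
while (lineReader.next(offset, line)) {
    // each iteration yields one text line of the category file
}
lineReader.close();

Reading the length up front lets the reader know where the stream ends without a second pass over the file.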
From source file: edu.umn.cs.spatialHadoop.operations.ClosestPairHadoop.java
License: Open Source License
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    GenericOptionsParser parser = new GenericOptionsParser(args);
    OperationsParams params = new OperationsParams(parser);
    if (args.length == 0) {
        printUsage();
        throw new RuntimeException("Illegal arguments. Input file missing");
    }

    Path inputFile = new Path(args[0]);
    FileSystem fs = inputFile.getFileSystem(new Configuration());
    if (!fs.exists(inputFile)) {
        printUsage();
        throw new RuntimeException("Input file does not exist");
    }

    params.setClass("shape", Point.class, Shape.class);

    samplePoint(fs, inputFile);
    final long fileSize = fs.getFileStatus(inputFile).getLen();
    long delta = (long) (1.0 * sample.size() / (1.0 * fileSize / localMemory));
    if (delta == 0)
        delta = 1;
    System.out.println("delta = " + delta);

    Vector<Point> axis = new Vector<Point>();
    for (int i = 0; i < sample.size(); i += delta)
        axis.add(sample.get(i));
    sample = axis;
    System.out.println("Finish Sampling.");

    cloesetPair(inputFile, params);
}
From source file: edu.umn.cs.spatialHadoop.operations.Contains.java
License: Open Source License
public static <S extends Shape> long contains(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Contains.class);

    LOG.info("Contains journey starts ....");
    FileSystem inFs = inFiles[0].getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        FileSystem outFs = FileSystem.get(job);
        do {
            outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    }
    FileSystem outFs = outputPath.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setJobName("Within");
    job.setMapperClass(ContainsMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IndexedText.class);
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setLong("mapred.min.split.size",
            Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize()));

    job.setReducerClass(ContainsReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(ShapeLineInputFormat.class);
    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, inFiles);

    // Calculate and set the dimensions of the grid to use in the map phase
    long total_size = 0;
    Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : inFiles) {
        FileSystem fs = file.getFileSystem(params);
        Rectangle file_mbr = FileMBR.fileMBR(file, params);
        mbr.expand(file_mbr);
        total_size += FileUtil.getPathSize(fs, file);
    }
    // If the largest file is globally indexed, use its partitions
    total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f);
    int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20);
    int num_cells = (int) Math.max(1,
            total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath));
    LOG.info("Number of cells is configured to be " + num_cells);

    OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive);
    OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce);
    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
    gridInfo.calculateCellDimensions(num_cells);
    OperationsParams.setShape(job, PartitionGrid, gridInfo);

    TextOutputFormat.setOutputPath(job, outputPath);

    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
    }

    // Start the job
    RunningJob runningJob = JobClient.runJob(job);
    Counters counters = runningJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    return resultCount;
}
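Contains, like the Crosses, Disjoint, and Equals operations below, calls getFileStatus() only to read each input's HDFS block size and then uses the larger of the two values as the minimum split size. The relevant fragment in isolation, reusing the job, inFs, and inFiles variables from the method above:

// Never let a map split be smaller than the larger block size of the two inputs
long blockSize0 = inFs.getFileStatus(inFiles[0]).getBlockSize();
long blockSize1 = inFs.getFileStatus(inFiles[1]).getBlockSize();
job.setLong("mapred.min.split.size", Math.max(blockSize0, blockSize1));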
From source file: edu.umn.cs.spatialHadoop.operations.Crosses.java
License: Open Source License
public static <S extends Shape> long crosses(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Crosses.class);

    LOG.info("Crosses journey starts ....");
    FileSystem inFs = inFiles[0].getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        FileSystem outFs = FileSystem.get(job);
        do {
            outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    }
    FileSystem outFs = outputPath.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setJobName("Crosses");
    job.setMapperClass(CrossesMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IndexedText.class);
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setLong("mapred.min.split.size",
            Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize()));

    job.setReducerClass(CrossesReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(ShapeLineInputFormat.class);
    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, inFiles);

    // Calculate and set the dimensions of the grid to use in the map phase
    long total_size = 0;
    Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : inFiles) {
        FileSystem fs = file.getFileSystem(params);
        Rectangle file_mbr = FileMBR.fileMBR(file, params);
        mbr.expand(file_mbr);
        total_size += FileUtil.getPathSize(fs, file);
    }
    // If the largest file is globally indexed, use its partitions
    total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f);
    int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20);
    int num_cells = (int) Math.max(1,
            total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath));
    LOG.info("Number of cells is configured to be " + num_cells);

    OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive);
    OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce);
    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
    gridInfo.calculateCellDimensions(num_cells);
    OperationsParams.setShape(job, PartitionGrid, gridInfo);

    TextOutputFormat.setOutputPath(job, outputPath);

    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
    }

    // Start the job
    RunningJob runningJob = JobClient.runJob(job);
    Counters counters = runningJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    return resultCount;
}
From source file: edu.umn.cs.spatialHadoop.operations.Disjoint.java
License: Open Source License
public static <S extends Shape> long disjoint(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Disjoint.class);

    LOG.info("Touches journey starts ....");
    FileSystem inFs = inFiles[0].getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        FileSystem outFs = FileSystem.get(job);
        do {
            outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    }
    FileSystem outFs = outputPath.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setJobName("Disjoint");
    job.setMapperClass(DisjointMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IndexedText.class);
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setLong("mapred.min.split.size",
            Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize()));

    job.setReducerClass(DisjointReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(ShapeLineInputFormat.class);
    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, inFiles);

    // Calculate and set the dimensions of the grid to use in the map phase
    long total_size = 0;
    Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : inFiles) {
        FileSystem fs = file.getFileSystem(params);
        Rectangle file_mbr = FileMBR.fileMBR(file, params);
        mbr.expand(file_mbr);
        total_size += FileUtil.getPathSize(fs, file);
    }
    // If the largest file is globally indexed, use its partitions
    total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f);
    int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20);
    int num_cells = (int) Math.max(1,
            total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath));
    LOG.info("Number of cells is configured to be " + num_cells);

    OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive);
    OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce);
    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
    gridInfo.calculateCellDimensions(num_cells);
    OperationsParams.setShape(job, PartitionGrid, gridInfo);

    TextOutputFormat.setOutputPath(job, outputPath);

    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
    }

    // Start the job
    RunningJob runningJob = JobClient.runJob(job);
    Counters counters = runningJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    return resultCount;
}
From source file: edu.umn.cs.spatialHadoop.operations.DistributedJoin.java
License: Open Source License
/**
 * Select a file to repartition based on some heuristics. If only one file is
 * indexed, the non-indexed file is repartitioned. If both files are indexed,
 * the smaller file is repartitioned.
 *
 * @param files
 * @param params
 * @return the index in the given array of the file to be repartitioned. -1 if
 *         all files are non-indexed
 * @throws IOException
 */
protected static int selectRepartition(final Path[] files, OperationsParams params) throws IOException {
    int largest_partitioned_file = -1;
    long largest_size = 0;

    for (int i_file = 0; i_file < files.length; i_file++) {
        FileSystem fs = files[i_file].getFileSystem(params);
        GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, files[i_file]);
        if (gindex != null) {
            // Compute total size (all files in directory)
            long total_size = 0;
            for (Partition p : gindex) {
                Path file = new Path(files[i_file], p.filename);
                total_size += fs.getFileStatus(file).getLen();
            }
            if (total_size > largest_size) {
                largest_partitioned_file = i_file;
                largest_size = total_size;
            }
        }
    }
    return largest_partitioned_file == -1 ? -1 : 1 - largest_partitioned_file;
}
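Here getFileStatus(file).getLen() is summed over every partition file named in the global index to measure the total size of an indexed directory. The same directory-sizing idea can be sketched without a global index by listing the directory directly; listStatus() is a standard FileSystem call, while the dir variable below is a hypothetical placeholder:

FileSystem fs = dir.getFileSystem(params);      // 'dir' is a hypothetical directory path
long total_size = 0;
for (FileStatus part : fs.listStatus(dir)) {    // one FileStatus per file in the directory
    total_size += part.getLen();                // accumulate the bytes of each partition file
}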
From source file: edu.umn.cs.spatialHadoop.operations.DistributedJoin.java
License: Open Source License
/**
 * Spatially joins two files.
 * @param inputFiles
 * @param userOutputPath
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public static long distributedJoinSmart(final Path[] inputFiles, Path userOutputPath, OperationsParams params)
        throws IOException, InterruptedException {
    Path[] originalInputFiles = inputFiles.clone();
    FileSystem outFs = inputFiles[0].getFileSystem(params);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputFiles[0].getName() + ".dj_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    }

    // Decide whether to do a repartition step or not
    int cost_with_repartition, cost_without_repartition;
    final FileStatus[] fStatus = new FileStatus[inputFiles.length];
    for (int i_file = 0; i_file < inputFiles.length; i_file++) {
        // TODO work with folders. Calculate size more accurately
        FileSystem fs = inputFiles[i_file].getFileSystem(params);
        fStatus[i_file] = fs.getFileStatus(inputFiles[i_file]);
    }

    // Sort files by length (size)
    IndexedSortable filesBySize = new IndexedSortable() {
        @Override
        public void swap(int i, int j) {
            Path tmp1 = inputFiles[i];
            inputFiles[i] = inputFiles[j];
            inputFiles[j] = tmp1;

            FileStatus tmp2 = fStatus[i];
            fStatus[i] = fStatus[j];
            fStatus[j] = tmp2;
        }

        @Override
        public int compare(int i, int j) {
            // Equal lengths compare as equal; otherwise order ascending by length
            if (fStatus[i].getLen() == fStatus[j].getLen())
                return 0;
            return fStatus[i].getLen() < fStatus[j].getLen() ? -1 : 1;
        }
    };

    new QuickSort().sort(filesBySize, 0, inputFiles.length);

    GlobalIndex<Partition>[] gIndexes = new GlobalIndex[fStatus.length];
    int[] numBlocks = new int[fStatus.length];
    for (int i_file = 0; i_file < fStatus.length; i_file++) {
        gIndexes[i_file] = SpatialSite.getGlobalIndex(outFs, fStatus[i_file].getPath());
        if (gIndexes[i_file] != null) {
            // Number of blocks is equal to number of partitions in global index
            numBlocks[i_file] = gIndexes[i_file].size();
        } else if (fStatus[i_file].isDir()) {
            // Add up number of file system blocks in all subfiles of this directory
            numBlocks[i_file] = 0;
            FileStatus[] subfiles = outFs.listStatus(inputFiles[i_file], SpatialSite.NonHiddenFileFilter);
            for (FileStatus subfile : subfiles) {
                numBlocks[i_file] += outFs.getFileBlockLocations(subfile, 0, subfile.getLen()).length;
            }
        } else {
            // Number of file system blocks in input file
            numBlocks[i_file] = outFs.getFileBlockLocations(fStatus[i_file], 0, fStatus[i_file].getLen()).length;
        }
    }

    cost_without_repartition = gIndexes[0] != null && gIndexes[1] != null
            ? GlobalIndex.spatialJoin(gIndexes[0], gIndexes[1], null)
            : (numBlocks[0] * numBlocks[1]);
    // Total cost = Cost of repartition (=== 2 * numBlocks[0]) +
    // cost of join (=== numBlocks[0] + numBlocks[1])
    cost_with_repartition = numBlocks[0] * 3 + numBlocks[1];
    LOG.info("Cost with repartition is estimated to " + cost_with_repartition);
    LOG.info("Cost without repartition is estimated to " + cost_without_repartition);
    boolean need_repartition = cost_with_repartition < cost_without_repartition;
    if (need_repartition) {
        int file_to_repartition = selectRepartition(inputFiles, params);
        repartitionStep(inputFiles, file_to_repartition, params);
    }

    // Restore inputFiles to the original order by user
    if (inputFiles[1] != originalInputFiles[1]) {
        Path temp = inputFiles[0];
        inputFiles[0] = inputFiles[1];
        inputFiles[1] = temp;
    }

    // Redistribute join the larger file and the partitioned file
    long result_size = DistributedJoin.joinStep(inputFiles, outputPath, params);

    if (userOutputPath == null)
        outFs.delete(outputPath, true);

    return result_size;
}
From source file: edu.umn.cs.spatialHadoop.operations.Equals.java
License: Open Source License
public static <S extends Shape> long equals(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Equals.class);

    LOG.info("Equals journey starts ....");
    FileSystem inFs = inFiles[0].getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        FileSystem outFs = FileSystem.get(job);
        do {
            outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    }
    FileSystem outFs = outputPath.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setJobName("Equals");
    job.setMapperClass(EqualsMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IndexedText.class);
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setLong("mapred.min.split.size",
            Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize()));

    job.setReducerClass(EqualsReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(ShapeLineInputFormat.class);
    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, inFiles);

    // Calculate and set the dimensions of the grid to use in the map phase
    long total_size = 0;
    Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : inFiles) {
        FileSystem fs = file.getFileSystem(params);
        Rectangle file_mbr = FileMBR.fileMBR(file, params);
        mbr.expand(file_mbr);
        total_size += FileUtil.getPathSize(fs, file);
    }
    // If the largest file is globally indexed, use its partitions
    total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f);
    int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20);
    int num_cells = (int) Math.max(1,
            total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath));
    LOG.info("Number of cells is configured to be " + num_cells);

    OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive);
    OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce);
    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
    gridInfo.calculateCellDimensions(num_cells);
    OperationsParams.setShape(job, PartitionGrid, gridInfo);

    TextOutputFormat.setOutputPath(job, outputPath);

    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
    }

    // Start the job
    RunningJob runningJob = JobClient.runJob(job);
    Counters counters = runningJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    return resultCount;
}
From source file: edu.umn.cs.spatialHadoop.operations.FileMBR.java
License: Open Source License
public static Partition fileMBRLocal(Path[] inFiles, final OperationsParams params)
        throws IOException, InterruptedException {
    // 1- Split the input path/file to get splits that can be processed independently
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inFiles);
    final List<org.apache.hadoop.mapreduce.InputSplit> splits = inputFormat.getSplits(job);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());

    // 2- Process splits in parallel
    List<Map<String, Partition>> allMbrs = Parallel.forEach(splits.size(),
            new RunnableRange<Map<String, Partition>>() {
                @Override
                public Map<String, Partition> run(int i1, int i2) {
                    Map<String, Partition> mbrs = new HashMap<String, Partition>();
                    for (int i = i1; i < i2; i++) {
                        try {
                            org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
                                    (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
                            final RecordReader<Rectangle, Iterable<Shape>> reader =
                                    inputFormat.createRecordReader(fsplit, null);
                            if (reader instanceof SpatialRecordReader3) {
                                ((SpatialRecordReader3) reader).initialize(fsplit, params);
                            } else if (reader instanceof RTreeRecordReader3) {
                                ((RTreeRecordReader3) reader).initialize(fsplit, params);
                            } else if (reader instanceof HDFRecordReader) {
                                ((HDFRecordReader) reader).initialize(fsplit, params);
                            } else {
                                throw new RuntimeException("Unknown record reader");
                            }
                            Partition p = mbrs.get(fsplit.getPath().getName());
                            if (p == null) {
                                p = new Partition();
                                p.filename = fsplit.getPath().getName();
                                p.cellId = p.filename.hashCode();
                                p.size = 0;
                                p.recordCount = 0;
                                p.set(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
                                mbrs.put(p.filename, p);
                            }
                            Text temp = new Text2();
                            while (reader.nextKeyValue()) {
                                Iterable<Shape> shapes = reader.getCurrentValue();
                                for (Shape s : shapes) {
                                    Rectangle mbr = s.getMBR();
                                    if (mbr != null)
                                        p.expand(mbr);
                                    p.recordCount++;
                                    temp.clear();
                                    s.toText(temp);
                                    p.size += temp.getLength() + 1;
                                }
                            }
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        } catch (InterruptedException e) {
                            throw new RuntimeException(e);
                        }
                    }
                    return mbrs;
                }
            }, parallelism);

    Map<String, Partition> mbrs = allMbrs.remove(allMbrs.size() - 1);
    for (Map<String, Partition> list : allMbrs) {
        for (Partition p1 : list.values()) {
            Partition p2 = mbrs.get(p1.filename);
            if (p2 != null) {
                p2.expand(p1);
            } else {
                mbrs.put(p1.filename, p1);
            }
        }
    }

    // Cache the final result, if needed
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!inFs.getFileStatus(inFile).isDir())
            continue;
        Path gindex_path = new Path(inFile, "_master.heap");
        // Answer has been already cached (may be by another job)
        if (inFs.exists(gindex_path))
            continue;
        FileStatus[] files = inFs.listStatus(inFile, SpatialSite.NonHiddenFileFilter);
        PrintStream wktout = new PrintStream(inFs.create(new Path(inFile, "_heap.wkt"), false));
        PrintStream gout = new PrintStream(inFs.create(gindex_path, false));
        Text text = new Text2();
        for (FileStatus file : files) {
            text.clear();
            Partition p = mbrs.get(file.getPath().getName());
            gout.println(p.toText(text).toString());
            wktout.println(p.toWKT());
        }
        wktout.close();
        gout.close();
    }

    // Return the final answer
    Partition finalResult = new Partition();
    finalResult.size = finalResult.recordCount = 0;
    finalResult.x1 = finalResult.y1 = Double.MAX_VALUE;
    finalResult.x2 = finalResult.y2 = -Double.MAX_VALUE;
    for (Partition p2 : mbrs.values())
        finalResult.expand(p2);
    return finalResult;
}
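fileMBRLocal() calls getFileStatus(inFile).isDir() to cache the computed MBRs only when the input path is a directory. FileStatus.isDir() is deprecated in later Hadoop releases in favor of isDirectory(); an equivalent check inside the same loop would be:

FileStatus inStatus = inFs.getFileStatus(inFile);
if (!inStatus.isDirectory()) {   // isDirectory() replaces the deprecated isDir()
    continue;                    // only directory inputs get a cached _master.heap file
}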
From source file: edu.umn.cs.spatialHadoop.operations.GeometricPlot.java
License: Open Source License
/**
 * Combines images of different datasets into one image that is displayed
 * to users.
 * This method is called from the web interface to display one image for
 * multiple selected datasets.
 * @param conf The configuration used to access the file system that contains the datasets and images
 * @param files Paths to directories which contain the datasets
 * @param includeBoundaries Also plot the indexing boundaries of datasets
 * @return An image that is the combination of all datasets images
 * @throws IOException
 * @throws InterruptedException
 */
public static BufferedImage combineImages(Configuration conf, Path[] files, boolean includeBoundaries,
        int width, int height) throws IOException, InterruptedException {
    BufferedImage result = null;
    // Retrieve the MBRs of all datasets
    Rectangle allMbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : files) {
        Rectangle mbr = FileMBR.fileMBR(file, new OperationsParams(conf));
        allMbr.expand(mbr);
    }

    // Adjust width and height to maintain aspect ratio
    if ((allMbr.x2 - allMbr.x1) / (allMbr.y2 - allMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((allMbr.y2 - allMbr.y1) * width / (allMbr.x2 - allMbr.x1));
    } else {
        width = (int) ((allMbr.x2 - allMbr.x1) * height / (allMbr.y2 - allMbr.y1));
    }
    result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        if (fs.getFileStatus(file).isDir()) {
            // Retrieve the MBR of this dataset
            Rectangle mbr = FileMBR.fileMBR(file, new OperationsParams(conf));
            // Compute the coordinates of this image in the whole picture
            mbr.x1 = (mbr.x1 - allMbr.x1) * width / allMbr.getWidth();
            mbr.x2 = (mbr.x2 - allMbr.x1) * width / allMbr.getWidth();
            mbr.y1 = (mbr.y1 - allMbr.y1) * height / allMbr.getHeight();
            mbr.y2 = (mbr.y2 - allMbr.y1) * height / allMbr.getHeight();
            // Retrieve the image of this dataset
            Path imagePath = new Path(file, "_data.png");
            if (!fs.exists(imagePath))
                throw new RuntimeException("Image " + imagePath + " not ready");
            FSDataInputStream imageFile = fs.open(imagePath);
            BufferedImage image = ImageIO.read(imageFile);
            imageFile.close();
            // Draw the image
            Graphics graphics = result.getGraphics();
            graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(), (int) mbr.getHeight(), null);
            graphics.dispose();

            if (includeBoundaries) {
                // Plot also the image of the boundaries
                // Retrieve the image of the dataset boundaries
                imagePath = new Path(file, "_partitions.png");
                if (fs.exists(imagePath)) {
                    imageFile = fs.open(imagePath);
                    image = ImageIO.read(imageFile);
                    imageFile.close();
                    // Draw the image
                    graphics = result.getGraphics();
                    graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(), (int) mbr.getHeight(), null);
                    graphics.dispose();
                }
            }
        }
    }
    return result;
}