List of usage examples for org.apache.hadoop.fs.FileSystem.getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
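Before the project-specific examples below, here is a minimal, self-contained sketch of the pattern they all rely on: call getFileStatus once and read the metadata (length, block size, directory flag, modification time) from the returned FileStatus. The path used here is a placeholder, not taken from any example on this page; isDir() is used to match the older Hadoop API that the examples below also use.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder path; replace with a real file or directory on your cluster
        Path path = new Path("/tmp/example.txt");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);

        // A single call returns all of the file's metadata
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length (bytes):    " + status.getLen());
        System.out.println("block size:        " + status.getBlockSize());
        System.out.println("is directory:      " + status.isDir());
        System.out.println("modification time: " + status.getModificationTime());
    }
}

The examples that follow use exactly these accessors, most often getLen() and getBlockSize(), to decide whether to process a file locally or with a MapReduce job.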
From source file: com.ricemap.spateDB.operations.FileMBR.java
License: Apache License
/**
 * Computes the minimal bounding Prism (MBR) of a file by issuing a
 * MapReduce job that scans the whole file.
 * @param fs file system that contains the input file
 * @param file path to the input file
 * @param stockShape an instance of the shape stored in the file
 * @param background submit the job in the background and return immediately
 * @return the MBR of the file, or <code>null</code> if the job runs in the background
 * @throws IOException
 */
public static <S extends Shape> Prism fileMBRMapReduce(FileSystem fs, Path file, S stockShape,
        boolean background) throws IOException {
    // Quickly get file MBR if it is globally indexed
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(fs, file);
    if (globalIndex != null) {
        // Return the MBR of the global index.
        // Compute file size by adding up sizes of all files assuming they are
        // not compressed
        long totalLength = 0;
        for (Partition p : globalIndex) {
            Path filePath = new Path(file, p.filename);
            if (fs.exists(filePath))
                totalLength += fs.getFileStatus(filePath).getLen();
        }
        sizeOfLastProcessedFile = totalLength;
        return globalIndex.getMBR();
    }
    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(file.toUri().getPath() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Prism.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeInputFormat.class);
    SpatialSite.setShapeClass(job, stockShape.getClass());
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        FileMBR.sizeOfLastProcessedFile = inputBytesCounter.getValue();

        // Read job result
        FileStatus[] results = outFs.listStatus(outputPath);
        Prism mbr = new Prism();
        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                Text text = new Text();
                if (lineReader.readLine(text) > 0) {
                    mbr.fromText(text);
                }
                lineReader.close();
            }
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}
From source file: com.ricemap.spateDB.operations.FileMBR.java
License: Apache License
/**
 * Computes the minimal bounding Prism (MBR) of a file by opening it and
 * scanning it record by record on the local machine.
 * @param fs file system that contains the input file
 * @param file path to the input file
 * @param shape an instance of the shape stored in the file
 * @return the MBR of the file
 * @throws IOException
 */
public static <S extends Shape> Prism fileMBRLocal(FileSystem fs, Path file, S shape) throws IOException {
    // Try to get file MBR from the global index (if possible)
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, file);
    if (gindex != null) {
        return gindex.getMBR();
    }
    long file_size = fs.getFileStatus(file).getLen();
    sizeOfLastProcessedFile = file_size;

    ShapeRecordReader<Shape> shapeReader = new ShapeRecordReader<Shape>(new Configuration(),
            new FileSplit(file, 0, file_size, new String[] {}));

    Prism mbr = new Prism(Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE,
            -Double.MAX_VALUE, -Double.MAX_VALUE);

    Prism key = shapeReader.createKey();
    while (shapeReader.next(key, shape)) {
        Prism rect = shape.getMBR();
        mbr.expand(rect);
    }
    return mbr;
}
From source file: com.ricemap.spateDB.operations.FileMBR.java
License: Apache License
public static Prism fileMBR(FileSystem fs, Path inFile, Shape stockShape) throws IOException {
    FileSystem inFs = inFile.getFileSystem(new Configuration());
    FileStatus inFStatus = inFs.getFileStatus(inFile);
    if (inFStatus.isDir() || inFStatus.getLen() / inFStatus.getBlockSize() > 1) {
        // Either a directory of files or a large file that spans multiple blocks
        return fileMBRMapReduce(fs, inFile, stockShape, false);
    } else {
        // A single small file, process it without MapReduce
        return fileMBRLocal(fs, inFile, stockShape);
    }
}
From source file: com.ricemap.spateDB.operations.LineRandomizer.java
License: Apache License
/**
 * Randomizes the order of lines in a file by issuing a MapReduce job.
 * @param infile path to the input file
 * @param outfile path to the output file
 * @param overwrite overwrite the output file if it already exists
 * @throws IOException
 */
public static void randomizerMapReduce(Path infile, Path outfile, boolean overwrite) throws IOException {
    JobConf job = new JobConf(LineRandomizer.class);

    FileSystem outfs = outfile.getFileSystem(job);
    if (overwrite)
        outfs.delete(outfile, true);

    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    FileSystem infs = infile.getFileSystem(job);
    int numOfPartitions = (int) Math
            .ceil((double) infs.getFileStatus(infile).getLen() / infs.getDefaultBlockSize(outfile));
    job.setInt(NumOfPartitions, numOfPartitions);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}
From source file: com.ricemap.spateDB.operations.Plot.java
License: Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Prism.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Prism fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.t1, fileMbr.x1, fileMbr.y1, fileMbr.t2, fileMbr.x2,
                fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    // Set output committer which will stitch images together after all reducers
    // finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file: com.ricemap.spateDB.operations.Plot.java
License: Apache License
public static <S extends Shape> void plotLocal(Path inFile, Path outFile, S shape, int width, int height,
        Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount) throws IOException {
    FileSystem inFs = inFile.getFileSystem(new Configuration());
    Prism fileMbr = FileMBR.fileMBRLocal(inFs, inFile, shape);
    LOG.info("FileMBR: " + fileMbr);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }

    double scale2 = (double) width * height
            / ((double) (fileMbr.x2 - fileMbr.x1) * (fileMbr.y2 - fileMbr.y1));

    // Create an image
    BufferedImage image = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
    Graphics2D graphics = image.createGraphics();
    Color bg_color = new Color(0, 0, 0, 0);
    graphics.setBackground(bg_color);
    graphics.clearRect(0, 0, width, height);
    graphics.setColor(color);

    long fileLength = inFs.getFileStatus(inFile).getLen();
    ShapeRecordReader<S> reader = new ShapeRecordReader<S>(inFs.open(inFile), 0, fileLength);

    Prism cell = reader.createKey();
    while (reader.next(cell, shape)) {
        drawShape(graphics, shape, fileMbr, width, height, scale2);
    }

    reader.close();
    graphics.dispose();
    FileSystem outFs = outFile.getFileSystem(new Configuration());
    OutputStream out = outFs.create(outFile, true);
    ImageIO.write(image, "png", out);
    out.close();
}
From source file: com.ricemap.spateDB.operations.Plot.java
License: Apache License
public static <S extends Shape> void plot(Path inFile, Path outFile, S shape, int width, int height,
        Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount) throws IOException {
    FileSystem inFs = inFile.getFileSystem(new Configuration());
    FileStatus inFStatus = inFs.getFileStatus(inFile);
    if (inFStatus.isDir() || inFStatus.getLen() > 300 * inFStatus.getBlockSize()) {
        plotMapReduce(inFile, outFile, shape, width, height, color, showBorders, showBlockCount,
                showRecordCount, false);
    } else {
        plotLocal(inFile, outFile, shape, width, height, color, showBorders, showBlockCount,
                showRecordCount);
    }
}
From source file: com.ricemap.spateDB.operations.Plot.java
License: Apache License
/**
 * Combines images of different datasets into one image that is displayed
 * to users.
 * @param conf configuration used to access the file system that contains the datasets and images
 * @param files paths to directories which contain the datasets
 * @param includeBoundaries also plot the indexing boundaries of datasets
 * @param width width of the combined image in pixels
 * @param height height of the combined image in pixels
 * @return an image that is the combination of all datasets images
 * @throws IOException
 */
public static BufferedImage combineImages(Configuration conf, Path[] files, boolean includeBoundaries,
        int width, int height) throws IOException {
    BufferedImage result = null;
    // Retrieve the MBRs of all datasets
    Prism allMbr = new Prism(Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE,
            -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        Prism mbr = FileMBR.fileMBR(fs, file, null);
        allMbr.expand(mbr);
    }

    // Adjust width and height to maintain aspect ratio
    if ((allMbr.x2 - allMbr.x1) / (allMbr.y2 - allMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((allMbr.y2 - allMbr.y1) * width / (allMbr.x2 - allMbr.x1));
    } else {
        width = (int) ((allMbr.x2 - allMbr.x1) * height / (allMbr.y2 - allMbr.y1));
    }
    result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        if (fs.getFileStatus(file).isDir()) {
            // Retrieve the MBR of this dataset
            Prism mbr = FileMBR.fileMBR(fs, file, null);
            // Compute the coordinates of this image in the whole picture
            mbr.x1 = (mbr.x1 - allMbr.x1) * width / allMbr.getWidth();
            mbr.x2 = (mbr.x2 - allMbr.x1) * width / allMbr.getWidth();
            mbr.y1 = (mbr.y1 - allMbr.y1) * height / allMbr.getHeight();
            mbr.y2 = (mbr.y2 - allMbr.y1) * height / allMbr.getHeight();
            // Retrieve the image of this dataset
            Path imagePath = new Path(file, "_data.png");
            if (!fs.exists(imagePath))
                throw new RuntimeException("Image " + imagePath + " not ready");
            FSDataInputStream imageFile = fs.open(imagePath);
            BufferedImage image = ImageIO.read(imageFile);
            imageFile.close();
            // Draw the image
            Graphics graphics = result.getGraphics();
            graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(),
                    (int) mbr.getHeight(), null);
            graphics.dispose();

            if (includeBoundaries) {
                // Plot also the image of the boundaries
                // Retrieve the image of the dataset boundaries
                imagePath = new Path(file, "_partitions.png");
                if (fs.exists(imagePath)) {
                    imageFile = fs.open(imagePath);
                    image = ImageIO.read(imageFile);
                    imageFile.close();
                    // Draw the image
                    graphics = result.getGraphics();
                    graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(),
                            (int) mbr.getHeight(), null);
                    graphics.dispose();
                }
            }
        }
    }

    return result;
}
From source file: com.ricemap.spateDB.operations.RangeQuery.java
License: Apache License
/**
 * Runs a range query on the local machine by iterating over the whole file.
 *
 * @param fs
 *          - FileSystem that contains input file
 * @param file
 *          - path to the input file
 * @param queryRange
 *          - The range to look in
 * @param shape
 *          - An instance of the shape stored in file
 * @param output
 *          - Output is sent to this collector. If <code>null</code>,
 *            output is not collected and only the number of results is
 *            returned.
 * @return number of results found
 * @throws IOException
 */
public static <S extends Shape> long rangeQueryLocal(FileSystem fs, Path file, Shape queryRange, S shape,
        ResultCollector<S> output) throws IOException {
    long file_size = fs.getFileStatus(file).getLen();
    ShapeRecordReader<S> shapeReader = new ShapeRecordReader<S>(fs.open(file), 0, file_size);

    long resultCount = 0;
    Prism cell = shapeReader.createKey();

    while (shapeReader.next(cell, shape)) {
        if (shape.isIntersected(queryRange)) {
            boolean report_result;
            if (cell.isValid()) {
                // Check for duplicate avoidance
                Prism intersection_mbr = queryRange.getMBR().getIntersection(shape.getMBR());
                report_result = cell.contains(intersection_mbr.t1, intersection_mbr.x1, intersection_mbr.y1);
            } else {
                report_result = true;
            }
            if (report_result) {
                resultCount++;
                if (output != null) {
                    output.collect(shape);
                }
            }
        }
    }
    shapeReader.close();

    return resultCount;
}
From source file: com.ricemap.spateDB.operations.RecordCount.java
License: Apache License
/**
 * Counts the approximate number of lines in a file by getting an approximate
 * average line length.
 * @param fs
 * @param file
 * @return
 * @throws IOException
 */
public static <T> long recordCountApprox(FileSystem fs, Path file) throws IOException {
    final long fileSize = fs.getFileStatus(file).getLen();
    final FSDataInputStream in = fs.open(file);

    Estimator<Long> lineEstimator = new Estimator<Long>(0.05);
    lineEstimator.setRandomSample(new Estimator.RandomSample() {

        @Override
        public double next() {
            int lineLength = 0;
            try {
                long randomFilePosition = (long) (Math.random() * fileSize);
                in.seek(randomFilePosition);

                // Skip the rest of this line
                byte lastReadByte;
                do {
                    lastReadByte = in.readByte();
                } while (lastReadByte != '\n' && lastReadByte != '\r');

                while (in.getPos() < fileSize - 1) {
                    lastReadByte = in.readByte();
                    if (lastReadByte == '\n' || lastReadByte == '\r') {
                        break;
                    }
                    lineLength++;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            return lineLength + 1;
        }
    });

    lineEstimator.setUserFunction(new Estimator.UserFunction<Long>() {
        @Override
        public Long calculate(double x) {
            return (long) (fileSize / x);
        }
    });

    lineEstimator.setQualityControl(new Estimator.QualityControl<Long>() {

        @Override
        public boolean isAcceptable(Long y1, Long y2) {
            return (double) Math.abs(y2 - y1) / Math.min(y1, y2) < 0.01;
        }
    });

    Estimator.Range<Long> lineCount = lineEstimator.getEstimate();
    in.close();

    return (lineCount.limit1 + lineCount.limit2) / 2;
}