Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Source Link

Document

Return a file status object that represents the path.

Usage

From source file:com.ricemap.spateDB.operations.FileMBR.java

License:Apache License

/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * that does the thing//from w ww .jav  a  2s  . c  o  m
 * @param conf
 * @param fs
 * @param file
 * @return
 * @throws IOException 
 */
public static <S extends Shape> Prism fileMBRMapReduce(FileSystem fs, Path file, S stockShape,
        boolean background) throws IOException {
    // Quickly get file MBR if it is globally indexed
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(fs, file);
    if (globalIndex != null) {
        // Return the MBR of the global index.
        // Compute file size by adding up sizes of all files assuming they are
        // not compressed
        long totalLength = 0;
        for (Partition p : globalIndex) {
            Path filePath = new Path(file, p.filename);
            if (fs.exists(filePath))
                totalLength += fs.getFileStatus(filePath).getLen();
        }
        sizeOfLastProcessedFile = totalLength;
        return globalIndex.getMBR();
    }
    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(file.toUri().getPath() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Prism.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeInputFormat.class);
    SpatialSite.setShapeClass(job, stockShape.getClass());
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        FileMBR.sizeOfLastProcessedFile = inputBytesCounter.getValue();

        // Read job result
        FileStatus[] results = outFs.listStatus(outputPath);
        Prism mbr = new Prism();
        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                Text text = new Text();
                if (lineReader.readLine(text) > 0) {
                    mbr.fromText(text);
                }
                lineReader.close();
            }
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}

From source file:com.ricemap.spateDB.operations.FileMBR.java

License:Apache License

/**
 * Counts the exact number of lines in a file by opening the file and
 * reading it line by line/*  ww w .j  av  a 2 s  .  com*/
 * @param fs
 * @param file
 * @return
 * @throws IOException
 */
public static <S extends Shape> Prism fileMBRLocal(FileSystem fs, Path file, S shape) throws IOException {
    // Try to get file MBR from the global index (if possible)
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, file);
    if (gindex != null) {
        return gindex.getMBR();
    }
    long file_size = fs.getFileStatus(file).getLen();
    sizeOfLastProcessedFile = file_size;

    ShapeRecordReader<Shape> shapeReader = new ShapeRecordReader<Shape>(new Configuration(),
            new FileSplit(file, 0, file_size, new String[] {}));

    Prism mbr = new Prism(Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE,
            -Double.MAX_VALUE, -Double.MAX_VALUE);

    Prism key = shapeReader.createKey();

    while (shapeReader.next(key, shape)) {
        Prism rect = shape.getMBR();
        mbr.expand(rect);
    }
    return mbr;
}

From source file:com.ricemap.spateDB.operations.FileMBR.java

License:Apache License

public static Prism fileMBR(FileSystem fs, Path inFile, Shape stockShape) throws IOException {
    FileSystem inFs = inFile.getFileSystem(new Configuration());
    FileStatus inFStatus = inFs.getFileStatus(inFile);
    if (inFStatus.isDir() || inFStatus.getLen() / inFStatus.getBlockSize() > 1) {
        // Either a directory of file or a large file
        return fileMBRMapReduce(fs, inFile, stockShape, false);
    } else {//from w w w  .  j  a  v  a2 s . c om
        // A single small file, process it without MapReduce
        return fileMBRLocal(fs, inFile, stockShape);
    }
}

From source file:com.ricemap.spateDB.operations.LineRandomizer.java

License:Apache License

/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * that does the thing/*from ww w . j ava2s  .com*/
 * @param conf
 * @param infs
 * @param infile
 * @return
 * @throws IOException 
 */
public static void randomizerMapReduce(Path infile, Path outfile, boolean overwrite) throws IOException {
    JobConf job = new JobConf(LineRandomizer.class);

    FileSystem outfs = outfile.getFileSystem(job);

    if (overwrite)
        outfs.delete(outfile, true);

    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    FileSystem infs = infile.getFileSystem(job);
    int numOfPartitions = (int) Math
            .ceil((double) infs.getFileStatus(infile).getLen() / infs.getDefaultBlockSize(outfile));
    job.setInt(NumOfPartitions, numOfPartitions);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}

From source file:com.ricemap.spateDB.operations.Plot.java

License:Apache License

public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Prism.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Prism fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;//  www . j  a v  a  2s . c  om
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.t1, fileMbr.x1, fileMbr.y1, fileMbr.t2, fileMbr.x2,
                fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);
    // Set output committer which will stitch images together after all reducers
    // finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}

From source file:com.ricemap.spateDB.operations.Plot.java

License:Apache License

public static <S extends Shape> void plotLocal(Path inFile, Path outFile, S shape, int width, int height,
        Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount) throws IOException {
    FileSystem inFs = inFile.getFileSystem(new Configuration());
    Prism fileMbr = FileMBR.fileMBRLocal(inFs, inFile, shape);
    LOG.info("FileMBR: " + fileMbr);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {// ww w  .j ava2 s  .c o m
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }

    double scale2 = (double) width * height / ((double) (fileMbr.x2 - fileMbr.x1) * (fileMbr.y2 - fileMbr.y1));

    // Create an image
    BufferedImage image = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
    Graphics2D graphics = image.createGraphics();
    Color bg_color = new Color(0, 0, 0, 0);
    graphics.setBackground(bg_color);
    graphics.clearRect(0, 0, width, height);
    graphics.setColor(color);

    long fileLength = inFs.getFileStatus(inFile).getLen();
    ShapeRecordReader<S> reader = new ShapeRecordReader<S>(inFs.open(inFile), 0, fileLength);

    Prism cell = reader.createKey();
    while (reader.next(cell, shape)) {
        drawShape(graphics, shape, fileMbr, width, height, scale2);
    }

    reader.close();
    graphics.dispose();
    FileSystem outFs = outFile.getFileSystem(new Configuration());
    OutputStream out = outFs.create(outFile, true);
    ImageIO.write(image, "png", out);
    out.close();
}

From source file:com.ricemap.spateDB.operations.Plot.java

License:Apache License

public static <S extends Shape> void plot(Path inFile, Path outFile, S shape, int width, int height,
        Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount) throws IOException {
    FileSystem inFs = inFile.getFileSystem(new Configuration());
    FileStatus inFStatus = inFs.getFileStatus(inFile);
    if (inFStatus.isDir() || inFStatus.getLen() > 300 * inFStatus.getBlockSize()) {
        plotMapReduce(inFile, outFile, shape, width, height, color, showBorders, showBlockCount,
                showRecordCount, false);
    } else {/* w ww  .ja  v a 2 s  .co  m*/
        plotLocal(inFile, outFile, shape, width, height, color, showBorders, showBlockCount, showRecordCount);
    }
}

From source file:com.ricemap.spateDB.operations.Plot.java

License:Apache License

/**
 * Combines images of different datasets into one image that is displayed
 * to users.//from   www . ja  v a 2 s.c  o  m
 * @param fs The file system that contains the datasets and images
 * @param files Paths to directories which contains the datasets
 * @param includeBoundaries Also plot the indexing boundaries of datasets
 * @return An image that is the combination of all datasets images
 * @throws IOException
 */
public static BufferedImage combineImages(Configuration conf, Path[] files, boolean includeBoundaries,
        int width, int height) throws IOException {
    BufferedImage result = null;
    // Retrieve the MBRs of all datasets
    Prism allMbr = new Prism(Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE,
            -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        Prism mbr = FileMBR.fileMBR(fs, file, null);
        allMbr.expand(mbr);
    }

    // Adjust width and height to maintain aspect ratio
    if ((allMbr.x2 - allMbr.x1) / (allMbr.y2 - allMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((allMbr.y2 - allMbr.y1) * width / (allMbr.x2 - allMbr.x1));
    } else {
        width = (int) ((allMbr.x2 - allMbr.x1) * height / (allMbr.y2 - allMbr.y1));
    }
    result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        if (fs.getFileStatus(file).isDir()) {
            // Retrieve the MBR of this dataset
            Prism mbr = FileMBR.fileMBR(fs, file, null);
            // Compute the coordinates of this image in the whole picture
            mbr.x1 = (mbr.x1 - allMbr.x1) * width / allMbr.getWidth();
            mbr.x2 = (mbr.x2 - allMbr.x1) * width / allMbr.getWidth();
            mbr.y1 = (mbr.y1 - allMbr.y1) * height / allMbr.getHeight();
            mbr.y2 = (mbr.y2 - allMbr.y1) * height / allMbr.getHeight();
            // Retrieve the image of this dataset
            Path imagePath = new Path(file, "_data.png");
            if (!fs.exists(imagePath))
                throw new RuntimeException("Image " + imagePath + " not ready");
            FSDataInputStream imageFile = fs.open(imagePath);
            BufferedImage image = ImageIO.read(imageFile);
            imageFile.close();
            // Draw the image
            Graphics graphics = result.getGraphics();
            graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(), (int) mbr.getHeight(),
                    null);
            graphics.dispose();

            if (includeBoundaries) {
                // Plot also the image of the boundaries
                // Retrieve the image of the dataset boundaries
                imagePath = new Path(file, "_partitions.png");
                if (fs.exists(imagePath)) {
                    imageFile = fs.open(imagePath);
                    image = ImageIO.read(imageFile);
                    imageFile.close();
                    // Draw the image
                    graphics = result.getGraphics();
                    graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(),
                            (int) mbr.getHeight(), null);
                    graphics.dispose();
                }
            }
        }
    }

    return result;
}

From source file:com.ricemap.spateDB.operations.RangeQuery.java

License:Apache License

/**
 * Runs a range query on the local machine by iterating over the whole file.
 * //ww w .j ava2 s.co  m
 * @param fs
 *            - FileSystem that contains input file
 * @param file
 *            - path to the input file
 * @param queryRange
 *            - The range to look in
 * @param shape
 *            - An instance of the shape stored in file
 * @param output
 *            - Output is sent to this collector. If <code>null</code>,
 *            output is not collected and only the number of results is
 *            returned.
 * @return number of results found
 * @throws IOException
 */
public static <S extends Shape> long rangeQueryLocal(FileSystem fs, Path file, Shape queryRange, S shape,
        ResultCollector<S> output) throws IOException {
    long file_size = fs.getFileStatus(file).getLen();
    ShapeRecordReader<S> shapeReader = new ShapeRecordReader<S>(fs.open(file), 0, file_size);

    long resultCount = 0;
    Prism cell = shapeReader.createKey();

    while (shapeReader.next(cell, shape)) {
        if (shape.isIntersected(queryRange)) {
            boolean report_result;
            if (cell.isValid()) {
                // Check for duplicate avoidance
                Prism intersection_mbr = queryRange.getMBR().getIntersection(shape.getMBR());
                report_result = cell.contains(intersection_mbr.t1, intersection_mbr.x1, intersection_mbr.y1);
            } else {
                report_result = true;
            }
            if (report_result) {
                resultCount++;
                if (output != null) {
                    output.collect(shape);
                }
            }
        }
    }
    shapeReader.close();
    return resultCount;
}

From source file:com.ricemap.spateDB.operations.RecordCount.java

License:Apache License

/**
 * Counts the approximate number of lines in a file by getting an approximate
 * average line length//from w w w. j a  v  a  2 s. c  o m
 * @param fs
 * @param file
 * @return
 * @throws IOException
 */
public static <T> long recordCountApprox(FileSystem fs, Path file) throws IOException {
    final long fileSize = fs.getFileStatus(file).getLen();
    final FSDataInputStream in = fs.open(file);

    Estimator<Long> lineEstimator = new Estimator<Long>(0.05);
    lineEstimator.setRandomSample(new Estimator.RandomSample() {

        @Override
        public double next() {
            int lineLength = 0;
            try {
                long randomFilePosition = (long) (Math.random() * fileSize);
                in.seek(randomFilePosition);

                // Skip the rest of this line
                byte lastReadByte;
                do {
                    lastReadByte = in.readByte();
                } while (lastReadByte != '\n' && lastReadByte != '\r');

                while (in.getPos() < fileSize - 1) {
                    lastReadByte = in.readByte();
                    if (lastReadByte == '\n' || lastReadByte == '\r') {
                        break;
                    }
                    lineLength++;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            return lineLength + 1;
        }
    });

    lineEstimator.setUserFunction(new Estimator.UserFunction<Long>() {
        @Override
        public Long calculate(double x) {
            return (long) (fileSize / x);
        }
    });

    lineEstimator.setQualityControl(new Estimator.QualityControl<Long>() {

        @Override
        public boolean isAcceptable(Long y1, Long y2) {
            return (double) Math.abs(y2 - y1) / Math.min(y1, y2) < 0.01;
        }
    });

    Estimator.Range<Long> lineCount = lineEstimator.getEstimate();
    in.close();

    return (lineCount.limit1 + lineCount.limit2) / 2;
}