Example usage for org.apache.hadoop.fs FileSystem isDirectory

List of usage examples for org.apache.hadoop.fs FileSystem isDirectory

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory.
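
Because FileSystem#isDirectory(Path) is deprecated, later Hadoop releases recommend fetching a FileStatus and inspecting it instead. Below is a minimal, self-contained sketch (not taken from any of the source files on this page; the path /tmp/demo-data is an arbitrary placeholder) that shows both forms:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/demo-data"); // arbitrary placeholder path
        FileSystem fs = path.getFileSystem(conf);

        // Deprecated convenience form documented on this page
        boolean viaIsDirectory = fs.isDirectory(path);

        // Non-deprecated equivalent: fetch the FileStatus and inspect it
        boolean viaFileStatus = fs.exists(path) && fs.getFileStatus(path).isDirectory();

        System.out.println(path + " is a directory: " + viaIsDirectory + " / " + viaFileStatus);
    }
}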

Usage

From source file:edu.ucsb.cs.partitioning.statistics.CollectorBaraglia.java

License:Apache License

public static long countFileVectors(FileSystem fs, Path inputFile, JobConf job) throws IOException {
    long nDocuments = 0;
    LongWritable key = new LongWritable();
    FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();

    if (fs.isDirectory(inputFile))
        return 0;
    SequenceFile.Reader in = new SequenceFile.Reader(fs, inputFile, job);
    while (in.next(key, value))
        nDocuments++;
    in.close();
    return nDocuments;
}
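
countFileVectors above simply returns 0 when handed a directory. As a purely illustrative complement (not part of CollectorBaraglia.java), fs.isDirectory can be combined with fs.listStatus to sum the per-file counts over a whole directory:

// Illustrative helper, not from the original source: recurse into directories,
// skipping Hadoop bookkeeping files such as _SUCCESS and _logs.
public static long countVectorsRecursively(FileSystem fs, Path path, JobConf job) throws IOException {
    if (!fs.isDirectory(path))
        return countFileVectors(fs, path, job);
    long total = 0;
    for (FileStatus child : fs.listStatus(path)) {
        if (child.getPath().getName().startsWith("_"))
            continue;
        total += countVectorsRecursively(fs, child.getPath(), job);
    }
    return total;
}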

From source file:edu.ucsb.cs.partitioning.statistics.DistributionPlotter.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 4)
        printUsage();

    input = args[0];
    output = args[1];
    range = Float.parseFloat(args[2]);
    p = Float.parseFloat(args[3]);

    Configuration conf = new Configuration();
    Path inputPath = new Path(input);
    FileSystem hdfs = inputPath.getFileSystem(conf);
    int lineCount = 0;
    double avg = 0, variance = 0;
    ArrayList<Float> pnorms = new ArrayList<Float>();
    Reader reader = null;

    if ((!hdfs.exists(inputPath)) || (!hdfs.isDirectory(inputPath)))
        printUsage();

    FileStatus[] files = setFiles(hdfs, inputPath);
    for (int i = 0; i < files.length; i++) {
        inputPath = files[i].getPath();
        if (hdfs.isDirectory(inputPath) || inputPath.getName().startsWith("_"))
            continue;
        System.out.println("Reading file " + inputPath.getName()); // remove
        reader = new SequenceFile.Reader(hdfs, inputPath, conf);

        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();

        while (reader.next(key, value)) {
            float x = value.getPNorm(p);
            avg += x;
            pnorms.add(x);
            int pNorm = findRange(x);
            if (max < pNorm)
                max = pNorm;
            int bar = pNorm;
            if (historgram.containsKey(bar))
                historgram.put(bar, historgram.get(bar) + 1);
            else
                historgram.put(bar, 1);
            lineCount++;
        }
        reader.close();
    }
    avg /= lineCount;
    for (int i = 0; i < pnorms.size(); i++)
        variance += Math.pow(pnorms.get(i) - avg, 2);
    variance /= (lineCount - 1);
    writeHistorgramToFile(output, avg, variance);
    System.out.println(lineCount + " vectors are processed. ");
}

From source file:edu.ucsb.cs.partitioning.statistics.rsd.java

License:Apache License

public static void main(String[] args) throws IOException {
    if (args.length != 2) {
        System.out.println("Usage:<input directory of (longWritable,FeatureWeightArrayWritable)> <p-norm>");
        return;
    }
    Configuration conf = new Configuration();
    Path inputPath = new Path(args[0]);
    FileSystem hdfs = inputPath.getFileSystem(conf);
    int lineCount = 0, pnorm = Integer.parseInt(args[1]);
    ArrayList<Float> pnorms = null, norm1 = null;
    Reader reader = null;

    if (!hdfs.exists(inputPath) || hdfs.isFile(inputPath)) {
        System.out.println("\n Input doesn't exists or is not a directory!");
        return;
    }

    FileStatus[] files = setFiles(hdfs, inputPath);
    for (int i = 0; i < files.length; i++) {
        inputPath = files[i].getPath();
        if (hdfs.isDirectory(inputPath) || inputPath.getName().startsWith("_"))
            continue;
        System.out.println("Reading file " + inputPath.getName()); // remove
        reader = new SequenceFile.Reader(hdfs, inputPath, conf);

        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();

        pnorms = new ArrayList<Float>();

        while (reader.next(key, value)) {
            pnorms.add(value.getPNorm(pnorm));
            lineCount++;
        }
    }
    float pnormrstd = getRStd(pnorms);

    System.out
            .println("\nInput has " + lineCount + " records.\n" + pnorm + "-Norm %-RSD = " + (pnormrstd * 100));
    reader.close();
}

From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java

License:Open Source License

private static void plotLocal(Path[] inFiles, final Path outPath, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    final boolean vflip = params.getBoolean("vflip", true);

    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get("mbr") != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int tileWidth = params.getInt("tilewidth", 256);
    int tileHeight = params.getInt("tileheight", 256);
    // Adjust width and height if aspect ratio is to be kept
    if (params.getBoolean("keepratio", true)) {
        // Expand input file to a rectangle for compatibility with the pyramid
        // structure
        if (inputMBR.getWidth() > inputMBR.getHeight()) {
            inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
            inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
        } else {
            inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
            inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
        }
    }

    String outFName = outPath.getName();
    int extensionStart = outFName.lastIndexOf('.');
    final String extension = extensionStart == -1 ? ".png" : outFName.substring(extensionStart);

    // Start reading input file
    Vector<InputSplit> splits = new Vector<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    try {
        Plotter plotter = plotterClass.newInstance();
        plotter.configure(params);

        String[] strLevels = params.get("levels", "7").split("\\.\\.");
        int minLevel, maxLevel;
        if (strLevels.length == 1) {
            minLevel = 0;
            maxLevel = Integer.parseInt(strLevels[0]);
        } else {
            minLevel = Integer.parseInt(strLevels[0]);
            maxLevel = Integer.parseInt(strLevels[1]);
        }

        GridInfo bottomGrid = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        bottomGrid.rows = bottomGrid.columns = 1 << maxLevel;

        TileIndex key = new TileIndex();

        // All canvases in the pyramid, one per tile
        Map<TileIndex, Canvas> canvases = new HashMap<TileIndex, Canvas>();
        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            while (reader.nextKeyValue()) {
                Rectangle partition = reader.getCurrentKey();
                if (!partition.isValid())
                    partition.set(inputMBR);

                Iterable<Shape> shapes = reader.getCurrentValue();

                for (Shape shape : shapes) {
                    Rectangle shapeMBR = shape.getMBR();
                    if (shapeMBR == null)
                        continue;
                    java.awt.Rectangle overlappingCells = bottomGrid.getOverlappingCells(shapeMBR);
                    // Iterate over levels from bottom up
                    for (key.level = maxLevel; key.level >= minLevel; key.level--) {
                        for (key.x = overlappingCells.x; key.x < overlappingCells.x
                                + overlappingCells.width; key.x++) {
                            for (key.y = overlappingCells.y; key.y < overlappingCells.y
                                    + overlappingCells.height; key.y++) {
                                Canvas canvas = canvases.get(key);
                                if (canvas == null) {
                                    Rectangle tileMBR = new Rectangle();
                                    int gridSize = 1 << key.level;
                                    tileMBR.x1 = (inputMBR.x1 * (gridSize - key.x) + inputMBR.x2 * key.x)
                                            / gridSize;
                                    tileMBR.x2 = (inputMBR.x1 * (gridSize - (key.x + 1))
                                            + inputMBR.x2 * (key.x + 1)) / gridSize;
                                    tileMBR.y1 = (inputMBR.y1 * (gridSize - key.y) + inputMBR.y2 * key.y)
                                            / gridSize;
                                    tileMBR.y2 = (inputMBR.y1 * (gridSize - (key.y + 1))
                                            + inputMBR.y2 * (key.y + 1)) / gridSize;
                                    canvas = plotter.createCanvas(tileWidth, tileHeight, tileMBR);
                                    canvases.put(key.clone(), canvas);
                                }
                                plotter.plot(canvas, shape);
                            }
                        }
                        // Update overlappingCells for the higher level
                        int updatedX1 = overlappingCells.x / 2;
                        int updatedY1 = overlappingCells.y / 2;
                        int updatedX2 = (overlappingCells.x + overlappingCells.width - 1) / 2;
                        int updatedY2 = (overlappingCells.y + overlappingCells.height - 1) / 2;
                        overlappingCells.x = updatedX1;
                        overlappingCells.y = updatedY1;
                        overlappingCells.width = updatedX2 - updatedX1 + 1;
                        overlappingCells.height = updatedY2 - updatedY1 + 1;
                    }
                }
            }
            reader.close();
        }

        // Done with all splits. Write output to disk
        LOG.info("Done with plotting. Now writing the output");
        final FileSystem outFS = outPath.getFileSystem(params);

        LOG.info("Writing default empty image");
        // Write a default empty image to be displayed for non-generated tiles
        BufferedImage emptyImg = new BufferedImage(tileWidth, tileHeight, BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = new SimpleGraphics(emptyImg);
        g.setBackground(new Color(0, 0, 0, 0));
        g.clearRect(0, 0, tileWidth, tileHeight);
        g.dispose();

        // Write the default empty tile image to the output directory as default.png
        OutputStream out = outFS.create(new Path(outPath, "default.png"));
        ImageIO.write(emptyImg, "png", out);
        out.close();

        // Add an HTML file that visualizes the result using Google Maps
        LOG.info("Writing the HTML viewer file");
        LineReader templateFileReader = new LineReader(
                MultilevelPlot.class.getResourceAsStream("/zoom_view.html"));
        PrintStream htmlOut = new PrintStream(outFS.create(new Path(outPath, "index.html")));
        Text line = new Text();
        while (templateFileReader.readLine(line) > 0) {
            String lineStr = line.toString();
            lineStr = lineStr.replace("#{TILE_WIDTH}", Integer.toString(tileWidth));
            lineStr = lineStr.replace("#{TILE_HEIGHT}", Integer.toString(tileHeight));
            lineStr = lineStr.replace("#{MAX_ZOOM}", Integer.toString(maxLevel));
            lineStr = lineStr.replace("#{MIN_ZOOM}", Integer.toString(minLevel));
            lineStr = lineStr.replace("#{TILE_URL}",
                    "'tile-' + zoom + '-' + coord.x + '-' + coord.y + '" + extension + "'");

            htmlOut.println(lineStr);
        }
        templateFileReader.close();
        htmlOut.close();

        // Write the tiles
        final Entry<TileIndex, Canvas>[] entries = canvases.entrySet().toArray(new Map.Entry[canvases.size()]);
        // Clear the hash map to save memory as it is no longer needed
        canvases.clear();
        int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
        Parallel.forEach(entries.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i1, int i2) {
                boolean output = params.getBoolean("output", true);
                try {
                    Plotter plotter = plotterClass.newInstance();
                    plotter.configure(params);
                    for (int i = i1; i < i2; i++) {
                        Map.Entry<TileIndex, Canvas> entry = entries[i];
                        TileIndex key = entry.getKey();
                        if (vflip)
                            key.y = ((1 << key.level) - 1) - key.y;

                        Path imagePath = new Path(outPath, key.getImageFileName() + extension);
                        // Write this tile to an image
                        DataOutputStream outFile = output ? outFS.create(imagePath)
                                : new DataOutputStream(new NullOutputStream());
                        plotter.writeImage(entry.getValue(), outFile, vflip);
                        outFile.close();

                        // Remove the entry to allow GC to collect it
                        entries[i] = null;
                    }
                    return null;
                } catch (InstantiationException e) {
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return null;
            }
        }, parallelism);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    }
}

From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java

License:Open Source License

public static void plotLocal(Path[] inFiles, Path outFile, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException {
    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get(InputMBR) != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    if (params.get(InputMBR) == null)
        OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int width = params.getInt("width", 1000);
    int height = params.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio and store the adjusted
        // values back in params in case the caller needs to retrieve them
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) width / height)
            params.setInt("height", height = (int) (inputMBR.getHeight() * width / inputMBR.getWidth()));
        else
            params.setInt("width", width = (int) (inputMBR.getWidth() * height / inputMBR.getHeight()));
    }
    // Store width and height in final variables to make them accessible in parallel
    final int fwidth = width, fheight = height;

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            // Wildcard, non-existing, or directory input; let the input format compute the splits
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
    List<Canvas> partialCanvases = Parallel.forEach(fsplits.length, new RunnableRange<Canvas>() {
        @Override
        public Canvas run(int i1, int i2) {
            Plotter plotter;
            try {
                plotter = plotterClass.newInstance();
            } catch (InstantiationException e) {
                throw new RuntimeException("Error creating rastierizer", e);
            } catch (IllegalAccessException e) {
                throw new RuntimeException("Error creating rastierizer", e);
            }
            plotter.configure(params);
            // Create the partial layer that will contain the plot of the assigned partitions
            Canvas partialCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);

            for (int i = i1; i < i2; i++) {
                try {
                    RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplits[i],
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplits[i], params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }

                    while (reader.nextKeyValue()) {
                        Rectangle partition = reader.getCurrentKey();
                        if (!partition.isValid())
                            partition.set(inputMBR);

                        Iterable<Shape> shapes = reader.getCurrentValue();
                        // Run the plot step
                        plotter.plot(partialCanvas, plotter.isSmooth() ? plotter.smooth(shapes) : shapes);
                    }
                    reader.close();
                } catch (IOException e) {
                    throw new RuntimeException("Error reading the file ", e);
                } catch (InterruptedException e) {
                    throw new RuntimeException("Interrupt error ", e);
                }
            }
            return partialCanvas;
        }
    }, parallelism);
    boolean merge = params.getBoolean("merge", true);
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
        plotter.configure(params);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating plotter", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating plotter", e);
    }

    // Whether we should vertically flip the final image or not
    boolean vflip = params.getBoolean("vflip", true);
    if (merge) {
        LOG.info("Merging " + partialCanvases.size() + " partial canvases");
        // Create the final canvas that will contain the final image
        Canvas finalCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);
        for (Canvas partialCanvas : partialCanvases)
            plotter.merge(finalCanvas, partialCanvas);

        // Finally, write the resulting image to the given output path
        LOG.info("Writing final image");
        FileSystem outFs = outFile.getFileSystem(params);
        FSDataOutputStream outputFile = outFs.create(outFile);

        plotter.writeImage(finalCanvas, outputFile, vflip);
        outputFile.close();
    } else {
        // No merge
        LOG.info("Writing partial images");
        FileSystem outFs = outFile.getFileSystem(params);
        for (int i = 0; i < partialCanvases.size(); i++) {
            Path filename = new Path(outFile, String.format("part-%05d.png", i));
            FSDataOutputStream outputFile = outFs.create(filename);

            plotter.writeImage(partialCanvases.get(i), outputFile, vflip);
            outputFile.close();
        }
    }
}

From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormat.java

License:Apache License

/**
 * Finds input file references in the control line by looking 
 * into its toolspec.
 *
 * @param fs Hadoop filesystem handle
 * @param parser for parsing the control line
 * @param repo Toolspec repository
 * @return array of paths to input file references
 */
public static Path[] getInputFiles(FileSystem fs, CmdLineParser parser, Repository repo, String controlLine)
        throws IOException {
    parser.parse(controlLine);

    Command command = parser.getCommands()[0];
    String strStdinFile = parser.getStdinFile();
    // parse it, read input file parameters
    Tool tool = repo.getTool(command.getTool());

    ToolProcessor proc = new ToolProcessor(tool);
    Operation operation = proc.findOperation(command.getAction());
    if (operation == null)
        throw new IOException("operation " + command.getAction() + " not found");

    proc.setOperation(operation);
    proc.setParameters(command.getPairs());
    Map<String, String> mapInputFileParameters = proc.getInputFileParameters();
    ArrayList<Path> inFiles = new ArrayList<Path>();
    if (strStdinFile != null) {
        Path p = new Path(strStdinFile);
        if (fs.exists(p)) {
            inFiles.add(p);
        }
    }

    for (String fileRef : mapInputFileParameters.values()) {
        Path p = new Path(fileRef);
        if (fs.exists(p)) {
            if (fs.isDirectory(p)) {
                inFiles.addAll(getFilesInDir(fs, p));
            } else {
                inFiles.add(p);
            }
        }
    }
    return inFiles.toArray(new Path[0]);
}
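
The helper getFilesInDir used above is not reproduced on this page. A plausible sketch of such a helper (purely illustrative, not ControlFileInputFormat's actual implementation) that uses isDirectory to expand directories recursively:

// Hypothetical sketch of getFilesInDir; the real implementation is not shown here.
private static List<Path> getFilesInDir(FileSystem fs, Path dir) throws IOException {
    List<Path> files = new ArrayList<Path>();
    for (FileStatus status : fs.listStatus(dir)) {
        if (fs.isDirectory(status.getPath())) {
            files.addAll(getFilesInDir(fs, status.getPath())); // recurse into subdirectories
        } else {
            files.add(status.getPath());
        }
    }
    return files;
}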

From source file:ezbake.azkaban.job.HdfsCleaner.java

License:Apache License

private void pruneDirectories(final FileSystem fs, Path path) throws IOException {
    // For each of the projects, filter through each of its runs
    for (FileStatus projectNumber : fs.listStatus(path)) {

        // There SHOULD only be directories, but filter just to make sure
        final FileStatus[] runDirs = fs.listStatus(projectNumber.getPath(), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                try {
                    return fs.isDirectory(path);
                } catch (IOException e) {
                    logger.error("Error trying to filter directories", e);
                    return false;
                }
            }
        });

        // Directory names come back in string order, which may not match numeric order, so sort the run timestamps numerically
        final SortedSet<Long> runSet = new TreeSet<>();
        for (FileStatus status : runDirs) {
            try {
                runSet.add(Long.valueOf(status.getPath().getName()));
            } catch (NumberFormatException ex) {
                logger.error("Directory {} is not a long and probably not a runtime dir. Skipping",
                        status.getPath().getName());
            }
        }

        // Keep the last N number of runs
        int i = 0;
        final int stopAt = runSet.size() - lastN;
        for (Long runtime : runSet) {
            if (i++ >= stopAt) {
                break;
            }

            final Path runtimeDir = new Path(projectNumber.getPath(), runtime.toString());
            logger.info("Checking dir <{}> vs ageoff of <{}>", runtimeDir, ageOffTimestamp);

            // Check if it meets the threshold for pruning and if so delete the dir
            if (runtime <= ageOffTimestamp) {
                // Check to make sure this isn't the dir of a currently running job
                try (FSDataInputStream pidStream = fs.open(new Path(runtimeDir, FrameworkDriver.STATUS_FILE))) {
                    if (FrameworkDriver.JobStatus
                            .valueOf(pidStream.readUTF()) == FrameworkDriver.JobStatus.RUNNING) {
                        logger.warn("Directory <{}> is for a currently running job.  Skipping", runtimeDir);
                        continue;
                    }
                } catch (IOException e) {
                    logger.warn("directory {} missing PID file or could not be read.  Skipping",
                            runtimeDir.getName(), e.getMessage());
                    continue;
                }

                logger.info("Removing dir: {}", runtimeDir);
                fs.delete(runtimeDir, true);
            }
        }
    }
}

From source file:gobblin.data.management.trash.Trash.java

License:Apache License

protected void ensureTrashLocationExists(FileSystem fs, Path trashLocation) throws IOException {
    if (fs.exists(trashLocation)) {
        if (!fs.isDirectory(trashLocation)) {
            throw new IOException(String.format("Trash location %s is not a directory.", trashLocation));
        }

        if (!fs.exists(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) {
            // If trash identifier file is not present, directory might have been created by user.
            // Add trash identifier file only if directory is empty.
            if (fs.listStatus(trashLocation).length > 0) {
                throw new IOException(String.format(
                        "Trash directory %s exists, but it does not look like a trash directory. "
                                + "File: %s missing and directory is not empty.",
                        trashLocation, TRASH_IDENTIFIER_FILE));
            } else if (!fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) {
                throw new IOException(String.format("Failed to create file %s in existing trash directory %s.",
                        TRASH_IDENTIFIER_FILE, trashLocation));
            }
        }
    } else if (!(fs.mkdirs(trashLocation.getParent(), ALL_PERM) && fs.mkdirs(trashLocation, PERM)
            && fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE)))) {
        // Failed to create directory or create trash identifier file.
        throw new IOException("Failed to create trash directory at " + trashLocation.toString());
    }
}

From source file:gobblin.data.management.trash.TrashFactoryTest.java

License:Apache License

@Test
public void test() throws IOException {
    FileSystem fs = mock(FileSystem.class);

    Path homeDirectory = new Path("/home/directory");
    Path trashDirectory = new Path(homeDirectory, Trash.DEFAULT_TRASH_DIRECTORY);
    Path trashIdentifierFile = new Path(trashDirectory, Trash.TRASH_IDENTIFIER_FILE);

    when(fs.getHomeDirectory()).thenReturn(homeDirectory);
    when(fs.exists(trashDirectory)).thenReturn(true);
    when(fs.exists(trashIdentifierFile)).thenReturn(true);
    when(fs.listStatus(trashDirectory)).thenReturn(new FileStatus[] {});
    when(fs.isDirectory(trashDirectory)).thenReturn(true);

    when(fs.mkdirs(any(Path.class))).thenReturn(true);
    when(fs.mkdirs(any(Path.class), any(FsPermission.class))).thenReturn(true);
    when(fs.createNewFile(any(Path.class))).thenReturn(true);
    when(fs.makeQualified(any(Path.class))).thenAnswer(new Answer<Path>() {
        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            return (Path) invocation.getArguments()[0];
        }
    });

    Properties properties;

    properties = getBaseProperties(trashDirectory);
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof Trash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof ProxiedTrash);

    properties = getBaseProperties(trashDirectory);
    properties.setProperty(TrashFactory.SIMULATE, Boolean.toString(true));
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof MockTrash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof MockTrash);

    properties = getBaseProperties(trashDirectory);
    properties.setProperty(TrashFactory.TRASH_TEST, Boolean.toString(true));
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof TestTrash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof TestTrash);

    properties = getBaseProperties(trashDirectory);
    properties.setProperty(TrashFactory.SKIP_TRASH, Boolean.toString(true));
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof ImmediateDeletionTrash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof ImmediateDeletionTrash);

}

From source file:gobblin.runtime.mapreduce.GobblinOutputCommitter.java

License:Apache License

@Override
public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {
    LOG.info("Aborting Job: " + jobContext.getJobID() + " with state: " + state);

    Configuration conf = jobContext.getConfiguration();

    URI fsUri = URI.create(conf.get(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI));
    FileSystem fs = FileSystem.get(fsUri, conf);

    Path mrJobDir = new Path(conf.get(ConfigurationKeys.MR_JOB_ROOT_DIR_KEY),
            conf.get(ConfigurationKeys.JOB_NAME_KEY));
    Path jobInputDir = new Path(mrJobDir, MRJobLauncher.INPUT_DIR_NAME);

    if (!fs.exists(jobInputDir) || !fs.isDirectory(jobInputDir)) {
        LOG.warn(String.format("%s either does not exist or is not a directory. No data to cleanup.",
                jobInputDir));
        return;
    }

    // Iterate through all files in the jobInputDir, each file should correspond to a serialized wu or mwu
    try {
        for (FileStatus status : fs.listStatus(jobInputDir, new WorkUnitFilter())) {

            Closer workUnitFileCloser = Closer.create();

            // If the file ends with ".wu" de-serialize it into a WorkUnit
            if (status.getPath().getName().endsWith(AbstractJobLauncher.WORK_UNIT_FILE_EXTENSION)) {
                WorkUnit wu = WorkUnit.createEmpty();
                try {
                    wu.readFields(workUnitFileCloser.register(new DataInputStream(fs.open(status.getPath()))));
                } finally {
                    workUnitFileCloser.close();
                }
                JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
            }

            // If the file ends with ".mwu" de-serialize it into a MultiWorkUnit
            if (status.getPath().getName().endsWith(AbstractJobLauncher.MULTI_WORK_UNIT_FILE_EXTENSION)) {
                MultiWorkUnit mwu = MultiWorkUnit.createEmpty();
                try {
                    mwu.readFields(workUnitFileCloser.register(new DataInputStream(fs.open(status.getPath()))));
                } finally {
                    workUnitFileCloser.close();
                }
                for (WorkUnit wu : mwu.getWorkUnits()) {
                    JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
                }
            }
        }
    } finally {
        try {
            cleanUpWorkingDirectory(mrJobDir, fs);
        } finally {
            super.abortJob(jobContext, state);
        }
    }
}