List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
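Before the project examples below, here is a minimal sketch of calling delete directly. It assumes a default Configuration and a placeholder path /tmp/example-output; both are illustrative and not taken from any of the source files listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical target path used only for this sketch
        Path target = new Path("/tmp/example-output");

        // Pass recursive=true to remove a non-empty directory;
        // the call returns true only if the path was actually deleted.
        if (fs.exists(target)) {
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted " + target + ": " + deleted);
        }
    }
}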
From source file:com.redsqirl.workflow.server.OozieManager.java
License:Open Source License
/**
 * Clean the directory where the Job details are stored
 *
 * @param nameWf
 * @throws RemoteException
 */
public void cleanJobDirectory(final String nameWf) throws RemoteException {
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int numberToKeep = WorkflowPrefManager.getNbOozieDirToKeep();
    try {
        fs = NameNodeVar.getFS();
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {
            @Override
            public boolean accept(Path arg0) {
                return arg0.getName().startsWith(nameWf + "_");
            }
        });
        Arrays.sort(children, 0, children.length, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus arg0, FileStatus arg1) {
                return (int) ((arg0.getModificationTime() - arg1.getModificationTime()) / 10000);
            }
        });
        for (int i = 0; i < children.length - numberToKeep; ++i) {
            fs.delete(children[i].getPath(), true);
        }
    } catch (Exception e1) {
        logger.error(e1);
    }
}
From source file:com.redsqirl.workflow.server.Workflow.java
License:Open Source License
/**
 * Clean the backup directory
 *
 * @throws IOException
 */
public void cleanUpBackup() throws IOException {
    String path = WorkflowPrefManager.getBackupPath();
    int nbBackup = WorkflowPrefManager.getNbBackup();
    FileSystem fs = NameNodeVar.getFS();
    // FileStatus stat = fs.getFileStatus(new Path(path));
    FileStatus[] fsA = fs.listStatus(new Path(path), new PathFilter() {
        @Override
        public boolean accept(Path arg0) {
            return arg0.getName().matches(".*[0-9]{14}(.rs|.srs)$");
        }
    });
    logger.debug("Backup directory: " + fsA.length + " files, " + nbBackup + " to keep, "
            + Math.max(0, fsA.length - nbBackup) + " to remove");
    if (fsA.length > nbBackup) {
        int numberToRemove = fsA.length - nbBackup;
        Map<Path, Long> pathToRemove = new HashMap<Path, Long>();
        Path pathMin = null;
        Long min = Long.MAX_VALUE;
        for (FileStatus stat : fsA) {
            if (pathToRemove.size() < numberToRemove) {
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            } else if (min > stat.getModificationTime()) {
                pathToRemove.remove(pathMin);
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            }
            if (min > stat.getModificationTime()) {
                min = stat.getModificationTime();
                pathMin = stat.getPath();
            }
        }
        for (Path pathDel : pathToRemove.keySet()) {
            fs.delete(pathDel, false);
        }
    }
    // fs.close();
}
From source file:com.redsqirl.workflow.server.Workflow.java
License:Open Source License
/**
 * Backup the workflow
 *
 * @throws RemoteException
 */
public String backup() throws RemoteException {
    String path = getBackupName(createBackupDir());
    boolean save_swp = isSaved();
    logger.debug("back up path " + path);
    String error = save(path);
    saved = save_swp;
    try {
        if (error != null) {
            logger.warn("Fail to back up: " + error);
            FileSystem fs = NameNodeVar.getFS();
            fs.delete(new Path(path), false);
        }
        logger.debug("Clean up back up");
        cleanUpBackup();
    } catch (Exception e) {
        logger.warn(e.getMessage());
        logger.warn("Failed cleaning up backup directory");
    }
    return path;
}
From source file:com.revolutionanalytics.hadoop.hdfs.FileUtils.java
License:Apache License
private static void delete(Configuration cfg, FileSystem srcFS, Path src, boolean recursive)
        throws IOException {
    Trash trashTmp = new Trash(srcFS, cfg);
    if (trashTmp.moveToTrash(src)) {
        System.out.println("Moved to trash: " + src);
        return;
    }
    if (srcFS.delete(src, true)) {
        System.out.println("Deleted " + src);
    } else {
        if (!srcFS.exists(src)) {
            throw new FileNotFoundException("cannot remove " + src + ": No such file or directory.");
        }
        throw new IOException("Delete failed " + src);
    }
}
From source file:com.ricemap.spateDB.operations.FileMBR.java
License:Apache License
/**
 * Computes the minimal bounding Prism (MBR) of a file, either from its global
 * index or by issuing a MapReduce job.
 * @param fs
 * @param file
 * @param stockShape
 * @param background
 * @return
 * @throws IOException
 */
public static <S extends Shape> Prism fileMBRMapReduce(FileSystem fs, Path file, S stockShape,
        boolean background) throws IOException {
    // Quickly get file MBR if it is globally indexed
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(fs, file);
    if (globalIndex != null) {
        // Return the MBR of the global index.
        // Compute file size by adding up sizes of all files assuming they are
        // not compressed
        long totalLength = 0;
        for (Partition p : globalIndex) {
            Path filePath = new Path(file, p.filename);
            if (fs.exists(filePath))
                totalLength += fs.getFileStatus(filePath).getLen();
        }
        sizeOfLastProcessedFile = totalLength;
        return globalIndex.getMBR();
    }

    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(file.toUri().getPath() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Prism.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeInputFormat.class);
    SpatialSite.setShapeClass(job, stockShape.getClass());
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        FileMBR.sizeOfLastProcessedFile = inputBytesCounter.getValue();

        // Read job result
        FileStatus[] results = outFs.listStatus(outputPath);
        Prism mbr = new Prism();
        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                Text text = new Text();
                if (lineReader.readLine(text) > 0) {
                    mbr.fromText(text);
                }
                lineReader.close();
            }
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}
From source file:com.ricemap.spateDB.operations.LineRandomizer.java
License:Apache License
/**
 * Randomizes the order of lines in an input file by issuing a MapReduce job.
 * @param infile
 * @param outfile
 * @param overwrite
 * @throws IOException
 */
public static void randomizerMapReduce(Path infile, Path outfile, boolean overwrite) throws IOException {
    JobConf job = new JobConf(LineRandomizer.class);

    FileSystem outfs = outfile.getFileSystem(job);
    if (overwrite)
        outfs.delete(outfile, true);

    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    FileSystem infs = infile.getFileSystem(job);
    int numOfPartitions = (int) Math
            .ceil((double) infs.getFileStatus(infile).getLen() / infs.getDefaultBlockSize(outfile));
    job.setInt(NumOfPartitions, numOfPartitions);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}
From source file:com.ricemap.spateDB.operations.Plot.java
License:Apache License
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    System.setProperty("java.awt.headless", "true");
    CommandLineArguments cla = new CommandLineArguments(args);
    JobConf conf = new JobConf(Plot.class);
    Path[] files = cla.getPaths();
    if (files.length < 2) {
        printUsage();
        throw new RuntimeException("Illegal arguments. File names missing");
    }

    Path inFile = files[0];
    FileSystem inFs = inFile.getFileSystem(conf);
    if (!inFs.exists(inFile)) {
        printUsage();
        throw new RuntimeException("Input file does not exist");
    }

    boolean overwrite = cla.isOverwrite();
    Path outFile = files[1];
    FileSystem outFs = outFile.getFileSystem(conf);
    if (outFs.exists(outFile)) {
        if (overwrite)
            outFs.delete(outFile, true);
        else
            throw new RuntimeException("Output file exists and overwrite flag is not set");
    }

    boolean showBorders = cla.is("borders");
    boolean showBlockCount = cla.is("showblockcount");
    boolean showRecordCount = cla.is("showrecordcount");

    Shape shape = cla.getShape(true);

    int width = cla.getWidth(1000);
    int height = cla.getHeight(1000);

    Color color = cla.getColor();

    plot(inFile, outFile, shape, width, height, color, showBorders, showBlockCount, showRecordCount);

    System.out.println("Values range: [" + min_value + "," + max_value + "]");
}
From source file:com.ricemap.spateDB.operations.RangeQuery.java
License:Apache License
/**
 * Performs a range query using MapReduce
 *
 * @param fs
 * @param inputFile
 * @param userOutputPath
 * @param queryShape
 * @param shape
 * @param overwrite
 * @param background
 * @param query
 * @return
 * @throws IOException
 */
public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape,
        Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(
                    inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    } else {
        if (outFs.exists(outputPath)) {
            if (overwrite) {
                outFs.delete(outputPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
        // RTree indexed file
        LOG.info("Searching an RTree indexed file");
        job.setInputFormat(RTreeInputFormat.class);
    } else {
        // A file with no local index
        LOG.info("Searching a non local-indexed file");
        job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> OutputKey = NullWritable.class;
    try {
        Class<?> c = shape.getClass();
        Field f = c.getDeclaredField(query.field);
        f.setAccessible(true);
        if (f.getType().equals(Integer.TYPE)) {
            OutputKey = IntWritable.class;
        } else if (f.getType().equals(Double.TYPE)) {
            OutputKey = DoubleWritable.class;
        } else if (f.getType().equals(Long.TYPE)) {
            OutputKey = LongWritable.class;
        }
    } catch (SecurityException e) {
        e.printStackTrace();
    } catch (NoSuchFieldException e) {
        e.printStackTrace();
    }

    job.setMapOutputKeyClass(OutputKey);

    switch (query.type) {
    case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
    case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
    default:
        break;
    }
    // } else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // If outputPath not set by user, automatically delete it
        if (userOutputPath == null)
            outFs.delete(outputPath, true);

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        lastRunningJob = jc.submitJob(job);
        return -1;
    }
}
From source file:com.ricemap.spateDB.operations.RecordCount.java
License:Apache License
/**
 * Counts the exact number of lines in a file by issuing a MapReduce job.
 * @param fs
 * @param file
 * @return
 * @throws IOException
 */
public static long recordCountMapReduce(FileSystem fs, Path file) throws IOException {
    JobConf job = new JobConf(RecordCount.class);

    Path outputPath = new Path(file.toUri().getPath() + ".linecount");
    FileSystem outFs = outputPath.getFileSystem(job);
    outFs.delete(outputPath, true);

    job.setJobName("LineCount");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(1);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    JobClient.runJob(job);

    // Read job result
    long lineCount = 0;
    FileStatus[] results = outFs.listStatus(outputPath);
    for (FileStatus fileStatus : results) {
        if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
            LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
            Text text = new Text();
            if (lineReader.readLine(text) > 0) {
                lineCount = Long.parseLong(text.toString());
            }
            lineReader.close();
        }
    }

    outFs.delete(outputPath, true);

    return lineCount;
}
From source file:com.ricemap.spateDB.operations.Repartition.java
License:Apache License
/**
 * Repartitions an input file according to the given list of cells.
 * @param inFile
 * @param outPath
 * @param stockShape
 * @param blockSize
 * @param cellInfos
 * @param sindex
 * @param overwrite
 * @param columnar
 * @throws IOException
 */
public static void repartitionMapReduce(Path inFile, Path outPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite, boolean columnar) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("Repartition");
    FileSystem outFs = outPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    job.setBoolean(SpatialSite.PACK_CELLS, pack);
    job.setBoolean(SpatialSite.EXPAND_CELLS, expand);
    job.setStrings(SpatialSite.STORAGE_MODE, columnar ? "columnar" : "normal");

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    // Set default parameters for reading input file
    SpatialSite.setShapeClass(job, stockShape.getClass());

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    // Copy block size from source file if it's globally indexed
    FileSystem inFs = inFile.getFileSystem(job);
    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inFile);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(inFile, globalIndex.iterator().next().filename))
                    .getBlockSize();
            LOG.info("Automatically setting block size to " + blockSize);
        }
    }
    if (blockSize != 0)
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blockSize);

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}