Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem#delete(Path, boolean).

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file. If the path is a directory and recursive is set to true, the directory and all of its contents are deleted; if the path is a non-empty directory and recursive is false, an exception is thrown. Returns true if the delete succeeded, false otherwise.

Usage
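
Before the project examples, here is a minimal self-contained sketch showing both forms of the call. It assumes a default Configuration on the classpath; the paths are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // Non-recursive delete: fine for a plain file; a non-empty directory throws
        Path file = new Path("/tmp/example/data.txt"); // hypothetical path
        if (fs.exists(file) && fs.delete(file, false)) {
            System.out.println("Deleted " + file);
        }

        // Recursive delete: removes a directory and everything under it
        Path dir = new Path("/tmp/example/output"); // hypothetical path
        if (fs.exists(dir)) {
            fs.delete(dir, true);
        }
    }
}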

From source file:com.redsqirl.workflow.server.OozieManager.java

License:Open Source License

/**
 * Clean the directory where the Job details are stored, keeping only
 * the most recent job directories of the given workflow
 * @param nameWf name of the workflow whose old job directories are removed
 * @throws RemoteException
 */
public void cleanJobDirectory(final String nameWf) throws RemoteException {
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int numberToKeep = WorkflowPrefManager.getNbOozieDirToKeep();
    try {
        fs = NameNodeVar.getFS();
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {

            @Override
            public boolean accept(Path arg0) {
                return arg0.getName().startsWith(nameWf + "_");
            }
        });
        // Sort oldest first by modification time
        Arrays.sort(children, 0, children.length, new Comparator<FileStatus>() {

            @Override
            public int compare(FileStatus arg0, FileStatus arg1) {
                return Long.compare(arg0.getModificationTime(), arg1.getModificationTime());
            }
        });
        for (int i = 0; i < children.length - numberToKeep; ++i) {
            fs.delete(children[i].getPath(), true);
        }
    } catch (Exception e1) {
        logger.error(e1);
    }
}
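
Note that delete is called with recursive set to true, so each expired job directory is removed along with everything under it.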

From source file:com.redsqirl.workflow.server.Workflow.java

License:Open Source License

/**
 * Clean the backup directory, keeping only the most recent backups
 * 
 * @throws IOException
 */
public void cleanUpBackup() throws IOException {
    String path = WorkflowPrefManager.getBackupPath();
    int nbBackup = WorkflowPrefManager.getNbBackup();

    FileSystem fs = NameNodeVar.getFS();
    // FileStatus stat = fs.getFileStatus(new Path(path));
    FileStatus[] fsA = fs.listStatus(new Path(path), new PathFilter() {

        @Override
        public boolean accept(Path arg0) {
            return arg0.getName().matches(".*[0-9]{14}(.rs|.srs)$");
        }
    });
    logger.debug("Backup directory: " + fsA.length + " files, " + nbBackup + " to keep, "
            + Math.max(0, fsA.length - nbBackup) + " to remove");
    if (fsA.length > nbBackup) {
        int numberToRemove = fsA.length - nbBackup;
        // Sort oldest first, then delete the surplus oldest backups
        Arrays.sort(fsA, new Comparator<FileStatus>() {

            @Override
            public int compare(FileStatus arg0, FileStatus arg1) {
                return Long.compare(arg0.getModificationTime(), arg1.getModificationTime());
            }
        });
        for (int i = 0; i < numberToRemove; ++i) {
            fs.delete(fsA[i].getPath(), false);
        }
    }
    // fs.close();
}
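
Here delete is called with recursive set to false: the PathFilter matches individual backup files (ending in .rs or .srs), so a non-recursive delete is sufficient.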

From source file:com.redsqirl.workflow.server.Workflow.java

License:Open Source License

/**
 * Backup the workflow
 * 
 * @return the path of the backup file
 * @throws RemoteException
 */
public String backup() throws RemoteException {
    String path = getBackupName(createBackupDir());
    boolean save_swp = isSaved();
    logger.debug("back up path " + path);
    String error = save(path);
    saved = save_swp;
    try {
        if (error != null) {
            logger.warn("Fail to back up: " + error);
            FileSystem fs = NameNodeVar.getFS();
            fs.delete(new Path(path), false);
        }
        logger.debug("Clean up back up");
        cleanUpBackup();
    } catch (Exception e) {
        logger.warn(e.getMessage());
        logger.warn("Failed cleaning up backup directory");
    }

    return path;
}

From source file:com.revolutionanalytics.hadoop.hdfs.FileUtils.java

License:Apache License

private static void delete(Configuration cfg, FileSystem srcFS, Path src, boolean recursive)
        throws IOException {
    // Prefer the trash so that the removal is recoverable
    Trash trashTmp = new Trash(srcFS, cfg);
    if (trashTmp.moveToTrash(src)) {
        System.out.println("Moved to trash: " + src);
        return;
    }
    // Fall back to a hard delete, honoring the recursive flag
    if (srcFS.delete(src, recursive)) {
        System.out.println("Deleted " + src);
    } else {
        if (!srcFS.exists(src)) {
            throw new FileNotFoundException("cannot remove " + src + ": No such file or directory.");
        }
        throw new IOException("Delete failed " + src);
    }
}
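
Trash#moveToTrash returns false when the trash feature is disabled or the move does not succeed, so this helper only falls back to a hard FileSystem#delete in that case, keeping removals recoverable whenever possible.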

From source file:com.ricemap.spateDB.operations.FileMBR.java

License:Apache License

/**
 * Computes the minimal bounding Prism (MBR) of a file by issuing a
 * MapReduce job; if the file is globally indexed, the MBR is read
 * directly from the global index
 * @param fs
 * @param file
 * @param stockShape
 * @param background
 * @return
 * @throws IOException 
 */
public static <S extends Shape> Prism fileMBRMapReduce(FileSystem fs, Path file, S stockShape,
        boolean background) throws IOException {
    // Quickly get file MBR if it is globally indexed
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(fs, file);
    if (globalIndex != null) {
        // Return the MBR of the global index.
        // Compute file size by adding up sizes of all files assuming they are
        // not compressed
        long totalLength = 0;
        for (Partition p : globalIndex) {
            Path filePath = new Path(file, p.filename);
            if (fs.exists(filePath))
                totalLength += fs.getFileStatus(filePath).getLen();
        }
        sizeOfLastProcessedFile = totalLength;
        return globalIndex.getMBR();
    }
    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(file.toUri().getPath() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Prism.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeInputFormat.class);
    SpatialSite.setShapeClass(job, stockShape.getClass());
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        FileMBR.sizeOfLastProcessedFile = inputBytesCounter.getValue();

        // Read job result
        FileStatus[] results = outFs.listStatus(outputPath);
        Prism mbr = new Prism();
        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                Text text = new Text();
                if (lineReader.readLine(text) > 0) {
                    mbr.fromText(text);
                }
                lineReader.close();
            }
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}

From source file:com.ricemap.spateDB.operations.LineRandomizer.java

License:Apache License

/**
 * Randomizes the order of lines in a file by issuing a MapReduce job
 * @param infile
 * @param outfile
 * @param overwrite
 * @throws IOException 
 */
public static void randomizerMapReduce(Path infile, Path outfile, boolean overwrite) throws IOException {
    JobConf job = new JobConf(LineRandomizer.class);

    FileSystem outfs = outfile.getFileSystem(job);

    if (overwrite)
        outfs.delete(outfile, true);

    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    FileSystem infs = infile.getFileSystem(job);
    int numOfPartitions = (int) Math
            .ceil((double) infs.getFileStatus(infile).getLen() / infs.getDefaultBlockSize(outfile));
    job.setInt(NumOfPartitions, numOfPartitions);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}
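
When the overwrite flag is set, the existing output path is deleted recursively before the job is submitted. This pattern recurs in the examples below, because Hadoop's output formats refuse to write to an output directory that already exists.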

From source file:com.ricemap.spateDB.operations.Plot.java

License:Apache License

/**
 * @param args
 * @throws IOException 
 */
public static void main(String[] args) throws IOException {
    System.setProperty("java.awt.headless", "true");
    CommandLineArguments cla = new CommandLineArguments(args);
    JobConf conf = new JobConf(Plot.class);
    Path[] files = cla.getPaths();
    if (files.length < 2) {
        printUsage();
        throw new RuntimeException("Illegal arguments. File names missing");
    }

    Path inFile = files[0];
    FileSystem inFs = inFile.getFileSystem(conf);
    if (!inFs.exists(inFile)) {
        printUsage();
        throw new RuntimeException("Input file does not exist");
    }

    boolean overwrite = cla.isOverwrite();
    Path outFile = files[1];
    FileSystem outFs = outFile.getFileSystem(conf);
    if (outFs.exists(outFile)) {
        if (overwrite)
            outFs.delete(outFile, true);
        else
            throw new RuntimeException("Output file exists and overwrite flag is not set");
    }

    boolean showBorders = cla.is("borders");
    boolean showBlockCount = cla.is("showblockcount");
    boolean showRecordCount = cla.is("showrecordcount");
    Shape shape = cla.getShape(true);

    int width = cla.getWidth(1000);
    int height = cla.getHeight(1000);

    Color color = cla.getColor();

    plot(inFile, outFile, shape, width, height, color, showBorders, showBlockCount, showRecordCount);

    System.out.println("Values range: [" + min_value + "," + max_value + "]");
}

From source file:com.ricemap.spateDB.operations.RangeQuery.java

License:Apache License

/**
 * Performs a range query using MapReduce
 * 
 * @param fs
 * @param inputFile
 * @param userOutputPath
 * @param queryShape
 * @param shape
 * @param overwrite
 * @param background
 * @param query
 * @return
 * @throws IOException
 */
public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape,
        Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(
                    inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    } else {
        if (outFs.exists(outputPath)) {
            if (overwrite) {
                outFs.delete(outputPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for the filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
        // RTree indexed file
        LOG.info("Searching an RTree indexed file");
        job.setInputFormat(RTreeInputFormat.class);
    } else {
        // A file with no local index
        LOG.info("Searching a non local-indexed file");
        job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> OutputKey = NullWritable.class;
    try {
        Class<?> c = shape.getClass();
        Field f = c.getDeclaredField(query.field);
        f.setAccessible(true);
        if (f.getType().equals(Integer.TYPE)) {
            OutputKey = IntWritable.class;
        } else if (f.getType().equals(Double.TYPE)) {
            OutputKey = DoubleWritable.class;
        } else if (f.getType().equals(Long.TYPE)) {
            OutputKey = LongWritable.class;
        }
    } catch (SecurityException e) {
        e.printStackTrace();
    } catch (NoSuchFieldException e) {
        e.printStackTrace();
    }

    job.setMapOutputKeyClass(OutputKey);
    switch (query.type) {
    case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
    case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
    default:
        break;
    }
    // }
    // else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // If outputPath not set by user, automatically delete it
        if (userOutputPath == null)
            outFs.delete(outputPath, true);

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        lastRunningJob = jc.submitJob(job);
        return -1;
    }
}

From source file:com.ricemap.spateDB.operations.RecordCount.java

License:Apache License

/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * @param fs
 * @param file
 * @return
 * @throws IOException 
 */
public static long recordCountMapReduce(FileSystem fs, Path file) throws IOException {
    JobConf job = new JobConf(RecordCount.class);

    Path outputPath = new Path(file.toUri().getPath() + ".linecount");
    FileSystem outFs = outputPath.getFileSystem(job);
    outFs.delete(outputPath, true);

    job.setJobName("LineCount");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(1);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    JobClient.runJob(job);

    // Read job result
    long lineCount = 0;
    FileStatus[] results = outFs.listStatus(outputPath);
    for (FileStatus fileStatus : results) {
        if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
            LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
            Text text = new Text();
            if (lineReader.readLine(text) > 0) {
                lineCount = Long.parseLong(text.toString());
            }
            lineReader.close();
        }
    }

    outFs.delete(outputPath, true);

    return lineCount;
}
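
As in fileMBRMapReduce above, the temporary output directory is deleted recursively once the result has been read back, cleaning up the part files produced by the job.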

From source file:com.ricemap.spateDB.operations.Repartition.java

License:Apache License

/**
 * Repartitions an input file according to the given list of cells.
 * @param inFile
 * @param outPath
 * @param stockShape
 * @param blockSize
 * @param cellInfos
 * @param sindex
 * @param overwrite
 * @param columnar
 * @throws IOException
 */
public static void repartitionMapReduce(Path inFile, Path outPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite, boolean columnar) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("Repartition");
    FileSystem outFs = outPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    job.setBoolean(SpatialSite.PACK_CELLS, pack);
    job.setBoolean(SpatialSite.EXPAND_CELLS, expand);
    job.setStrings(SpatialSite.STORAGE_MODE, columnar ? "columnar" : "normal");

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    // Set default parameters for reading input file
    SpatialSite.setShapeClass(job, stockShape.getClass());

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }
    // Copy block size from source file if it's globally indexed
    FileSystem inFs = inFile.getFileSystem(job);

    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inFile);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(inFile, globalIndex.iterator().next().filename))
                    .getBlockSize();
            LOG.info("Automatically setting block size to " + blockSize);
        }
    }

    if (blockSize != 0)
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blockSize);
    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}