Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file.

Usage

From source file:com.datatorrent.stram.util.FSUtil.java

License:Apache License

/**
 * Copied from FileUtil to transfer ownership
 *
 * @param srcFS/*  ww  w .  j av  a 2  s  .c o m*/
 * @param srcStatus
 * @param dstFS
 * @param dst
 * @param deleteSource
 * @param overwrite
 * @param conf
 * @return
 * @throws IOException
 */
public static boolean copy(FileSystem srcFS, FileStatus srcStatus, FileSystem dstFS, Path dst,
        boolean deleteSource, boolean overwrite, Configuration conf) throws IOException {
    Path src = srcStatus.getPath();
    //dst = checkDest(src.getName(), dstFS, dst, overwrite);
    if (srcStatus.isDirectory()) {
        //checkDependencies(srcFS, src, dstFS, dst);
        if (!mkdirs(dstFS, dst)) {
            return false;
        }

        FileStatus contents[] = srcFS.listStatus(src);
        for (int i = 0; i < contents.length; i++) {
            copy(srcFS, contents[i], dstFS, new Path(dst, contents[i].getPath().getName()), deleteSource,
                    overwrite, conf);
        }
    } else {
        InputStream in = null;
        OutputStream out = null;
        try {
            in = srcFS.open(src);
            out = dstFS.create(dst, overwrite);
            org.apache.hadoop.io.IOUtils.copyBytes(in, out, conf, true);
        } catch (IOException e) {
            org.apache.hadoop.io.IOUtils.closeStream(out);
            org.apache.hadoop.io.IOUtils.closeStream(in);
            throw e;
        }
    }

    // TODO: change group and limit write to group
    if (srcStatus.isDirectory()) {
        dstFS.setPermission(dst, new FsPermission((short) 0777));
    } else {
        dstFS.setPermission(dst, new FsPermission((short) 0777)/*"ugo+w"*/);
    }
    //dstFS.setOwner(dst, null, srcStatus.getGroup());

    /*
        try {
          // transfer owner
          // DOES NOT WORK only super user can change file owner
          dstFS.setOwner(dst, srcStatus.getOwner(), srcStatus.getGroup());
        } catch (IOException e) {
          LOG.warn("Failed to change owner on {} to {}", dst, srcStatus.getOwner(), e);
          throw e;
        }
    */
    if (deleteSource) {
        return srcFS.delete(src, true);
    } else {
        return true;
    }

}

From source file:com.davidgildeh.hadoop.utils.FileUtils.java

License:Apache License

/**
 * Delete a file on HDFS/* w w w.j a  v a 2  s  . com*/
 * 
 * @param path          The path to the file on HDFS
 * @throws IOException 
 */
public static void deleteFile(String path) throws IOException {

    Path fsPath = new Path(path);
    FileSystem fileSystem = getFileSystem(fsPath);
    checkFileExists(fileSystem, fsPath);

    // Delete file
    fileSystem.delete(fsPath, true);
    fileSystem.close();
}

From source file:com.digitalpebble.behemoth.languageidentification.LanguageIdDriver.java

License:Apache License

/**
 * Parses the command line and runs the language identification job as a
 * map-only job (zero reduce tasks) using {@code LanguageIdMapper}.
 *
 * @param args command line arguments: -i input, -o output, -w to overwrite
 *             an existing output, -h for help
 * @return 0 on success, after printing help, or when the output exists and
 *         -w was not given; -1 on invalid arguments or when the job fails
 * @throws Exception if accessing the file system fails
 */
public int run(String[] args) throws Exception {

    Options options = new Options();
    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    // create the parser
    CommandLineParser parser = new GnuParser();

    options.addOption("h", "help", false, "print this message");
    options.addOption("i", "input", true, "input file or directory");
    options.addOption("o", "output", true, "output Behemoth corpus");
    options.addOption("w", "overwrite", false, "overwrite the output");

    Path inputPath;
    Path outputPath;
    boolean overWrite;

    // parse the command line arguments
    try {
        CommandLine cmdLine = parser.parse(options, args);
        if (cmdLine.hasOption("help")) {
            formatter.printHelp("LanguageIdDriver", options);
            return 0;
        }
        String input = cmdLine.getOptionValue("i");
        String output = cmdLine.getOptionValue("o");
        if (input == null || output == null) {
            formatter.printHelp("LanguageIdDriver", options);
            return -1;
        }
        inputPath = new Path(input);
        outputPath = new Path(output);
        overWrite = cmdLine.hasOption("overwrite");
    } catch (ParseException e) {
        // No usable paths were parsed; continuing would dereference null below
        formatter.printHelp("LanguageIdDriver", options);
        return -1;
    }

    // Resolve the file system from the output path so that the existence
    // check and the deletes below all act on the same file system
    final FileSystem outputFs = FileSystem.get(outputPath.toUri(), getConf());

    // check whether needs overwriting
    if (outputFs.exists(outputPath)) {
        if (!overWrite) {
            System.out.println("Output path " + outputPath + " already exists. Use option -w to overwrite.");
            return 0;
        }
        outputFs.delete(outputPath, true);
    }

    JobConf job = new JobConf(getConf());
    job.setJarByClass(this.getClass());

    job.setJobName("Processing with Language Identifier");

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BehemothDocument.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(LanguageIdMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (log.isInfoEnabled()) {
            log.info("LanguagedIdDriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        // remove whatever partial output the failed job left behind
        outputFs.delete(outputPath, true);
        return -1;
    }

    return 0;
}

From source file:com.digitalpebble.behemoth.mahout.util.Mahout2LibSVM.java

License:Apache License

/**
 * Converts vectors plus labels into a local text file in which each label
 * is replaced by a numeric index assigned in order of first appearance.
 * The work goes through two temporary directories created next to the
 * vector path; both are deleted before returning.
 *
 * @param args command line arguments: -v vector input, -l label input,
 *             -o local output file, -h for help
 * @return 0 on success or after printing help; -1 on invalid arguments or
 *         when writing the output fails; otherwise the non-zero code
 *         returned by {@code vectorToString} or {@code convert}
 * @throws Exception if a file system operation outside the guarded section fails
 */
public int run(String[] args) throws Exception {

    Options options = new Options();
    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    // create the parser
    CommandLineParser parser = new GnuParser();

    options.addOption("h", "help", false, "print this message");
    options.addOption("v", "vector", true, "input vector sequencefile");
    // NOTE(review): description duplicates the one for -v; looks like a copy/paste slip - confirm
    options.addOption("l", "label", true, "input vector sequencefile");
    options.addOption("o", "output", true, "output Behemoth corpus");

    // parse the command line arguments
    CommandLine line;
    try {
        line = parser.parse(options, args);
        if (line.hasOption("help")) {
            formatter.printHelp("CorpusGenerator", options);
            return 0;
        }
        if (!line.hasOption("v") || !line.hasOption("o") || !line.hasOption("l")) {
            formatter.printHelp("CorpusGenerator", options);
            return -1;
        }
    } catch (ParseException e) {
        // Nothing was parsed; continuing would dereference a null command line
        formatter.printHelp("CorpusGenerator", options);
        return -1;
    }

    Path vectorPath = new Path(line.getOptionValue("v"));
    Path labelPath = new Path(line.getOptionValue("l"));
    String output = line.getOptionValue("o");

    Path tempOutput = new Path(vectorPath.getParent(), "temp-" + System.currentTimeMillis());

    // extracts the string representations from the vectors
    int retVal = vectorToString(vectorPath, tempOutput);
    if (retVal != 0) {
        HadoopUtil.delete(getConf(), tempOutput);
        return retVal;
    }

    Path tempOutput2 = new Path(vectorPath.getParent(), "temp-" + System.currentTimeMillis());

    retVal = convert(tempOutput, labelPath, tempOutput2);

    // delete the temp output
    HadoopUtil.delete(getConf(), tempOutput);

    if (retVal != 0) {
        HadoopUtil.delete(getConf(), tempOutput2);
        return retVal;
    }

    // the label dictionary is not dumped to text
    int labelMaxIndex = 0;
    Map<String, Integer> labelIndex = new HashMap<String, Integer>();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    BufferedWriter bow = null;
    try {
        // convert tempOutput to standard file; opened inside the try so that
        // a failure here still cleans up the second temp directory
        bow = new BufferedWriter(new FileWriter(new File(output)));
        FileStatus[] fss = fs.listStatus(tempOutput2);
        for (FileStatus status : fss) {
            Path path = status.getPath();
            // skips the _log or _SUCCESS files
            if (!path.getName().startsWith("part-") && !path.getName().equals(tempOutput2.getName())) {
                continue;
            }
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
            try {
                // read the key + values in that file
                Text key = new Text();
                Text value = new Text();
                while (reader.next(key, value)) {
                    String label = key.toString();
                    // replace the label by its index
                    Integer indexLabel = labelIndex.get(label);
                    if (indexLabel == null) {
                        indexLabel = Integer.valueOf(labelMaxIndex);
                        labelIndex.put(label, indexLabel);
                        labelMaxIndex++;
                    }
                    String val = value.toString();
                    bow.append(indexLabel.toString()).append(val).append("\n");
                }
            } finally {
                // release the reader even when a read or write fails
                reader.close();
            }
        }
        bow.flush();
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        try {
            if (bow != null) {
                bow.close();
            }
        } finally {
            // delete the temp directory even if closing the writer throws
            fs.delete(tempOutput2, true);
        }
    }
    return 0;
}

From source file:com.digitalpebble.behemoth.solr.LucidWorksIndexerJob.java

License:Apache License

/**
 * Runs a map-only job that sends the documents of the input corpus to
 * LucidWorks through {@code LucidWorksOutputFormat}. The job's file output
 * goes to a temporary path that is always deleted afterwards.
 *
 * @param args exactly two arguments: the input path and the SOLR URL
 * @return 0 on success; -1 on a wrong argument count or when the job fails
 * @throws Exception if accessing the file system fails
 */
public int run(String[] args) throws Exception {

    final FileSystem fs = FileSystem.get(getConf());

    if (args.length != 2) {
        String syntax = "com.digitalpebble.solr.LucidWorksIndexerJob in solrURL";
        System.err.println(syntax);
        return -1;
    }

    Path inputPath = new Path(args[0]);
    String solrURL = args[1];

    JobConf job = new JobConf(getConf());

    job.setJarByClass(this.getClass());

    job.setJobName("Indexing " + inputPath + " into LucidWorks");

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(LucidWorksOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(IdentityMapper.class);
    // no reducer : send straight to SOLR at end of mapping
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    // throw-away output path; name made unique with timestamp + random number
    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());
    FileOutputFormat.setOutputPath(job, tmp);

    job.set("solr.server.url", solrURL);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("LucidWorksIndexerJob completed. Time " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        // keep the stack trace and report the failure to the caller
        LOG.error("Exception", e);
        return -1;
    } finally {
        fs.delete(tmp, true);
    }

    return 0;
}

From source file:com.digitalpebble.behemoth.solr.SOLRIndexerJob.java

License:Apache License

/**
 * Runs a map-only job that sends the documents of the input corpus to SOLR
 * through {@code SOLROutputFormat}. The job's file output goes to a
 * temporary path that is always deleted afterwards.
 *
 * @param args exactly two arguments: the input path and the SOLR URL
 * @return 0 on success; -1 on a wrong argument count or when the job fails
 * @throws Exception if accessing the file system fails
 */
public int run(String[] args) throws Exception {

    final FileSystem fs = FileSystem.get(getConf());

    if (args.length != 2) {
        String syntax = "com.digitalpebble.solr.SOLRIndexerJob in solrURL";
        System.err.println(syntax);
        return -1;
    }

    Path inputPath = new Path(args[0]);
    String solrURL = args[1];

    JobConf job = new JobConf(getConf());

    job.setJarByClass(this.getClass());

    job.setJobName("Indexing " + inputPath + " into SOLR");

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SOLROutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(IdentityMapper.class);
    // no reducer : send straight to SOLR at end of mapping
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    // throw-away output path; name made unique with timestamp + random number
    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());
    FileOutputFormat.setOutputPath(job, tmp);

    job.set("solr.server.url", solrURL);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("SOLRIndexerJob completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        // keep the stack trace and report the failure to the caller
        LOG.error("Exception", e);
        return -1;
    } finally {
        fs.delete(tmp, true);
    }

    return 0;
}

From source file:com.digitalpebble.behemoth.uima.UIMADriver.java

License:Apache License

/**
 * Runs a map-only job that processes the input corpus with {@code UIMAMapper},
 * using a UIMA pear file that is added to the distributed cache.
 *
 * @param args exactly three arguments: input path, output path and the path
 *             of the pear file
 * @return 0 on success; -1 on a wrong argument count, when the pear file
 *         does not exist, or when the job fails
 * @throws Exception if accessing the file system or building the pear URI fails
 */
public int run(String[] args) throws Exception {

    final FileSystem fs = FileSystem.get(getConf());

    if (args.length != 3) {
        String syntax = "com.digitalpebble.behemoth.uima.UIMADriver in out path_pear_file";
        System.err.println(syntax);
        return -1;
    }

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String pearPath = args[2];

    // check that the UIMA application has been stored on HDFS
    Path zap = new Path(pearPath);
    if (!fs.exists(zap)) {
        System.err.println("The UIMA application " + pearPath + " can't be found on HDFS - aborting");
        return -1;
    }

    JobConf job = new JobConf(getConf());
    job.setJarByClass(this.getClass());
    job.setJobName("Processing with UIMA application : " + pearPath);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BehemothDocument.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(UIMAMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // push the UIMA pear onto the DistributedCache
    DistributedCache.addCacheFile(new URI(pearPath), job);

    job.set("uima.pear.path", pearPath);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("UIMADriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception", e);
        // remove whatever partial output the failed job left behind
        fs.delete(outputPath, true);
        // report the failure instead of falling through to the success code
        return -1;
    }

    return 0;
}

From source file:com.digitalpebble.behemoth.util.CorpusFilter.java

License:Apache License

/**
 * Parses the command line and runs a map-only job that passes the input
 * corpus through {@code BehemothMapper}. The job is only started when
 * {@code BehemothMapper.isRequired} reports that filters are configured.
 *
 * @param args command line arguments: -i input corpus, -o output corpus,
 *             -h for help
 * @return 0 on success or after printing help; -1 on invalid arguments,
 *         when no filters are configured, or when the job fails
 * @throws Exception if accessing the file system fails
 */
public int run(String[] args) throws Exception {

    Options options = new Options();
    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    // create the parser
    CommandLineParser parser = new GnuParser();

    options.addOption("h", "help", false, "print this message");
    options.addOption("i", "input", true, "input Behemoth corpus");
    options.addOption("o", "output", true, "output Behemoth corpus");

    String input;
    String output;

    // parse the command line arguments
    try {
        CommandLine line = parser.parse(options, args);
        if (line.hasOption("help")) {
            formatter.printHelp("CorpusFilter", options);
            return 0;
        }
        input = line.getOptionValue("i");
        output = line.getOptionValue("o");
        if (input == null || output == null) {
            formatter.printHelp("CorpusFilter", options);
            return -1;
        }
    } catch (ParseException e) {
        // Nothing was parsed; continuing would dereference a null command line
        formatter.printHelp("CorpusFilter", options);
        return -1;
    }

    final FileSystem fs = FileSystem.get(getConf());

    Path inputPath = new Path(input);
    Path outputPath = new Path(output);

    JobConf job = new JobConf(getConf());
    job.setJarByClass(this.getClass());

    job.setJobName("CorpusFilter : " + inputPath.toString());

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BehemothDocument.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    boolean isFilterRequired = BehemothMapper.isRequired(job);
    // should be the case here
    if (!isFilterRequired) {
        System.err.println("No filters configured. Check your behemoth-site.xml");
        return -1;
    }
    job.setMapperClass(BehemothMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    try {
        JobClient.runJob(job);
    } catch (Exception e) {
        e.printStackTrace();
        // remove whatever partial output the failed job left behind
        fs.delete(outputPath, true);
        // report the failure instead of falling through to the success code
        return -1;
    }

    return 0;
}

From source file:com.ebay.erl.mobius.core.JobSetup.java

License:Apache License

/**
 * Deletes the given output folder (recursively) if it exists.
 *
 * @param outputFolder folder to remove
 * @param conf         configuration used to resolve the folder's file system
 * @throws IOException if the existence check or the delete fails
 */
private static void ensureOutputDelete(Path outputFolder, Configuration conf) throws IOException {
    // Resolve the file system from the path itself, so a fully qualified
    // folder on a non-default file system is handled as well
    FileSystem fs = outputFolder.getFileSystem(conf);
    outputFolder = fs.makeQualified(outputFolder);
    if (fs.exists(outputFolder)) {
        LOGGER.info("Deleting " + outputFolder.toString());
        fs.delete(outputFolder, true);
    }
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Writes {@code topicTermCounts} to the given directory through
 * {@code DistributedRowMatrixWriter}.
 *
 * @param outputDir directory to write to
 * @param overwrite when {@code true}, {@code outputDir} is deleted
 *                  recursively before writing
 * @throws IOException if resolving the file system, deleting or writing fails
 */
public void persist(Path outputDir, boolean overwrite) throws IOException {
    // Resolved up front, regardless of the overwrite flag
    final FileSystem targetFs = outputDir.getFileSystem(conf);
    if (overwrite) {
        // second argument = recursive
        final boolean recursive = true;
        targetFs.delete(outputDir, recursive);
    }
    DistributedRowMatrixWriter.write(outputDir, conf, topicTermCounts);
}