Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

This page lists example usages of org.apache.hadoop.fs.FileSystem.delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file or directory. If f is a directory, recursive must be true to delete a non-empty directory; for a plain file the flag is ignored. Returns true if the delete succeeded, false otherwise (for example, when the path does not exist).
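
Before the examples below, here is a minimal self-contained sketch of the call itself (the path is illustrative, not taken from any example on this page):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class DeleteExample {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);

            // Illustrative path; replace with a real path on your cluster.
            Path target = new Path("/tmp/example-output");

            // recursive = true removes a directory together with its contents;
            // for a plain file the flag is ignored. Returns false, rather than
            // throwing, when the path does not exist.
            boolean deleted = fs.delete(target, true);
            System.out.println("deleted: " + deleted);
        }
    }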

Usage

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);

            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }

        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}
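
Distilled, the delete-and-retry idiom above looks like this (a sketch; fs, src, dst, and taskId are assumed to be in scope):

    // If rename fails, clear output left behind by an earlier task attempt,
    // then retry the rename once.
    if (!fs.rename(src, dst)) {
        if (!fs.delete(dst, true))
            throw new IOException("Failed to delete earlier output of task: " + taskId);

        if (!fs.rename(src, dst))
            throw new IOException("Failed to save output of task: " + taskId);
    }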

From source file:cascading.tap.hadoop.ZipInputFormatTest.java

License:Open Source License

public void testSplits() throws Exception {
    JobConf job = new JobConf();
    FileSystem currentFs = FileSystem.get(job);

    Path file = new Path(workDir, "test.zip");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);
    FileInputFormat.setInputPaths(job, file);

    for (int entries = 1; entries < MAX_ENTRIES; entries += random.nextInt(MAX_ENTRIES / 10) + 1) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        ZipOutputStream zos = new ZipOutputStream(byteArrayOutputStream);
        long length = 0;

        LOG.debug("creating; zip file with entries = " + entries);

        // for each entry in the zip file
        for (int entryCounter = 0; entryCounter < entries; entryCounter++) {
            // construct zip entries splitting MAX_LENGTH between entries
            long entryLength = MAX_LENGTH / entries;
            ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt");
            zipEntry.setMethod(ZipEntry.DEFLATED);
            zos.putNextEntry(zipEntry);

            for (length = entryCounter * entryLength; length < (entryCounter + 1) * entryLength; length++) {
                zos.write(Long.toString(length).getBytes());
                zos.write("\n".getBytes());
            }

            zos.flush();
            zos.closeEntry();
        }

        zos.flush();
        zos.close();

        currentFs.delete(file, true);

        OutputStream outputStream = currentFs.create(file);

        byteArrayOutputStream.writeTo(outputStream);
        outputStream.close();

        ZipInputFormat format = new ZipInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        InputSplit[] splits = format.getSplits(job, 100);

        BitSet bits = new BitSet((int) length);
        for (int j = 0; j < splits.length; j++) {
            LOG.debug("split[" + j + "]= " + splits[j]);
            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);

            try {
                int count = 0;

                while (reader.next(key, value)) {
                    int v = Integer.parseInt(value.toString());
                    LOG.debug("read " + v);

                    if (bits.get(v))
                        LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());

                    assertFalse("key in multiple partitions.", bits.get(v));
                    bits.set(v);
                    count++;
                }

                LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
            } finally {
                reader.close();
            }
        }

        assertEquals("some keys in no partition.", length, bits.cardinality());
    }
}
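
A side note on the unconditional currentFs.delete(file, true) above: FileSystem.create(Path) overwrites by default, so the delete mainly guarantees a clean slate between loop iterations, and it simply returns false when the file is not there yet.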

From source file:cascading.tap.Hfs.java

License:Open Source License

@Override
public boolean deletePath(JobConf conf) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("deleting: " + getQualifiedPath(conf));

    // do not delete the root directory
    if (getQualifiedPath(conf).depth() == 0)
        return true;

    FileSystem fileSystem = getFileSystem(conf);

    try {
        return fileSystem.delete(getPath(), true);
    } catch (NullPointerException exception) {
        // hack to get around npe thrown when fs reaches root directory
        if (!(fileSystem instanceof NativeS3FileSystem))
            throw exception;
    }

    return true;
}

From source file:cgl.hadoop.apps.runner.DataAnalysis.java

License:Open Source License

/**
 * Launch the MapReduce computation.
 * This method first removes any previous working directories and creates a new
 * one, then copies the data (file names) to the new directory and launches the
 * MapReduce (map-only) computation.
 * @param numReduceTasks - Number of reduce tasks (0 for this map-only job).
 * @param programDir - The directory where the Cap3 program is.
 * @param execName - Name of the executable.
 * @param workingDir - Working directory for the computation.
 * @param databaseArchive - Archive containing the database.
 * @param databaseName - Name of the database.
 * @param dataDir - Directory where the data is located.
 * @param outputDir - Output directory to place the output.
 * @param cmdArgs - The command line arguments to the Cap3 program.
 * @throws Exception - Any exception that occurs in this program.
 */
void launch(int numReduceTasks, String programDir, String execName, String workingDir, String databaseArchive,
        String databaseName, String dataDir, String outputDir, String cmdArgs) throws Exception {

    Configuration conf = new Configuration();
    Job job = new Job(conf, execName);

    // First get a file system handle, delete any previous files, add the
    // files and write the data to it, then pass the directory name as a
    // parameter to the job.
    Path hdMainDir = new Path(outputDir);
    FileSystem fs = FileSystem.get(conf);
    fs.delete(hdMainDir, true);

    Path hdOutDir = new Path(hdMainDir, "out");

    // Starting the data analysis.
    Configuration jc = job.getConfiguration();

    jc.set(WORKING_DIR, workingDir);
    jc.set(EXECUTABLE, execName);
    jc.set(PROGRAM_DIR, programDir); // this is the name of the executable archive
    jc.set(DB_ARCHIVE, databaseArchive);
    jc.set(DB_NAME, databaseName);
    jc.set(PARAMETERS, cmdArgs);
    jc.set(OUTPUT_DIR, outputDir);

    // using distributed cache
    // flush it
    //DistributedCache.releaseCache(new URI(programDir), jc);
    //DistributedCache.releaseCache(new URI(databaseArchive), jc);
    //DistributedCache.purgeCache(jc);
    // reput the data into cache
    long startTime = System.currentTimeMillis();
    //DistributedCache.addCacheArchive(new URI(databaseArchive), jc);
    DistributedCache.addCacheArchive(new URI(programDir), jc);
    System.out.println(
            "Add Distributed Cache in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, hdOutDir);

    job.setJarByClass(DataAnalysis.class);
    job.setMapperClass(RunnerMap.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(DataFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReduceTasks);

    startTime = System.currentTimeMillis();

    int exitStatus = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    //clean the cache

    System.exit(exitStatus);
}
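
The unconditional fs.delete(hdMainDir, true) at the top of launch() matters because Hadoop's output formats refuse to start a job whose output directory already exists. A guarded variant of that cleanup step might look like this (a sketch reusing the names from the method above):

    // Delete prior output only if present, and fail loudly if the delete
    // itself does not succeed.
    Path hdMainDir = new Path(outputDir);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(hdMainDir) && !fs.delete(hdMainDir, true)) {
        throw new IOException("Could not delete previous output: " + hdMainDir);
    }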

From source file:cmd.freebase2rdf4mr.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(OPTION_OVERRIDE_OUTPUT, OPTION_OVERRIDE_OUTPUT_DEFAULT);

    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool driver = new Freebase2RDFDriver(configuration);
    driver.run(new String[] { args[0], args[1] });

    return 0;
}

From source file:cmd.infer.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);

    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool infer = new InferDriver(configuration);
    infer.run(new String[] { args[0], args[1] });

    return 0;
}

From source file:cmd.rdf2adjacencylist.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERWRITE_OUTPUT,
            Constants.OPTION_OVERWRITE_OUTPUT_DEFAULT);

    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool tool = new Rdf2AdjacencyListDriver(configuration);
    tool.run(new String[] { args[0], args[1] });

    return 0;
}

From source file:cmd.stats.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);

    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool stats = new StatsDriver(configuration);
    stats.run(new String[] { args[0], args[1] });

    return 0;
}
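
The four cmd.* drivers above share one convention: a boolean overwrite flag read from the configuration guards a recursive delete of the output path before the job runs. Calling delete without an exists() check is safe here, since FileSystem.delete returns false rather than throwing when the path is absent.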

From source file:cmd.tdbloader4.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    configuration.set(Constants.RUN_ID, String.valueOf(System.currentTimeMillis()));
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    boolean copyToLocal = configuration.getBoolean(Constants.OPTION_COPY_TO_LOCAL,
            Constants.OPTION_COPY_TO_LOCAL_DEFAULT);
    boolean verify = configuration.getBoolean(Constants.OPTION_VERIFY, Constants.OPTION_VERIFY_DEFAULT);
    boolean runLocal = configuration.getBoolean(Constants.OPTION_RUN_LOCAL, Constants.OPTION_RUN_LOCAL_DEFAULT);

    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_1), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_2), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_3), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_4), true);
    }

    if ((copyToLocal) || (runLocal)) {
        File path = new File(args[1]);
        path.mkdirs();
    }

    Tool first = new FirstDriver(configuration);
    int status = first.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_1 });
    if (status != 0) {
        return status;
    }

    createOffsetsFile(fs, args[1] + OUTPUT_PATH_POSTFIX_1, args[1] + OUTPUT_PATH_POSTFIX_1);
    Path offsets = new Path(args[1] + OUTPUT_PATH_POSTFIX_1, Constants.OFFSETS_FILENAME);
    DistributedCache.addCacheFile(offsets.toUri(), configuration);

    Tool second = new SecondDriver(configuration);
    status = second.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_2 });
    if (status != 0) {
        return status;
    }

    Tool third = new ThirdDriver(configuration);
    status = third.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_3 });
    if (status != 0) {
        return status;
    }

    Tool fourth = new FourthDriver(configuration);
    status = fourth.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_3, args[1] + OUTPUT_PATH_POSTFIX_4 });
    if (status != 0) {
        return status;
    }

    if (copyToLocal) {
        Tool download = new download(configuration);
        download.run(
                new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_4, args[1] });
    }

    if (verify) {
        DatasetGraphTDB dsgMem = load(args[0]);
        Location location = new Location(args[1]);

        if (!copyToLocal) {
            // TODO: this is a sort of a cheat and it could go away (if it turns out to be too slow)!
            download.fixNodeTable2(location);
        }

        DatasetGraphTDB dsgDisk = SetupTDB.buildDataset(location);
        boolean isomorphic = isomorphic(dsgMem, dsgDisk);
        System.out.println("> " + isomorphic);
    }

    return status;
}

From source file:co.cask.cdap.data.hbase.HBase10CDH550Test.java

License:Apache License

@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
        htd.addFamily(new HColumnDescriptor(family));
    }
    HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
    Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        if (!fs.delete(path, true)) {
            throw new IOException("Failed delete of " + path);
        }
    }
    return HRegion.createHRegion(info, path, conf, htd);
}
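
Note the stricter handling in this last example: the delete is guarded by exists(), and a false return is promoted to an IOException, so a stale region directory makes the test fail fast instead of silently reusing old data.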