Usage examples for org.apache.hadoop.fs.FileSystem#delete(Path, boolean)
public abstract boolean delete(Path f, boolean recursive) throws IOException;
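Before the examples below, here is a minimal sketch of the common delete-if-exists pattern; the output path and configuration are illustrative only and are not taken from any of the examples:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical output directory; the same call works against HDFS, S3, or any other FileSystem implementation.
Path outputDir = new Path("/tmp/job-output");
FileSystem fs = outputDir.getFileSystem(new Configuration());

// recursive=true is required to remove a non-empty directory; the boolean return
// value reports whether the path was actually deleted.
if (fs.exists(outputDir) && !fs.delete(outputDir, true)) {
    throw new IOException("unable to delete " + outputDir);
}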
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplace() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    S3PartitionedOutputCommitter committer = newJobCommitter();

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // parent and peer directories exist
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=12"))).thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // partition directories exist and should be removed
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=12"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */))
            .thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */);
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // partition directories exist and should be removed
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */))
            .thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */))
            .thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */);
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplaceWithExistsFailure() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */))
            .thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14")))
            .thenThrow(new IOException("Fake IOException for exists"));

    TestUtil.assertThrows("Should throw the fake IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */);
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplaceWithDeleteFailure() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */))
            .thenThrow(new IOException("Fake IOException for delete"));

    TestUtil.assertThrows("Should throw the fake IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */);
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplaceWithDeleteFalse() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */))
            .thenReturn(false);

    TestUtil.assertThrows("Should throw an IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */);
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestUtil.java
License:Apache License
public static void createTestOutputFiles(List<String> relativeFiles, Path attemptPath, Configuration conf)
        throws Exception {
    // create files in the attempt path that should be found by getTaskOutput
    FileSystem attemptFS = attemptPath.getFileSystem(conf);
    attemptFS.delete(attemptPath, true);
    for (String relative : relativeFiles) {
        // 0-length files are ignored, so write at least one byte
        OutputStream out = attemptFS.create(new Path(attemptPath, relative));
        out.write(34);
        out.close();
    }
}
From source file:com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java
License:Apache License
public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {
    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()),
            indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    // job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitly set hbase configuration on the job because the TableMapReduceUtil overwrites it with the
    // hbase defaults (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null,
                -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}
From source file:com.ning.metrics.collector.events.hadoop.writer.HadoopOutputChunk.java
License:Apache License
private void deleteIfExists(final Path path, final FileSystem fileSystem) throws IOException {
    if (fileSystem.exists(path) && !fileSystem.delete(path, false)) {
        throw new IOException(String.format("unable to delete %s", path));
    }
}
From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java
License:Apache License
static void deleteOutputFile(String path, String inputDir) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(new URI(inputDir), conf);
    if (fs.exists(new Path(path))) {
        fs.delete(new Path(path), true);
    }
}
From source file:com.pagerankcalculator.TwitterPageRank.java
/**
 * Graph parsing.
 * Reads the raw input data and initializes the PageRank values.
 *
 * @param in  input data file
 * @param out output directory
 */
public int parseGraph(String in, String out) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph");
    job.setJarByClass(TwitterPageRank.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(GraphParsingMapper.class);
    job.setReducerClass(GraphParsingReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pagerankcalculator.TwitterPageRank.java
public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}