Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#delete(Path, boolean).

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file or directory at the given path. If the path is a directory and recursive is true, the directory and all of its contents are deleted; if recursive is false, deleting a non-empty directory fails. Returns true if the delete succeeded, false otherwise.
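
The snippet below is a minimal sketch of the exists-then-delete pattern that recurs throughout the examples on this page; the path and configuration are illustrative assumptions, not taken from any of the source files.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path outputDir = new Path("/tmp/job-output"); // hypothetical path
        FileSystem fs = outputDir.getFileSystem(conf);

        // recursive = true deletes the directory and everything under it;
        // delete returns false if the path does not exist or the delete fails.
        if (fs.exists(outputDir) && !fs.delete(outputDir, true)) {
            throw new IOException("Failed to delete " + outputDir);
        }
    }
}

Most of the job examples below use this pattern to clear an existing output directory before running.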

Usage

From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java

License:Apache License

@Test
public void testReplace() throws Exception {
    FileSystem mockS3 = getMockS3();

    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    S3PartitionedOutputCommitter committer = newJobCommitter();

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // parent and peer directories exist
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=12"))).thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // partition directories exist and should be removed
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=12"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */ ))
            .thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */ );
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // partition directories exist and should be removed
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */ ))
            .thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */ ))
            .thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */ );
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */ );
    verifyNoMoreInteractions(mockS3);
}

From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java

License:Apache License

@Test
public void testReplaceWithExistsFailure() throws Exception {
    FileSystem mockS3 = getMockS3();

    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */ ))
            .thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14")))
            .thenThrow(new IOException("Fake IOException for exists"));

    TestUtil.assertThrows("Should throw the fake IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */ );
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}

From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java

License:Apache License

@Test
public void testReplaceWithDeleteFailure() throws Exception {
    FileSystem mockS3 = getMockS3();

    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */ ))
            .thenThrow(new IOException("Fake IOException for delete"));

    TestUtil.assertThrows("Should throw the fake IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */ );
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}

From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java

License:Apache License

@Test
public void testReplaceWithDeleteFalse() throws Exception {
    FileSystem mockS3 = getMockS3();

    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */ ))
            .thenReturn(false);

    TestUtil.assertThrows("Should throw an IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */ );
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}

From source file:com.netflix.bdp.s3.TestUtil.java

License:Apache License

public static void createTestOutputFiles(List<String> relativeFiles, Path attemptPath, Configuration conf)
        throws Exception {
    // create files in the attempt path that should be found by getTaskOutput
    FileSystem attemptFS = attemptPath.getFileSystem(conf);
    attemptFS.delete(attemptPath, true);
    for (String relative : relativeFiles) {
        // 0-length files are ignored, so write at least one byte
        OutputStream out = attemptFS.create(new Path(attemptPath, relative));
        out.write(34);
        out.close();
    }
}

From source file:com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java

License:Apache License

public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {

    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()), indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    //        job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitly set hbase configuration on the job because TableMapReduceUtil overwrites it with the hbase defaults
    // (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: " + "reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null, -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}

From source file:com.ning.metrics.collector.events.hadoop.writer.HadoopOutputChunk.java

License:Apache License

private void deleteIfExists(final Path path, final FileSystem fileSystem) throws IOException {
    if (fileSystem.exists(path) && !fileSystem.delete(path, false)) {
        throw new IOException(String.format("unable to delete %s", path));
    }
}

From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java

License:Apache License

static void deleteOutputFile(String path, String inputDir) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(new URI(inputDir), conf);
    if (fs.exists(new Path(path))) {
        fs.delete(new Path(path), true);
    }
}

From source file:com.pagerankcalculator.TwitterPageRank.java

/**
 * Graph parsing.
 * Reads the raw input data and initializes the PageRank values.
 * 
 * @param in input data file
 * @param out output directory
 */
public int parseGraph(String in, String out) throws IOException, InterruptedException, ClassNotFoundException {

    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(GraphParsingMapper.class);
    job.setReducerClass(GraphParsingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pagerankcalculator.TwitterPageRank.java

public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}