Usage examples for org.apache.hadoop.fs.FileSystem#delete(Path, boolean)
public abstract boolean delete(Path f, boolean recursive) throws IOException;
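Before the examples below, here is a minimal sketch of the common delete-if-exists pattern; the output path and configuration are illustrative only and are not taken from any of the examples:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical output directory; the same call works against HDFS, S3, or any other FileSystem implementation.
Path outputDir = new Path("/tmp/job-output");
FileSystem fs = outputDir.getFileSystem(new Configuration());

// recursive=true is required to remove a non-empty directory; the boolean return
// value reports whether the path was actually deleted.
if (fs.exists(outputDir) && !fs.delete(outputDir, true)) {
    throw new IOException("unable to delete " + outputDir);
}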
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplace() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    S3PartitionedOutputCommitter committer = newJobCommitter();

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // parent and peer directories exist
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=12"))).thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // partition directories exist and should be removed
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=12"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */))
            .thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */);
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verifyNoMoreInteractions(mockS3);

    // partition directories exist and should be removed
    reset(mockS3);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"))).thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */))
            .thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */))
            .thenReturn(true);

    committer.commitJob(getJob());
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */);
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplaceWithExistsFailure() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */))
            .thenReturn(true);
    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14")))
            .thenThrow(new IOException("Fake IOException for exists"));

    TestUtil.assertThrows("Should throw the fake IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"), true /* recursive */);
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplaceWithDeleteFailure() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */))
            .thenThrow(new IOException("Fake IOException for delete"));

    TestUtil.assertThrows("Should throw the fake IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=14"), true /* recursive */);
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestS3PartitionedJobCommit.java
License:Apache License
@Test
public void testReplaceWithDeleteFalse() throws Exception {
    FileSystem mockS3 = getMockS3();
    getJob().getConfiguration().set(S3Committer.CONFLICT_MODE, "replace");

    final S3PartitionedOutputCommitter committer = newJobCommitter();

    when(mockS3.exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"))).thenReturn(true);
    when(mockS3.delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */))
            .thenReturn(false);

    TestUtil.assertThrows("Should throw an IOException", IOException.class, new Callable<Void>() {
        @Override
        public Void call() throws IOException {
            committer.commitJob(getJob());
            return null;
        }
    });

    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=13"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161115/hour=14"));
    verify(mockS3).exists(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"));
    verify(mockS3).delete(new Path(OUTPUT_PATH, "dateint=20161116/hour=13"), true /* recursive */);
    Assert.assertTrue("Should have aborted", ((TestPartitionedOutputCommitter) committer).aborted);
    verifyNoMoreInteractions(mockS3);
}
From source file:com.netflix.bdp.s3.TestUtil.java
License:Apache License
public static void createTestOutputFiles(List<String> relativeFiles, Path attemptPath, Configuration conf)
        throws Exception {
    // create files in the attempt path that should be found by getTaskOutput
    FileSystem attemptFS = attemptPath.getFileSystem(conf);
    attemptFS.delete(attemptPath, true);
    for (String relative : relativeFiles) {
        // 0-length files are ignored, so write at least one byte
        OutputStream out = attemptFS.create(new Path(attemptPath, relative));
        out.write(34);
        out.close();
    }
}
From source file:com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java
License:Apache License
public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {
    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()),
            indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    // job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitly set hbase configuration on the job because the TableMapReduceUtil overwrites it with the
    // hbase defaults (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null,
                -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}
From source file:com.ning.metrics.collector.events.hadoop.writer.HadoopOutputChunk.java
License:Apache License
private void deleteIfExists(final Path path, final FileSystem fileSystem) throws IOException {
    if (fileSystem.exists(path) && !fileSystem.delete(path, false)) {
        throw new IOException(String.format("unable to delete %s", path));
    }
}
From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java
License:Apache License
static void deleteOutputFile(String path, String inputDir) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(new URI(inputDir), conf);
    if (fs.exists(new Path(path))) {
        fs.delete(new Path(path), true);
    }
}
From source file:com.pagerankcalculator.TwitterPageRank.java
/**
 * Graph parsing.
 * Reads the raw input data and initializes the PageRank values.
 *
 * @param in  input data file
 * @param out output directory
 */
public int parseGraph(String in, String out) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph");
    job.setJarByClass(TwitterPageRank.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(GraphParsingMapper.class);
    job.setReducerClass(GraphParsingReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pagerankcalculator.TwitterPageRank.java
public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}