List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
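For orientation before the examples below, here is a minimal sketch of calling delete directly; the configuration defaults and the target path are assumptions for illustration, not taken from any of the source files listed here:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical path; replace with a real path on your cluster.
        Path target = new Path("/tmp/example-output");
        // Guard with exists() so a missing path is not treated as a failure.
        if (fs.exists(target)) {
            // recursive = true is required if the path is a non-empty directory.
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted: " + deleted);
        }
    }
}

Most of the examples below follow this same exists-then-delete pattern, typically to clear a job's output directory before running it.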
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java
/**
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    int number_of_classes = 1;
    int number_of_features = 1;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);
    conf = job.getConfiguration(); // This line is mandatory.

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true); // remove any previous output directory
    FileOutputFormat.setOutputPath(job, out);

    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java
@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/naive_bayes_continuous.txt";
    Path path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true); // replace any previous result file

        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        br.write("class_id, mu(mean), std\n");
        br.write("-------------------------------\n");
        for (int i = 0; i < number_of_classes; i++) {
            br.write("-------- Class-" + i + "-------\n");
            for (int j = 0; j < number_of_features; j++) {
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_mu"))) + ", ");
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_std"))) + "\n");
            }
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File /user/hduser/naive_bayes_continuous.txt cannot be found");
    }
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapJob", outputFile, TestMapper.class,
        null, // reducer class
        null, // start key
        null, // limit key
        null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet(
        "usermail.example.com\tAaron Kimball",
        "gmail.com\tJohn Doe",
        "usermail.example.com\tChristophe Bisciglia",
        "usermail.example.com\tKiyan Ahmadizadeh",
        "gmail.com\tJane Doe",
        "usermail.example.com\tGarrett Wu");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
    final Path outputFile = createOutputFile();

    // Set the same entity IDs for start and limit, and we should get just the start row.
    final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
    final byte[] endRowKey = startEntityId.getHBaseRowKey();
    final EntityId rawLimitEntityId =
        HBaseEntityId.fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

    // Create a test job.
    final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class,
        null, // reducer class
        startEntityId,
        rawLimitEntityId,
        null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
    final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
        new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class,
        null, // reducer class
        null, // start key
        null, // limit key
        filter);

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a MapReduce job. */
@Test
public void testMapReduceJob() throws Exception {
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class,
        null, // start key
        null, // limit key
        null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));

    final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder();
    for (String line : output) {
        final String[] keyValue = line.split("\t");
        final String emailDomain = keyValue[0];
        final Set<String> names = Sets.newHashSet(keyValue[1].split(","));
        builder.put(emailDomain, names);
    }
    final Map<String, Set<String>> actual = builder.build();
    final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder()
        .put("usermail.example.com", Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia",
            "Kiyan Ahmadizadeh", "Garrett Wu"))
        .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe"))
        .build();
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
public static void cleanupOnFailureImpl(String location, Job job) throws IOException {
    Path path = new Path(location);
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        return printUsage();
    }

    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            hdfs.delete(new Path(NAME + "-inputsource*.txt"), false);
        } finally {
            checkAndClose(hdfs);
        }
    }

    return rc;
}
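Note that FileSystem.delete() does not expand glob patterns, so the wildcard path passed above only matches a file whose name literally contains '*'. A minimal sketch of removing all matching files instead, reusing the snippet's NAME constant and job variable and expanding the pattern with globStatus() first:

FileSystem hdfs = FileSystem.get(job.getConfiguration());
// globStatus() resolves the wildcard into concrete paths; delete each match.
FileStatus[] matches = hdfs.globStatus(new Path(NAME + "-inputsource*.txt"));
if (matches != null) {
    for (FileStatus status : matches) {
        hdfs.delete(status.getPath(), false);
    }
}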
From source file:com.mozilla.hadoop.UnknownPathFinder.java
License:Apache License
/**
 * Deletes all of the paths specified.
 * @param conf
 * @param paths
 * @return true if every path was deleted, false otherwise
 * @throws IOException
 */
public static boolean deleteFilesystemPaths(Configuration conf, Collection<String> paths) throws IOException {
    boolean success = true;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
        for (String s : paths) {
            Path p = new Path(s);
            if (!hdfs.delete(p, true)) {
                LOG.info("Failed to delete: " + s);
                success = false;
                break;
            } else {
                LOG.info("Successfully deleted: " + s);
            }
        }
    } finally {
        if (hdfs != null) {
            hdfs.close();
        }
    }

    return success;
}
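Because FileSystem.get() returns a cached instance that may be shared across the JVM, closing it in the finally block can affect other users of the same filesystem (the HADOOP-7973 note in the examples above describes the same problem). A minimal sketch of an alternative, using FileSystem.newInstance() so that close() only tears down a private instance; the variable names mirror the snippet above:

FileSystem hdfs = FileSystem.newInstance(conf); // private instance, safe to close
try {
    for (String s : paths) {
        hdfs.delete(new Path(s), true);
    }
} finally {
    hdfs.close();
}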
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    state = new State<T>();

    Path baseDirectory = new Path(basePath);
    hadoopConf = HadoopFileSystem.getHadoopConfiguration();
    FileSystem fs = baseDirectory.getFileSystem(hadoopConf);
    refTruncate = reflectTruncate(fs);

    long currentProcessingTime = ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();
    checkForInactiveBuckets(currentProcessingTime);

    ((StreamingRuntimeContext) getRuntimeContext())
        .registerTimer(currentProcessingTime + inactiveBucketCheckInterval, this);

    this.clock = new Clock() {
        @Override
        public long currentTimeMillis() {
            return ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();
        }
    };

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(baseDirectory) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(baseDirectory, true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}