List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
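Every example below uses the two-argument delete(Path, boolean): when the flag is true the path is removed recursively, so a non-empty directory and its contents go in one call; with false, deleting a non-empty directory throws an IOException. As a minimal sketch of the common "remove stale job output before writing" pattern seen in the examples (the class name, the output path "out", and the plain default Configuration are illustrative assumptions, not taken from any one source file):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOutputSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolve the default filesystem configured by fs.defaultFS (HDFS, local, S3, ...).
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("out"); // hypothetical job output directory
        // recursive=true deletes the directory together with everything beneath it;
        // delete typically returns false when the path does not exist, so guard with exists().
        if (fs.exists(out) && fs.delete(out, true)) {
            System.out.println("Removed stale output directory: " + out);
        }
    }
}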
From source file: byte_import.HexastoreBulkImport.java
License: Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);
    } catch (IOException e2) {
        e2.printStackTrace();
    }
    return job;
}
From source file: ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java
License: Open Source License

public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.ConsineMain <input> <output>");
        System.exit(2);
    }
    Job job1 = new Job(conf, "ConsineMain");
    job1.setJarByClass(CosineMain.class);
    job1.setMapperClass(AggregateReadingsMapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(DoubleWritable.class);
    job1.setReducerClass(AggregateReadingsReducer.class);
    job1.setOutputKeyClass(LongWritable.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.setInputDirRecursive(job1, true);
    FileInputFormat.setInputPaths(job1, new Path(otherArgs[0]));
    int lastIdx = otherArgs[0].lastIndexOf("/");
    String tempOutput = otherArgs[0].substring(0, lastIdx) + "/temp";
    FileOutputFormat.setOutputPath(job1, new Path(tempOutput));
    System.out.println("\nStarting Job-1 ...");
    final long startTime = System.currentTimeMillis();
    try {
        final long startTimeJob1 = System.currentTimeMillis();
        if (!job1.waitForCompletion(true)) {
            System.out.println("Job-1 failed.");
        } else {
            System.out.println("Duration of Job1 " + ((System.currentTimeMillis() - startTimeJob1) / 1000.0) + " seconds.");
            final Job job2 = new Job(conf, "ConsineMain Aggregate");
            job2.setJarByClass(CosineMain.class);
            job2.setInputFormatClass(CartesianInputFormat.class);
            CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, tempOutput);
            CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, tempOutput);
            FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));
            job2.setMapperClass(CartesianProductMapper.class);
            job2.setMapOutputKeyClass(DoubleWritable.class);
            job2.setMapOutputValueClass(Text.class);
            job2.setSortComparatorClass(DescendingKeyComparator.class);
            job2.setReducerClass(CartesianProductReducer.class);
            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(DoubleWritable.class);
            job2.setNumReduceTasks(10);
            final long startTimeJob2 = System.currentTimeMillis();
            System.out.println("\nStarting Job-2 ...");
            if (!job2.waitForCompletion(true)) {
                System.out.println("Job-2 failed.");
            } else {
                System.out.println("Duration of Job2: " + ((System.currentTimeMillis() - startTimeJob2) / 1000.0) + " seconds.");
            }
        }
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(tempOutput), true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Total Duration: " + duration + " seconds.");
    }
    return 0;
}
From source file: cascading.flow.hadoop.MapReduceFlowPlatformTest.java
License: Open Source License

private String remove(String path, boolean delete) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(path), HadoopPlanner.createJobConf(getProperties()));
    if (delete)
        fs.delete(new Path(path), true);
    return path;
}
From source file: cascading.flow.MapReduceFlowTest.java
License: Open Source License

private String remove(String path, boolean delete) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(path), MultiMapReducePlanner.getJobConf(getProperties()));
    if (delete)
        fs.delete(new Path(path), true);
    return path;
}
From source file: cascading.tap.hadoop.Hadoop18TapUtil.java
License: Open Source License

/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf
 */
public static void commitTask(JobConf conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));
    FileSystem fs = getFSSafe(conf, taskOutputPath);
    if (fs == null)
        return;
    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());
    if (integer.decrementAndGet() != 0)
        return;
    String taskId = conf.get("mapred.task.id");
    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);
    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;
        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);
            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - " + taskOutputPath);
            LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
        }
    }
}
From source file: cascading.tap.hadoop.Hadoop18TapUtil.java
License: Open Source License

private static synchronized void cleanTempPath(JobConf conf, Path outputPath) throws IOException {
    // do the clean up of temporary directory
    if (outputPath != null) {
        FileSystem fileSys = getFSSafe(conf, outputPath);
        if (fileSys == null)
            return;
        if (!fileSys.exists(outputPath))
            return;
        Path tmpDir = new Path(outputPath, TEMPORARY_PATH);
        LOG.info("deleting temp path " + tmpDir);
        if (fileSys.exists(tmpDir))
            fileSys.delete(tmpDir, true);
    }
}
From source file: cascading.tap.hadoop.Hadoop18TapUtil.java
License: Open Source License

private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput) throws IOException {
    String taskId = conf.get("mapred.task.id");
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
From source file: cascading.tap.hadoop.Hfs.java
License: Open Source License

private boolean deleteFullIdentifier(Configuration conf, String fullIdentifier) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("deleting: {}", fullIdentifier);
    Path fullPath = new Path(fullIdentifier);
    // do not delete the root directory
    if (fullPath.depth() == 0)
        return true;
    FileSystem fileSystem = getFileSystem(conf);
    try {
        return fileSystem.delete(fullPath, true);
    } catch (NullPointerException exception) {
        // hack to get around npe thrown when fs reaches root directory
        // removes coupling to the new aws hadoop artifacts that may not be deployed
        if (!(fileSystem.getClass().getSimpleName().equals("NativeS3FileSystem")))
            throw exception;
    }
    return true;
}
From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java
License: Open Source License

/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf
 */
public static void commitTask(Configuration conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));
    FileSystem fs = getFSSafe(conf, taskOutputPath);
    if (fs == null)
        return;
    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());
    if (integer.decrementAndGet() != 0)
        return;
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));
    LOG.info("committing task: '{}' - {}", taskId, taskOutputPath);
    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;
        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);
            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '{}' - {}", taskId, taskOutputPath);
            LOG.info("saved output of task '{}' to {}", taskId, jobOutputPath);
        }
    }
}
From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java
License: Open Source License

private static synchronized void cleanTempPath(Configuration conf, Path outputPath) throws IOException {
    // do the clean up of temporary directory
    if (outputPath != null) {
        FileSystem fileSys = getFSSafe(conf, outputPath);
        if (fileSys == null)
            return;
        if (!fileSys.exists(outputPath))
            return;
        Path tmpDir = new Path(outputPath, TEMPORARY_PATH);
        LOG.info("deleting temp path {}", tmpDir);
        if (fileSys.exists(tmpDir))
            fileSys.delete(tmpDir, true);
    }
}