List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
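Before the examples, a minimal sketch of the contract (the class name and path below are illustrative, not taken from any of the sources that follow): delete removes a file or directory, the recursive flag must be true to remove a non-empty directory, and most implementations return false rather than throwing when the path does not exist, so callers that care about the outcome should check the returned boolean.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path dir = new Path("/tmp/example-output"); // illustrative path

        // recursive = true removes the directory and everything under it;
        // recursive = false on a non-empty directory throws an IOException
        boolean deleted = fs.delete(dir, true);

        // most implementations return false (rather than throwing) when the
        // path does not exist, so check the result if the outcome matters
        System.out.println(dir + (deleted ? " deleted" : " not deleted"));
    }
}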
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));

        if (!fs.rename(taskOutput, finalOutputPath)) {
            // rename will not overwrite: remove the stale output left by an
            // earlier attempt of this task, then retry the rename once
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);

            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }

        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);

        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}
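A design note on the example above: HDFS rename does not overwrite an existing destination, so the code deletes the stale output from an earlier task attempt and retries the rename once. A minimal sketch of that idiom in isolation, with an illustrative class and method name (not part of Cascading):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class CommitUtil { // hypothetical helper
    static void commitFile(FileSystem fs, Path src, Path dest) throws IOException {
        if (!fs.rename(src, dest)) {
            // rename() does not overwrite: clear the stale destination, retry once
            if (!fs.delete(dest, true)) {
                throw new IOException("failed to delete earlier output: " + dest);
            }
            if (!fs.rename(src, dest)) {
                throw new IOException("failed to save output: " + dest);
            }
        }
    }
}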
From source file:cascading.tap.hadoop.ZipInputFormatTest.java
License:Open Source License
public void testSplits() throws Exception {
    JobConf job = new JobConf();
    FileSystem currentFs = FileSystem.get(job);

    Path file = new Path(workDir, "test.zip");
    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);

    FileInputFormat.setInputPaths(job, file);

    for (int entries = 1; entries < MAX_ENTRIES; entries += random.nextInt(MAX_ENTRIES / 10) + 1) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        ZipOutputStream zos = new ZipOutputStream(byteArrayOutputStream);
        long length = 0;

        LOG.debug("creating zip file with entries = " + entries);

        // for each entry in the zip file
        for (int entryCounter = 0; entryCounter < entries; entryCounter++) {
            // construct zip entries splitting MAX_LENGTH between entries
            long entryLength = MAX_LENGTH / entries;
            ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt");
            zipEntry.setMethod(ZipEntry.DEFLATED);
            zos.putNextEntry(zipEntry);

            for (length = entryCounter * entryLength; length < (entryCounter + 1) * entryLength; length++) {
                zos.write(Long.toString(length).getBytes());
                zos.write("\n".getBytes());
            }

            zos.flush();
            zos.closeEntry();
        }

        zos.flush();
        zos.close();

        // drop any zip file left over from the previous iteration, then
        // write the fresh archive in its place
        currentFs.delete(file, true);

        OutputStream outputStream = currentFs.create(file);
        byteArrayOutputStream.writeTo(outputStream);
        outputStream.close();

        ZipInputFormat format = new ZipInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        InputSplit[] splits = format.getSplits(job, 100);

        BitSet bits = new BitSet((int) length);

        for (int j = 0; j < splits.length; j++) {
            LOG.debug("split[" + j + "]= " + splits[j]);
            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);

            try {
                int count = 0;

                while (reader.next(key, value)) {
                    int v = Integer.parseInt(value.toString());
                    LOG.debug("read " + v);

                    if (bits.get(v))
                        LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());

                    assertFalse("key in multiple partitions.", bits.get(v));

                    bits.set(v);
                    count++;
                }

                LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
            } finally {
                reader.close();
            }
        }

        assertEquals("some keys in no partition.", length, bits.cardinality());
    }
}
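One detail worth noting in the test above: currentFs.delete(file, true) runs unconditionally before currentFs.create(file), and its boolean return is ignored; on the first loop iteration the file does not exist yet and delete simply returns false. Since FileSystem.create(Path) defaults to overwrite = true, the delete is mostly defensive. A two-line sketch making that intent explicit (purely illustrative, assuming the same currentFs and file variables):

// drop any archive left over from the previous iteration; a false return
// here just means the file did not exist yet
currentFs.delete(file, true);
OutputStream outputStream = currentFs.create(file, true); // overwrite = true is also the default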
From source file:cascading.tap.Hfs.java
License:Open Source License
@Override
public boolean deletePath(JobConf conf) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("deleting: " + getQualifiedPath(conf));

    // do not delete the root directory
    if (getQualifiedPath(conf).depth() == 0)
        return true;

    FileSystem fileSystem = getFileSystem(conf);

    try {
        return fileSystem.delete(getPath(), true);
    } catch (NullPointerException exception) {
        // hack to get around npe thrown when fs reaches root directory
        if (!(fileSystem instanceof NativeS3FileSystem))
            throw exception;
    }

    return true;
}
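Two defensive moves in the method above generalize well: never delete the filesystem root (a qualified path with depth() == 0), and tolerate the NativeS3FileSystem NullPointerException as a no-op. A sketch of the root guard alone, with an illustrative class and method name:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class SafeDelete { // hypothetical helper
    static boolean deleteNonRoot(FileSystem fs, Path qualified) throws IOException {
        // a qualified path of depth 0 is the filesystem root: refuse to delete it
        if (qualified.depth() == 0) {
            return true;
        }
        return fs.delete(qualified, true);
    }
}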
From source file:cgl.hadoop.apps.runner.DataAnalysis.java
License:Open Source License
/**
 * Launch the MapReduce computation.
 * This method first removes any previous working directory and creates a new one,
 * then copies the data (file names) to it and launches the
 * MapReduce (map-only, though) computation.
 * @param numReduceTasks - Number of reduce tasks (= 0).
 * @param programDir - The directory where the Cap3 program is.
 * @param execName - Name of the executable.
 * @param workingDir - Working directory for the computation.
 * @param databaseArchive - Archive containing the database.
 * @param databaseName - Name of the database.
 * @param dataDir - Directory where the data is located.
 * @param outputDir - Output directory to place the output.
 * @param cmdArgs - These are the command line arguments to the Cap3 program.
 * @throws Exception - Throws any exception that occurs in this program.
 */
void launch(int numReduceTasks, String programDir, String execName, String workingDir,
        String databaseArchive, String databaseName, String dataDir, String outputDir, String cmdArgs)
        throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, execName);

    // First get the file system handler, delete any previous files, add the
    // files and write the data to it, then pass its name as a parameter to
    // the job
    Path hdMainDir = new Path(outputDir);
    FileSystem fs = FileSystem.get(conf);
    fs.delete(hdMainDir, true);
    Path hdOutDir = new Path(hdMainDir, "out");

    // Starting the data analysis.
    Configuration jc = job.getConfiguration();
    jc.set(WORKING_DIR, workingDir);
    jc.set(EXECUTABLE, execName);
    jc.set(PROGRAM_DIR, programDir); // this is the name of the executable archive
    jc.set(DB_ARCHIVE, databaseArchive);
    jc.set(DB_NAME, databaseName);
    jc.set(PARAMETERS, cmdArgs);
    jc.set(OUTPUT_DIR, outputDir);

    // using distributed cache
    // flush it
    //DistributedCache.releaseCache(new URI(programDir), jc);
    //DistributedCache.releaseCache(new URI(databaseArchive), jc);
    //DistributedCache.purgeCache(jc);

    // re-put the data into the cache
    long startTime = System.currentTimeMillis();
    //DistributedCache.addCacheArchive(new URI(databaseArchive), jc);
    DistributedCache.addCacheArchive(new URI(programDir), jc);
    System.out.println(
            "Add Distributed Cache in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, hdOutDir);

    job.setJarByClass(DataAnalysis.class);
    job.setMapperClass(RunnerMap.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(DataFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReduceTasks);

    startTime = System.currentTimeMillis();
    int exitStatus = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // clean the cache
    System.exit(exitStatus);
}
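The unconditional fs.delete(hdMainDir, true) above clears the previous run's output before the job starts, since Hadoop's output formats generally refuse to write into an existing output directory. The same guard with an explicit existence check, as a sketch (the class and method names are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class OutputGuard { // hypothetical helper
    static void clearPreviousRun(Configuration conf, Path outputDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputDir)) {
            // remove output from an earlier run so the new job can create it
            fs.delete(outputDir, true);
        }
    }
}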
From source file:cmd.freebase2rdf4mr.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(OPTION_OVERRIDE_OUTPUT, OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);

    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool driver = new Freebase2RDFDriver(configuration);
    driver.run(new String[] { args[0], args[1] });

    return 0;
}
From source file:cmd.infer.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);

    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool infer = new InferDriver(configuration);
    infer.run(new String[] { args[0], args[1] });

    return 0;
}
From source file:cmd.rdf2adjacencylist.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERWRITE_OUTPUT,
            Constants.OPTION_OVERWRITE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);

    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool tool = new Rdf2AdjacencyListDriver(configuration);
    tool.run(new String[] { args[0], args[1] });

    return 0;
}
From source file:cmd.stats.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);

    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Tool stats = new StatsDriver(configuration);
    stats.run(new String[] { args[0], args[1] });

    return 0;
}
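The four cmd.* drivers above (freebase2rdf4mr, infer, rdf2adjacencylist, stats) repeat the same preamble: read an override/overwrite flag from the configuration, resolve the FileSystem from the output path's own URI, and recursively delete the output before delegating to the real driver. A hedged consolidation into one helper — the class and method names are illustrative, not from the actual codebase:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class OutputUtil { // hypothetical shared helper
    static void maybeClearOutput(Configuration conf, String optionKey, boolean optionDefault, String output)
            throws IOException {
        if (conf.getBoolean(optionKey, optionDefault)) {
            Path out = new Path(output);
            // resolve the FileSystem from the output path's own URI, as the
            // drivers do, so hdfs://, s3://, and file:// outputs all work
            FileSystem fs = FileSystem.get(out.toUri(), conf);
            fs.delete(out, true);
        }
    }
}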
From source file:cmd.tdbloader4.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    configuration.set(Constants.RUN_ID, String.valueOf(System.currentTimeMillis()));
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    boolean copyToLocal = configuration.getBoolean(Constants.OPTION_COPY_TO_LOCAL,
            Constants.OPTION_COPY_TO_LOCAL_DEFAULT);
    boolean verify = configuration.getBoolean(Constants.OPTION_VERIFY, Constants.OPTION_VERIFY_DEFAULT);
    boolean runLocal = configuration.getBoolean(Constants.OPTION_RUN_LOCAL, Constants.OPTION_RUN_LOCAL_DEFAULT);

    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);

    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_1), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_2), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_3), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_4), true);
    }

    if ((copyToLocal) || (runLocal)) {
        File path = new File(args[1]);
        path.mkdirs();
    }

    Tool first = new FirstDriver(configuration);
    int status = first.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_1 });
    if (status != 0) {
        return status;
    }

    createOffsetsFile(fs, args[1] + OUTPUT_PATH_POSTFIX_1, args[1] + OUTPUT_PATH_POSTFIX_1);
    Path offsets = new Path(args[1] + OUTPUT_PATH_POSTFIX_1, Constants.OFFSETS_FILENAME);
    DistributedCache.addCacheFile(offsets.toUri(), configuration);

    Tool second = new SecondDriver(configuration);
    status = second.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_2 });
    if (status != 0) {
        return status;
    }

    Tool third = new ThirdDriver(configuration);
    status = third.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_3 });
    if (status != 0) {
        return status;
    }

    Tool fourth = new FourthDriver(configuration);
    status = fourth.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_3, args[1] + OUTPUT_PATH_POSTFIX_4 });
    if (status != 0) {
        return status;
    }

    if (copyToLocal) {
        Tool download = new download(configuration);
        download.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_4, args[1] });
    }

    if (verify) {
        DatasetGraphTDB dsgMem = load(args[0]);
        Location location = new Location(args[1]);

        if (!copyToLocal) {
            // TODO: this is a sort of a cheat and it could go away (if it turns out to be too slow)!
            download.fixNodeTable2(location);
        }

        DatasetGraphTDB dsgDisk = SetupTDB.buildDataset(location);
        boolean isomorphic = isomorphic(dsgMem, dsgDisk);
        System.out.println("> " + isomorphic);
    }

    return status;
}
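Because tdbloader4 stages its data through four suffixed directories, the override branch above issues one delete per stage plus one for the final output. The same cleanup expressed as a loop, as a sketch (the class, method, and parameter names are illustrative):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class StageCleanup { // hypothetical helper
    static void clearStages(FileSystem fs, String output, String... postfixes) throws IOException {
        fs.delete(new Path(output), true); // final output from a previous run
        for (String postfix : postfixes) {
            // each intermediate stage directory, e.g. output + OUTPUT_PATH_POSTFIX_1
            fs.delete(new Path(output + postfix), true);
        }
    }
}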
From source file:co.cask.cdap.data.hbase.HBase10CDH550Test.java
License:Apache License
@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }

    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
        htd.addFamily(new HColumnDescriptor(family));
    }

    HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
    Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(path)) {
        if (!fs.delete(path, true)) {
            throw new IOException("Failed delete of " + path);
        }
    }

    return HRegion.createHRegion(info, path, conf, htd);
}
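The HBase test above shows the other half of the contract: delete signals failure through its boolean return, not an exception, so code that must not proceed over stale data converts a false return into an IOException. A minimal sketch of that pattern (the class and method names are illustrative):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class StrictDelete { // hypothetical helper
    static void deleteOrThrow(FileSystem fs, Path path) throws IOException {
        if (fs.exists(path) && !fs.delete(path, true)) {
            // path exists but could not be removed: fail loudly rather than
            // risk reusing stale data
            throw new IOException("Failed delete of " + path);
        }
    }
}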