List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
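The recursive flag controls directory handling: with recursive = true a directory is removed together with everything under it, while recursive = false succeeds only for files and empty directories (a non-empty directory raises an IOException). The boolean return value reports whether anything was actually deleted. Before the real-world examples, here is a minimal self-contained sketch of the call; the configuration and the /tmp/example-output path are illustrative assumptions, not taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {

    public static void main(String[] args) throws IOException {
        // picks up core-site.xml / hdfs-site.xml from the classpath
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // hypothetical path, used only for illustration
        Path outputDir = new Path("/tmp/example-output");

        // guard with exists(): delete() typically just returns false when
        // the path is absent, so the check separates "nothing to delete"
        // from "delete failed"
        if (fs.exists(outputDir)) {
            boolean deleted = fs.delete(outputDir, true); // recursive delete
            System.out.println("Deleted " + outputDir + ": " + deleted);
        }
    }
}

Most of the examples below follow exactly this exists-then-delete pattern to clear a job's output or temporary directory before or after a run.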
From source file:com.benchmark.mapred.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}
From source file:com.benchmark.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    //final Path inDir = new Path(TMP_DIR, "in");
    final Path inDir = new Path("/home/hadoop1/tmp_dir", "in");
    System.out.println("inDir =" + inDir.toString());
    //final Path outDir = new Path(TMP_DIR, "out");
    final Path outDir = new Path("/home/hadoop1/tmp_dir", "out");
    System.out.println("outDir =" + outDir.toString());
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.benchmark.mapred.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
    TeraInputFormat inFormat = new TeraInputFormat();
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        while (reader.next(key, value)) {
            sampler.addKey(key);
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}
From source file:com.bigdata.diane.MiniTestDFSIO.java
License:Apache License
private static void writeTest(FileSystem fs, Configuration fsConfig) throws IOException {
    fs.delete(DATA_DIR, true);
    fs.delete(WRITE_DIR, true);
    runIOTest(WriteMapper.class, WRITE_DIR, fsConfig);
}
From source file:com.bigdog.hadoop.mapreduce.combine.WordCountCombineApp.java
public void combine() throws Exception {
    Configuration conf = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
    final Path outPath = new Path(OUT_PATH);
    // remove the output directory if it already exists
    if (fileSystem.exists(outPath)) {
        fileSystem.delete(outPath, true);
    }

    final Job job = new Job(conf, WordCountCombineApp.class.getSimpleName());

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // specify how the input files are parsed
    //job.setInputFormatClass(TextInputFormat.class);

    // 1.2 specify the custom mapper class
    job.setMapperClass(MyMapper.class);
    // the map output <k,v> types; may be omitted when they match the <k3,v3> output types
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    //job.setPartitionerClass(HashPartitioner.class);
    // number of reduce tasks
    //job.setNumReduceTasks(1);

    // 1.4 TODO sorting and grouping
    // 1.5 combiner
    job.setCombinerClass(MyCombiner.class);

    // 2.2 specify the custom reducer class
    job.setReducerClass(MyReducer.class);
    // the reduce output <k,v> types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, outPath);
    // specify the output format class
    //job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bigdog.hadoop.mapreduce.counter.WordCountCounterApp.java
public void CustomerCounter() throws Exception {
    Configuration conf = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
    final Path outPath = new Path(OUT_PATH);
    // remove the output directory if it already exists
    if (fileSystem.exists(outPath)) {
        fileSystem.delete(outPath, true);
    }

    final Job job = new Job(conf, WordCountCounterApp.class.getSimpleName());

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // specify how the input files are parsed
    //job.setInputFormatClass(TextInputFormat.class);

    // 1.2 specify the custom mapper class
    job.setMapperClass(MyMapper.class);
    // the map output <k,v> types; may be omitted when they match the <k3,v3> output types
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    //job.setPartitionerClass(HashPartitioner.class);
    // number of reduce tasks
    //job.setNumReduceTasks(1);

    // 1.4 TODO sorting and grouping
    // 1.5 TODO combiner

    // 2.2 specify the custom reducer class
    job.setReducerClass(MyReducer.class);
    // the reduce output <k,v> types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, outPath);
    // specify the output format class
    //job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bigdog.hadoop.mapreduce.group.GroupApp.java
public void group() throws Exception {
    final Configuration configuration = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), configuration);
    // remove the output directory if it already exists
    if (fileSystem.exists(new Path(OUT_PATH))) {
        fileSystem.delete(new Path(OUT_PATH), true);
    }

    final Job job = new Job(configuration, GroupApp.class.getSimpleName());

    // 1.1 specify the input path
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    // specify how the input files are parsed
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 specify the custom mapper class
    job.setMapperClass(MyMapper.class);
    // the map output <k2,v2> types
    job.setMapOutputKeyClass(NewK2.class);
    job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    job.setPartitionerClass(HashPartitioner.class);
    job.setNumReduceTasks(1);

    // 1.4 TODO sorting and grouping
    job.setGroupingComparatorClass(MyGroupingComparator.class);
    // 1.5 TODO combiner

    // 2.2 specify the custom reducer class
    job.setReducerClass(MyReducer.class);
    // the reduce output <k3,v3> types
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 specify the output path
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    // specify the output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorMapFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    int seq = 0;
    String inputFile = (args.length > seq) ? args[seq] : "";
    seq++;
    String outputFile = (args.length > seq) ? args[seq++] : "/tmp/hsearch-index";
    String outputFileName = (args.length > seq) ? args[seq++] : "file1";
    String xmlFilePath = (args.length > seq) ? args[seq++] : "";
    String replaceFrom = (args.length > seq) ? args[seq++] : "";
    String replaceTo = (args.length > seq) ? args[seq++] : "";
    String startIndex = (args.length > seq) ? args[seq++] : "";
    String endIndex = (args.length > seq) ? args[seq++] : "";
    String numberOfReducerStr = (args.length > seq) ? args[seq] : "1";
    int numberOfReducer = Integer.parseInt(numberOfReducerStr);

    if (null == inputFile || inputFile.trim().isEmpty()) {
        String err = KVReplicatorHFile.class + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();

    FieldMapping fm = KVIndexer.createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputFile = outputFile.charAt(outputFile.length() - 1) == '/' ? outputFile : outputFile + "/";
    outputFile = outputFile + fm.tableName;

    conf.set(OUTPUT_FILE_PATH, outputFile);
    conf.set(OUTPUT_FILE_NAME, outputFileName);
    conf.set(REPLACE_FROM, replaceFrom);
    conf.set(REPLACE_TO, replaceTo);
    conf.set(START_INDEX, startIndex);
    conf.set(END_INDEX, endIndex);

    Job job = Job.getInstance(conf, "KVReplicatorMapFile - Replicating Map File");

    job.setJarByClass(KVReplicatorMapFile.class);
    job.setMapperClass(KVReplicatorMapper.class);
    job.setReducerClass(KVReplicatorReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(numberOfReducer);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim()));

    FileSystem fs = FileSystem.get(conf);
    Path dummyPath = new Path("/tmp", "dummy");
    if (fs.exists(dummyPath)) {
        fs.delete(dummyPath, true);
    }

    FileOutputFormat.setOutputPath(job, dummyPath);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
From source file:com.blackberry.logdriver.admin.HFind.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();

    int i = 0;
    while (i < args.length) {
        if (args[i].startsWith("-")) {
            break;
        }

        Path path = new Path(args[i]);
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] fileStatuses = fs.globStatus(path);
        if (fileStatuses != null) {
            for (FileStatus fileStatus : fileStatuses) {
                paths.add(fileStatus.getPath());
                fileStatusCache.put(fileStatus.getPath(), fileStatus);
            }
        }

        i++;
    }

    while (i < args.length) {
        // -print action
        if ("-print".equals(args[i])) {
            actions.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    System.out.println(fileStatus.getPath());
                    return true;
                }
            });
        }

        // -delete action
        if ("-delete".equals(args[i])) {
            actions.add(new FileStatusFilter() {
                @SuppressWarnings("deprecation")
                @Override
                public boolean accept(FileStatus fileStatus) {
                    try {
                        FileSystem fs = fileStatus.getPath().getFileSystem(getConf());
                        // only delete files and empty directories
                        if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) {
                            return fs.delete(fileStatus.getPath(), true);
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    return false;
                }
            });
        }

        // -atime test
        else if ("-atime".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing argument for -atime");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) > time;
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) < time;
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) == time;
                    }
                });
            }
        }

        // -mtime test
        else if ("-mtime".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing argument for -mtime");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) > time;
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) < time;
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) == time;
                    }
                });
            }
        }

        // -amin test
        else if ("-amin".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing argument for -amin");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getAccessTime()) / (60 * 1000) > time;
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getAccessTime()) / (60 * 1000) < time;
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getAccessTime()) / (60 * 1000) == time;
                    }
                });
            }
        }

        // -mmin test
        else if ("-mmin".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing argument for -mmin");
                System.exit(1);
            }

            String t = args[i];
            if (t.charAt(0) == '+') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getModificationTime()) / (60 * 1000) > time;
                    }
                });
            } else if (t.charAt(0) == '-') {
                final long time = Long.parseLong(t.substring(1));
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getModificationTime()) / (60 * 1000) < time;
                    }
                });
            } else {
                final long time = Long.parseLong(t);
                tests.add(new FileStatusFilter() {
                    @Override
                    public boolean accept(FileStatus fileStatus) {
                        return (startTime - fileStatus.getModificationTime()) / (60 * 1000) == time;
                    }
                });
            }
        }

        // -regex test
        else if ("-regex".equals(args[i])) {
            i++;
            if (i >= args.length) {
                System.err.println("Missing argument for -regex");
                System.exit(1);
            }

            final Pattern p = Pattern.compile(args[i]);
            tests.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    return p.matcher(fileStatus.getPath().toString()).matches();
                }
            });
        }

        i++;
    }

    // default action is -print
    if (actions.size() == 0) {
        actions.add(new FileStatusFilter() {
            @Override
            public boolean accept(FileStatus fileStatus) {
                System.out.println(fileStatus.getPath());
                return true;
            }
        });
    }

    search();

    return 0;
}
From source file:com.blackberry.logdriver.LockedFs.java
License:Apache License
public void delete(Configuration conf, String toDelete) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(toDelete);
    if (fs.exists(path)) {
        fs.delete(path, true);
    } else {
        LOG.warn("File to delete not found:" + toDelete);
    }
}