List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
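The recursive flag controls whether a non-empty directory is removed along with its contents, and the boolean return value reports whether the delete took effect. Below is a minimal sketch of the delete-if-exists cleanup idiom that recurs in the examples that follow; the path and configuration are hypothetical placeholders, not taken from any of the source files.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical output directory; a real job would derive this from its arguments.
        Path outputPath = new Path("/tmp/job-output");

        // delete(Path, boolean): passing true removes the directory recursively,
        // which is required when it is non-empty; the call returns true only if
        // the path existed and was removed.
        if (fs.exists(outputPath)) {
            boolean deleted = fs.delete(outputPath, true);
            System.out.println("Deleted " + outputPath + ": " + deleted);
        }
        fs.close();
    }
}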
From source file:com.pagerankcalculator.TwitterPageRank.java
public int sortPagerank(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(1);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    job.setSortComparatorClass(DoubleSortDescComparator.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.panguso.lc.analysis.format.Logcenter.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    context = new ClassPathXmlApplicationContext("applicationContext.xml");
    Properties prop = context.getBean("configProperties", Properties.class);
    // String time = new DateTime().toString("yyyyMMddHH");
    // Expected properties:
    // hadoop.lib=/application/format/lib/
    // hadoop.conf=/application/format/conf/
    // hadoop.src=/log/src/
    // hadoop.dest=/log/dest/
    // hadoop.archive=/log/archive/
    libPath = prop.getProperty("hadoop.lib");
    confPath = prop.getProperty("hadoop.conf");
    srcPath = prop.getProperty("hadoop.src");
    destPath = prop.getProperty("hadoop.dest");
    archivePath = prop.getProperty("hadoop.archive");
    Configuration conf = getConf();
    logger.info("libPath=" + libPath);
    logger.info("confPath=" + confPath);
    logger.info("srcPath=" + srcPath);
    logger.info("destPath=" + destPath);
    logger.info("archivePath=" + archivePath);

    FileSystem fs = FileSystem.get(conf);

    // Add jar files to the distributed cache classpath.
    FileStatus[] fJars = fs.listStatus(new Path(libPath));
    for (FileStatus fileStatus : fJars) {
        String jar = libPath + fileStatus.getPath().getName();
        DistributedCache.addFileToClassPath(new Path(jar), conf, FileSystem.get(conf));
    }
    // Add configuration archives to the distributed cache classpath.
    FileStatus[] fProp = fs.listStatus(new Path(confPath));
    for (FileStatus fileStatus : fProp) {
        DistributedCache.addArchiveToClassPath(new Path(confPath + fileStatus.getPath().getName()), conf,
                FileSystem.get(conf));
    }

    FileStatus[] fDirs = fs.listStatus(new Path(srcPath));
    if (fDirs != null && fDirs.length > 0) {
        for (FileStatus file : fDirs) {
            // Each directory under srcPath is named after a timestamp.
            String currentTime = file.getPath().getName();
            String srcPathWithTime = srcPath + currentTime + "/";
            String destPathWithTime = destPath + currentTime + "/";
            String archPathWithTime = archivePath + currentTime + "/";

            // Skip directories that have already been processed successfully.
            if (analysisService.isSuccessful(currentTime)) {
                continue;
            }

            // fs.delete(new Path(destPathWithTime), true);
            // if (!fs.exists(new Path(srcPathWithTime))) {
            //     logger.warn("outPath does not exist,inputPath=" + srcPathWithTime);
            //     analysisService.saveFailureJob(job.getJobName(), currentTime);
            //     return -1;
            // }

            // Replace ';' with ':' in the job classpath entries.
            Job job = new Job(conf);
            String jars = job.getConfiguration().get("mapred.job.classpath.files");
            job.getConfiguration().set("mapred.job.classpath.files", jars.replace(";", ":"));
            logger.info("current dir=" + currentTime);
            job.setJobName("format_" + currentTime);
            job.setJarByClass(Logcenter.class);
            job.setMapperClass(FormatAnalysisMapper.class);
            job.setReducerClass(FormatAnalysisReducer.class);
            job.setCombinerClass(FormatAnalysisReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // job.setNumReduceTasks(0);

            FileInputFormat.addInputPath(job, new Path(srcPathWithTime));
            FileOutputFormat.setOutputPath(job, new Path(destPathWithTime));

            boolean result = false;
            try {
                result = job.waitForCompletion(true);
            } catch (FileAlreadyExistsException e) {
                logger.warn(e.getMessage(), e);
            }
            if (!result) {
                logger.warn("job execute failure!");
                analysisService.saveFailureJob(job.getJobName(), currentTime);
                continue; // return -1;
            }

            // Move the processed input to the archive directory, replacing any previous archive.
            fs.delete(new Path(archPathWithTime), true);
            fs.rename(new Path(srcPathWithTime), new Path(archPathWithTime));
            analysisService.saveSuccessJob(job.getJobName(), currentTime);
        }
    }
    FileSystem.closeAll();
    return 0;
}
From source file:com.pegasus.ResultInfo.java
License:Apache License
public int run(final String[] args) throws Exception {
    Configuration conf = getConf();
    final FileSystem fs = FileSystem.get(conf);
    edge_path = new Path(conf.get("edge_path"));
    all_vertices = new Path(conf.get("all_vertices"));
    curbm_path = new Path(conf.get("iteration_state"));
    tempbm_path = new Path(conf.get("stage1out"));
    nextbm_path = new Path(conf.get("stage2out"));
    output_path = new Path(conf.get("stage3out"));
    grapherOut_path = new Path(conf.get("grapherout"));
    nreducers = Integer.parseInt(conf.get("num_reducers"));
    local_output_path = conf.get("local_output");

    // initial cleanup
    fs.delete(tempbm_path, true);
    fs.delete(nextbm_path, true);
    fs.delete(output_path, true);
    fs.delete(curbm_path, true);
    fs.delete(grapherOut_path, true);
    FileUtil.fullyDelete(new File(local_output_path));
    fs.mkdirs(curbm_path);
    // fs.mkdirs(grapherOut_path);

    FileStatus[] statusArray = fs.listStatus(all_vertices);
    for (int index = 0; index < statusArray.length; index++) {
        Path temp = statusArray[index].getPath();
        FileUtil.copy(fs, temp, fs, curbm_path, false, conf);
    }

    make_symmetric = 1;

    System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");

    // Iteratively calculate neighborhood function.
    // rotate directory
    for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
        cur_iter++;

        System.out.println("configStage1");
        JobClient.runJob(configStage1());
        System.out.println("configStage2");
        JobClient.runJob(configStage2());
        System.out.println("configStage3");
        JobClient.runJob(configStage3());

        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        // copy neighborhood information from HDFS to local disk, and read it!
        String new_path = local_output_path + "/" + i;
        fs.copyToLocalFile(output_path, new Path(new_path));
        ResultInfo ri = readIterationOutput(new_path);

        changed_nodes[iter_counter] = ri.changed;
        changed_nodes[iter_counter] = ri.unchanged;

        iter_counter++;

        System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged);

        fs.delete(curbm_path);
        fs.delete(tempbm_path);
        fs.delete(output_path);
        fs.rename(nextbm_path, curbm_path);

        // Stop when the minimum neighborhood doesn't change
        if (ri.changed == 0) {
            System.out.println("All the component ids converged. Finishing...");
            fs.rename(curbm_path, grapherOut_path);
            break;
        }
    }
    FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

    // finishing.
    System.out.println("\n[PEGASUS] Connected component computed.");
    System.out.println("[PEGASUS] Total Iteration = " + iter_counter);

    return 0;
}
From source file:com.phantom.hadoop.examples.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // check for passed parameters, otherwise use defaults
    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }
    // now set the values within conf for M/R tasks to read, this
    // will ensure values are set preventing MAPREDUCE-4678
    conf.setInt(Pentomino.WIDTH, width);
    conf.setInt(Pentomino.HEIGHT, height);
    conf.setInt(Pentomino.DEPTH, depth);

    Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, OneSidedPentomino.class,
            Pentomino.class);
    int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        Job job = new Job(conf);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setJarByClass(PentMap.class);

        job.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        long inputSize = createInputDirectory(fileSys, input, pent, depth);

        // for forcing the number of maps
        FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));

        // the keys are the prefix strings
        job.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        job.setOutputValueClass(Text.class);

        job.setMapperClass(PentMap.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(1);

        return (job.waitForCompletion(true) ? 0 : 1);
    } finally {
        fileSys.delete(input, true);
    }
}
From source file:com.phantom.hadoop.examples.pi.DistSum.java
License:Apache License
/** Start a job to compute sigma */
private void compute(final String name, Summation sigma) throws IOException {
    if (sigma.getValue() != null)
        throw new IOException("sigma.getValue() != null, sigma=" + sigma);

    // setup remote directory
    final FileSystem fs = FileSystem.get(getConf());
    final Path dir = fs.makeQualified(new Path(parameters.remoteDir, name));
    if (!Util.createNonexistingDirectory(fs, dir))
        return;

    // setup a job
    final Job job = createJob(name, sigma);
    final Path outdir = new Path(dir, "out");
    FileOutputFormat.setOutputPath(job, outdir);

    // start a map/reduce job
    final String startmessage = "steps/parts = " + sigma.E.getSteps() + "/" + parameters.nParts + " = "
            + Util.long2string(sigma.E.getSteps() / parameters.nParts);
    Util.runJob(name, job, parameters.machine, startmessage, timer);
    final List<TaskResult> results = Util.readJobOutputs(fs, outdir);
    Util.writeResults(name, results, fs, parameters.remoteDir);
    fs.delete(dir, true);

    // combine results
    final List<TaskResult> combined = Util.combine(results);
    final PrintWriter out = Util.createWriter(parameters.localDir, name);
    try {
        for (TaskResult r : combined) {
            final String s = taskResult2string(name, r);
            out.println(s);
            out.flush();
            Util.out.println(s);
        }
    } finally {
        out.close();
    }
    if (combined.size() == 1) {
        final Summation s = combined.get(0).getElement();
        if (sigma.contains(s) && s.contains(sigma))
            sigma.setValue(s.getValue());
    }
}
From source file:com.phantom.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal,
                RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();

    MRVertex v0 = new MRVertex(0, conf);
    v0.addEdgeTo(2);
    vertices.add(v0);

    MRVertex v1 = new MRVertex(1, conf);
    v1.addEdgeTo(2);
    vertices.add(v1);

    MRVertex v2 = new MRVertex(2, conf);
    v2.addEdgeTo(3);
    vertices.add(v2);

    MRVertex v3 = new MRVertex(3, conf);
    v3.addEdgeTo(4);
    vertices.add(v3);

    MRVertex v4 = new MRVertex(4, conf);
    v4.addEdgeTo(5);
    v4.addEdgeTo(6);
    vertices.add(v4);

    MRVertex v5 = new MRVertex(5, conf);
    vertices.add(v5);

    MRVertex v6 = new MRVertex(6, conf);
    v6.addEdgeTo(7);
    vertices.add(v6);

    MRVertex v7 = new MRVertex(7, conf);
    vertices.add(v7);

    FSDataOutputStream out = fileSystem.create(path);
    for (MRVertex vertex : vertices) {
        Text text = vertex.toText(MRVertex.EdgeFormat.EDGES_TO);
        byte[] bytes = text.copyBytes();
        for (byte b : bytes)
            out.write(b);
        out.write('\n');
    }
    out.close();

    fileSystem.close();
}
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
private static void cleanupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    ArrayList<MRVertex> branch = new ArrayList<MRVertex>();
    FileStatus[] branchFiles = fileSystem.listStatus(new Path(testOutput + "/branch"));
    for (FileStatus status : branchFiles)
        readVertices(status, branch, conf);
    for (MRVertex vertex : branch)
        System.out.println(vertex.toDisplayString());

    ArrayList<MRVertex> chain = new ArrayList<MRVertex>();
    FileStatus[] chainFiles = fileSystem.listStatus(new Path(testOutput + "/chain"));
    for (FileStatus status : chainFiles)
        readVertices(status, chain, conf);
    for (MRVertex vertex : chain)
        System.out.println(vertex.toDisplayString());

    fileSystem.delete(new Path(testInput), true);
    fileSystem.delete(new Path(testOutput), true);

    fileSystem.close();
}
From source file:com.philiphubbard.digraph.MRCompressChains.java
License:Open Source License
public static boolean continueIteration(Job job, Path inputPathOrig, Path outputPathOrig) throws IOException {
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());

    if (iter > 0) {
        Path outputPathOld = new Path(outputPathOrig.toString() + (iter - 1));
        if (fileSystem.exists(outputPathOld))
            fileSystem.delete(outputPathOld, true);
    }

    Counters jobCounters = job.getCounters();
    long numCompressions = jobCounters.findCounter(MRCompressChains.CompressionCounter.numCompressions)
            .getValue();
    if (numCompressions == 0)
        numIterWithoutCompressions++;
    else
        numIterWithoutCompressions = 0;
    int limit = job.getConfiguration().getInt(CONFIG_TERMINATION_COUNT, 1);
    boolean keepGoing = (numIterWithoutCompressions < limit);

    if (keepGoing) {
        iter++;
    } else {
        Path outputPath = new Path(outputPathOrig.toString() + iter);
        fileSystem.rename(outputPath, outputPathOrig);
    }

    return keepGoing;
}
From source file:com.philiphubbard.digraph.MRCompressChainsTest.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();
    for (int i = 0; i < 60; i++) {
        MRVertex vertex = new MRVertex(i, conf);
        vertices.add(vertex);
        if (i % 20 != 19)
            vertex.addEdgeTo(i + 1);
    }

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(path),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(BytesWritable.class));
    for (MRVertex vertex : vertices)
        writer.append(new IntWritable(vertex.getId()), vertex.toWritable(MRVertex.EdgeFormat.EDGES_TO));
    writer.close();

    fileSystem.close();
}