List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
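delete removes the file or directory at f and returns true on success; when f is a non-empty directory, recursive must be true or the call fails. A minimal sketch of the guarded-delete idiom that recurs in the examples below (the output path here is an illustrative assumption, not taken from any of them):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical output directory used only for illustration.
        Path outputPath = new Path("/tmp/example-output");
        // delete(path, true) removes a directory and everything under it;
        // guarding with exists() makes re-runs safe when the path is absent.
        if (fs.exists(outputPath))
            fs.delete(outputPath, true);
        fs.close();
    }
}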
From source file:com.philiphubbard.digraph.MRCompressChainsTest.java
License:Open Source License
private static void cleanupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();
    FileStatus[] files = fileSystem.listStatus(new Path(testOutput));
    for (FileStatus status : files) {
        Path path = status.getPath();
        if (path.getName().startsWith("part")) {
            System.out.println(path);
            SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
            IntWritable key = new IntWritable();
            BytesWritable value = new BytesWritable();
            while (reader.next(key, value))
                vertices.add(new MRVertex(value, conf));
            reader.close();
        }
    }
    for (MRVertex vertex : vertices)
        System.out.println(vertex.toDisplayString());

    fileSystem.delete(new Path(testInput), true);
    fileSystem.delete(new Path(testOutput), true);
    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
public boolean run(Path inputPath, Path outputPath) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // Job.getInstance() copies the Configuration argument, so set its properties first.
    conf.setBoolean(MRVertex.CONFIG_ALLOW_EDGE_MULTIPLES, true);
    conf.setBoolean(MRVertex.CONFIG_COMPRESS_CHAIN_MULTIPLES_MUST_MATCH, false);
    conf.setInt(MRMerVertex.CONFIG_MER_LENGTH, vertexMerLength);
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    conf.setInt(MRBuildVertices.CONFIG_COVERAGE, coverage);
    conf.setInt(MRCompressChains.CONFIG_TERMINATION_COUNT, 1);

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrassemblerbuild");
    Path buildInputPath = inputPath;
    Path buildOutputPath = new Path("sabe.MRAssemblerTmp");

    System.out.println("sabe.MRAssembler starting vertex construction");
    MRBuildMerVertices.setupJob(buildJob, buildInputPath, buildOutputPath);
    if (!buildJob.waitForCompletion(true))
        return false;

    Path compressInputPath = new Path(buildOutputPath.toString() + "/chain");
    Path compressOutputPath = new Path(buildOutputPath.toString() + "/chainCompress");

    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();
    while (keepGoing) {
        Job compressJob = Job.getInstance(conf);
        compressJob.setJobName("mrassemblercompress");

        System.out.println("sabe.MRAssembler starting compression iteration " + iter);
        MRCompressMerChains.setupIterationJob(compressJob, compressInputPath, compressOutputPath);
        if (!compressJob.waitForCompletion(true))
            System.exit(1);

        iter++;
        keepGoing = MRCompressChains.continueIteration(compressJob, compressInputPath, compressOutputPath);
    }
    System.out.println("sabe.MRAssembler made " + iter + " compression iterations");

    Path branchPath = new Path(buildOutputPath.toString() + "/branch");
    Path chainPath = compressOutputPath;

    FileSystem fileSystem = FileSystem.get(conf);
    Graph graph = buildCompressedGraph(conf, fileSystem, branchPath, chainPath);
    if (graph != null) {
        ArrayList<String> result = graph.assemble();
        FSDataOutputStream out = fileSystem.create(outputPath);
        for (String seq : result) {
            out.writeBytes(seq);
            out.writeBytes("\n");
        }
        out.close();
    }

    // Recursive delete removes the whole temporary working directory.
    fileSystem.delete(buildOutputPath, true);
    fileSystem.close();

    return true;
}
From source file:com.philiphubbard.sabe.MRAssemblerTest1.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<Text> reads = new ArrayList<Text>();
    // Goal: AATTCGGCCTTCGGCAT
    reads.add(new Text("AATTCGGC\n"));
    reads.add(new Text("CTTCGGCAT\n"));
    reads.add(new Text("AATT\n"));
    reads.add(new Text("CGGCCTTCGGCAT\n"));
    reads.add(new Text("AATTCGGCCTTCG\n"));
    reads.add(new Text("GCAT\n"));

    FSDataOutputStream out = fileSystem.create(path);
    for (Text read : reads) {
        byte[] bytes = read.copyBytes();
        for (byte b : bytes)
            out.write(b);
    }
    out.close();

    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssemblerTest1.java
License:Open Source License
private static void cleanupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    fileSystem.delete(new Path(testInput), true);
    fileSystem.delete(new Path(testOutput), true);
    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssemblerTest2.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<Text> reads = new ArrayList<Text>();
    // The expected result:
    // CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACCCGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC
    // Note that it has the following pattern:
    // segment 0: CCCTTTCTGT
    // segment 1, which will be repeated: TGACCCATCA
    // segment 2: TTGTTTAGTA
    // segment 3, which will be repeated: ACCCGCGGGA
    // segment 4: TGCCTGGCAG
    // segment 3, again: ACCCGCGGGA
    // segment 5: CGATCTCCTC
    // segment 1, again: TGACCCATCA
    // segment 6: TCGAAATTCC

    reads.add(new Text("CCCTTTC\n"));
    // Error: initial T omitted.
    reads.add(new Text("GTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACC"));
    reads.add(new Text("CGCGGGACGAT\n"));
    // Error: final C omitted.
    reads.add(new Text("CTCCTCTGACCCATCATCGAAATTC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCAT\n"));
    // Error: final C replaced with G.
    reads.add(new Text("CATTGTTTAGTAACCCGCGGGATGCCTGGCAGACG\n"));
    reads.add(new Text("CGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC\n"));

    // Error: C at index 14 replaced with A.
    reads.add(new Text("CCCTTTCTGTTGACACATCATTGTTTAGTAAC"));
    reads.add(new Text("CCGCGGGATGCC\n"));
    // Error: C at index 25 omitted.
    reads.add(new Text("TGGCAGACCCGCGGGACGATCTCCTTGACCCATCATCGAAATTCC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTG\n"));
    // Error: G at index 10 replaced with T.
    reads.add(new Text("GCAGACCCGCTGGACGA\n"));
    reads.add(new Text("TCTCCTCTGACCCATCATCGAAATTCC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGC"));
    // Error: final G omitted.
    reads.add(new Text("CTGGCAGACCCGC\n"));
    reads.add(new Text("GGACGATCTCCTCT\n"));
    // Error: CG at index 10 transposed to GC.
    reads.add(new Text("GACCCATCATCGAAATTCC\n"));

    FSDataOutputStream out = fileSystem.create(path);
    for (Text read : reads) {
        byte[] bytes = read.copyBytes();
        for (byte b : bytes)
            out.write(b);
    }
    out.close();

    fileSystem.close();
}
From source file:com.pivotal.hawq.mapreduce.MapReduceClusterDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3 && args.length != 4) {
        System.err.printf("Usage: %s [generic options] <tableName> <dburl> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String tableName = args[0];
    String dbUrl = args[1];
    Path outputPath = new Path(args[2]);
    Class<? extends Mapper> mapperClass = (args.length == 3) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[3]);

    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf(), "job_read_" + tableName);
    job.setJarByClass(MapReduceClusterDriver.class);
    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), dbUrl, null, null, tableName);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pivotal.hawq.mapreduce.MapReduceLocalDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2 && args.length != 3) {
        System.err.printf("Usage: %s [generic options] <metadata_file> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String metadataFile = args[0];
    Path outputPath = new Path(args[1]);
    Class<? extends Mapper> mapperClass = (args.length == 2) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[2]);

    // delete previous output
    FileSystem fs = FileSystem.getLocal(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceLocalDriver.class);
    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pivotal.hawq.mapreduce.pt.HAWQInputFormatPerformanceTest_TPCH.java
License:Apache License
private int runMapReduceJob() throws Exception {
    Path outputPath = new Path("/output");
    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(HAWQInputFormatPerformanceTest_TPCH.class);
    job.setInputFormatClass(HAWQInputFormat.class);

    long startTime = System.currentTimeMillis();
    HAWQInputFormat.setInput(job.getConfiguration(), MRFormatConfiguration.TEST_DB_URL, null, null, tableName);
    metadataExtractTime = System.currentTimeMillis() - startTime;

    FileOutputFormat.setOutputPath(job, outputPath);
    job.setMapperClass(TPCHTableMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Void.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.placeiq.piqconnect.BlocksBuilder.java
License:Apache License
protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    Runner.setCompression(job);

    return job;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
public int run(final String[] args) throws Exception {
    Path pathEdges = new Path(args[0]);
    Path pathVector = new Path(args[1]);
    Path workDir = new Path(args[2]);
    Path pathOutputStage1 = new Path(workDir, "stage1");
    Path pathOutputStage2 = new Path(workDir, "stage2");
    Path pathOutputVector = new Path(workDir, "result");

    numberOfReducers = Integer.parseInt(args[3]);
    blockSize = Integer.parseInt(args[4]);

    int maxConvergence = Integer.parseInt(args[5]);
    int maxIters = Integer.parseInt(args[6]);

    if (maxConvergence < 0) {
        maxConvergence = 0;
    }
    if (maxIters < 0 || maxIters > MAX_ITERATIONS) {
        maxIters = MAX_ITERATIONS;
    }

    FileSystem fs = FileSystem.get(getConf());

    int n = 0;
    long changedNodes = Long.MAX_VALUE;
    while (n < maxIters && changedNodes > maxConvergence) {
        // Clear the per-iteration stage outputs before re-running the jobs.
        fs.delete(pathOutputStage1, true);
        fs.delete(pathOutputStage2, true);

        LOG.info("Start iteration " + n + " Stage1");
        Job job1 = buildJob1(pathEdges, pathVector, pathOutputStage1);
        if (!job1.waitForCompletion(true)) {
            LOG.error("Failed to execute IterationStage1 for iteration #" + n);
            return -1;
        }

        LOG.info("Start iteration " + n + " Stage2");
        Job job2 = buildJob2(pathOutputStage1, pathOutputStage2);
        if (!job2.waitForCompletion(true)) {
            LOG.error("Failed to execute IterationStage2 for iteration #" + n);
            return -1;
        }

        changedNodes = job2.getCounters().findCounter(PiqConnectCounter.NUMBER_INCOMPLETE_VECTOR).getValue();
        long unchangedNodes = job2.getCounters().findCounter(PiqConnectCounter.NUMBER_FINAL_VECTOR).getValue();
        LOG.info("End of iteration " + n + ", changedNodes=" + changedNodes + ", unchangedNodes=" + unchangedNodes);
        LOG.info(pathOutputStage2);

        // Replace the old vector with the new one produced by Stage2.
        fs.delete(pathVector, true);
        if (!fs.rename(pathOutputStage2, pathVector)) {
            LOG.error("failed to rename " + pathOutputStage2 + " into " + pathVector);
            return -1;
        }
        n++;
    }

    Job job3 = buildJob3(pathVector, pathOutputVector);
    if (!job3.waitForCompletion(true)) {
        LOG.error("Failed to execute FinalResultBuilder for iteration #" + n);
        return -1;
    }

    LOG.info("Connected component computed in " + n + " iterations");
    return 0;
}