List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
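All of the examples below pair FileSystem.get() with an explicit close(). As a minimal sketch (class and path names here are illustrative, not taken from the examples), the same pattern can be written with try-with-resources, since FileSystem implements Closeable. Note that FileSystem.get() may return a cached instance shared across the JVM, so closing it can affect other callers of the same URI unless the cache is disabled.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // try-with-resources invokes close() automatically because FileSystem implements Closeable.
        // Caveat: FileSystem.get() may hand back a cached, JVM-wide instance; closing it also
        // closes it for any other code holding the same instance. Set
        // fs.<scheme>.impl.disable.cache=true in the Configuration to obtain a private instance.
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println(fs.exists(new Path("/tmp")));
        }
    }
}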
From source file:GetRetweetersAndCountPerUser.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        Path dir = new Path(otherArgs[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        // Aggregate the per-user counts from all reducer output files.
        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key)) {
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                } else {
                    counts_for_user.put(key, 1);
                }
            }
            br.close();
        }

        // Write the aggregated counts next to the job output, then close the streams and the FileSystem.
        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testUnpersistentLineIndexInfo() {
    try {
        String fileName = prefix + "testPersistentLineIndexInfo";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);

        IndexInfo info = new IndexInfo();
        info.unpersistentLineIndexInfo(in);
        in.close();

        if (info.beginLine != 11) {
            fail("beginLine fail:" + info.beginLine);
        }
        if (info.endLine != 22) {
            fail("endLine fail:" + info.endLine);
        }
        if (info.offset != 33) {
            fail("offset fail:" + info.offset);
        }
        if (info.len != 44) {
            fail("len fail:" + info.len);
        }
        if (info.idx != 55) {
            fail("idx fail:" + info.idx);
        }

        fs.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testUnpersistentKeyIndexInfo() {
    try {
        String fileName = prefix + "testPersistentKeyIndexInfo";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);

        IndexInfo info = new IndexInfo();
        info.unpersistentKeyIndexInfo(in);
        in.close();

        if (info.beginKey != 111) {
            fail("beginKey fail:" + info.beginKey);
        }
        if (info.endKey != 222) {
            fail("endKey fail:" + info.endKey);
        }

        fs.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}
From source file:HadoopUtilsTest.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(
                new Path("/icntv/grade/correlate-result/2013-12-12"), new PathFilter() {
                    @Override
                    public boolean accept(Path path) {
                        return path.getName().matches("part-r-\\d*");
                    }
                });
        for (FileStatus f : fileStatuses) {
            IOUtils.copyBytes(fileSystem.open(f.getPath()), System.out, 4096, false);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close the FileSystem in a finally block so it is released even if reading fails.
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}
From source file:RunText.java
License:Apache License
public static void main(String[] args) throws Exception {
    o = new Options();
    JCommander jc = null;
    try {
        jc = new JCommander(o, args);
        jc.setProgramName("./runText");
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        String[] valid = { "-p", "path", "-d", "delimiter", "-v", "value", "-i", "index" };
        new JCommander(o, valid).usage();
        System.exit(-1);
    }
    if (o.help) {
        jc.usage();
        System.exit(0);
    }
    path = new Path(o.path);
    delim = o.delimiter.getBytes()[0];
    toFind = o.value;
    index = o.index;
    numThreads = o.threads;

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    TextInputFormat format = new TextInputFormat();

    // Split the file evenly among the worker threads.
    long len = fs.getFileStatus(path).getLen() / numThreads;
    List<Thread> threads = Lists.newArrayList();
    for (int i = 0; i < numThreads; i++) {
        FileSplit split = new FileSplit(path, i * len, len, new String[] { "" });
        threads.add(new Thread(new RunText(split, format)));
    }
    runningThreads = new AtomicInteger(numThreads);
    for (Thread t : threads) {
        t.start();
    }

    // Report throughput every five seconds until all threads finish.
    int prev = 0;
    int current;
    long t1 = System.nanoTime();
    long t2;
    while (runningThreads.get() > 0) {
        Thread.sleep(5000);
        current = totalCount.get();
        t2 = System.nanoTime();
        System.out.println(String.format("%f records/sec", (current - prev) * 1e9 / (t2 - t1)));
        t1 = t2;
        prev = current;
    }
    for (Thread t : threads) {
        t.join();
    }
    fs.close();
}
From source file:BwaInterpreter.java
License:Open Source License
private void setTotalInputLength() {
    try {
        FileSystem fs = FileSystem.get(this.conf);

        // To get the input files sizes
        ContentSummary cSummaryFile1 = fs.getContentSummary(new Path(options.getInputPath()));
        long lengthFile1 = cSummaryFile1.getLength();
        long lengthFile2 = 0;

        if (!options.getInputPath2().isEmpty()) {
            ContentSummary cSummaryFile2 = fs.getContentSummary(new Path(options.getInputPath2()));
            lengthFile2 = cSummaryFile2.getLength();
        }

        // Total size. Depends on paired or single reads
        this.totalInputLength = lengthFile1 + lengthFile2;
        fs.close();
    } catch (IOException e) {
        LOG.error(e.toString());
        e.printStackTrace();
    }
}
From source file:BwaInterpreter.java
License:Open Source License
private void createOutputFolder() {
    try {
        FileSystem fs = FileSystem.get(this.conf);

        // Path variable
        Path outputDir = new Path(options.getOutputPath());

        // Directory creation: recreate the output directory if it already exists
        if (!fs.exists(outputDir)) {
            fs.mkdirs(outputDir);
        } else {
            fs.delete(outputDir, true);
            fs.mkdirs(outputDir);
        }

        fs.close();
    } catch (IOException e) {
        LOG.error(e.toString());
        e.printStackTrace();
    }
}
From source file:BwaInterpreter.java
License:Open Source License
private void combineOutputSamFiles(String outputHdfsDir, List<String> returnedValues) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path finalHdfsOutputFile = new Path(outputHdfsDir + "/FullOutput.sam");
        FSDataOutputStream outputFinalStream = fs.create(finalHdfsOutputFile, true);

        // We iterate over the resulting files in HDFS and aggregate them into only one file.
        for (int i = 0; i < returnedValues.size(); i++) {
            LOG.info("JMAbuin:: SparkBWA :: Returned file ::" + returnedValues.get(i));
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(fs.open(new Path(returnedValues.get(i)))));

            String line;
            line = br.readLine();
            while (line != null) {
                // Only the first file keeps its SAM header lines (those starting with "@").
                if (i == 0 || !line.startsWith("@")) {
                    //outputFinalStream.writeBytes(line+"\n");
                    outputFinalStream.write((line + "\n").getBytes());
                }
                line = br.readLine();
            }
            br.close();
            fs.delete(new Path(returnedValues.get(i)), true);
        }

        outputFinalStream.close();
        fs.close();
    } catch (IOException e) {
        e.printStackTrace();
        LOG.error(e.toString());
    }
}
From source file:BwaInterpreter.java
License:Open Source License
/**
 * Runs BWA with the specified options
 * @brief This function runs BWA with the input data selected and with the options also selected by the user.
 */
public void RunBwa() {
    LOG.info("JMAbuin:: Starting BWA");
    Bwa bwa = new Bwa(this.options);

    List<String> returnedValues;
    if (bwa.isPairedReads()) {
        JavaRDD<Tuple2<String, String>> readsRDD = handlePairedReadsSorting();
        returnedValues = MapPairedBwa(bwa, readsRDD);
    } else {
        JavaRDD<String> readsRDD = handleSingleReadsSorting();
        returnedValues = MapSingleBwa(bwa, readsRDD);
    }

    LOG.info("BwaRDD :: Total of returned lines from RDDs :: " + returnedValues.size());

    // If a reducer is used, the final output has to be stored in just one file
    if (bwa.isUseReducer()) {
        combineOutputSamFiles(bwa.getOutputHdfsDir(), returnedValues);
    } else {
        for (String outputFile : returnedValues) {
            LOG.info("JMAbuin:: SparkBWA:: Returned file ::" + outputFile);
        }
    }

    // After the execution, if the temporary input file exists, it should be deleted
    try {
        if ((this.inputTmpFileName != null) && (!this.inputTmpFileName.isEmpty())) {
            FileSystem fs = FileSystem.get(this.conf);
            fs.delete(new Path(this.inputTmpFileName), true);
            fs.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        LOG.error(e.toString());
    }
}
From source file:BwaInterpreter.java
License:Open Source License
/**
 * Used to perform the sort operation in HDFS
 * @brief This function provides a method to perform the sort phase in HDFS
 * @author José M. Abuín
 * @param fileName1 The first file that contains input FASTQ reads. Stored in HDFS
 * @param fileName2 The second file that contains input FASTQ reads. Stored in HDFS
 * @return A JavaRDD that contains the paired reads sorted
 */
public JavaRDD<Tuple2<String, String>> SortInHDFS2(String fileName1, String fileName2) {

    Configuration conf = this.conf;

    LOG.info("JMAbuin:: Starting writing reads to HDFS");

    try {
        FileSystem fs = FileSystem.get(conf);

        Path outputFilePath = new Path(this.inputTmpFileName);

        // To write the paired reads
        FSDataOutputStream outputFinalStream = fs.create(outputFilePath, true);

        // To read paired reads from both files
        BufferedReader brFastqFile1 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName1))));
        BufferedReader brFastqFile2 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName2))));

        String lineFastq1;
        String lineFastq2;

        lineFastq1 = brFastqFile1.readLine();
        lineFastq2 = brFastqFile2.readLine();

        // Loop over the two files. Both of them must have the same number of lines
        while (lineFastq1 != null) {
            // The lines are written interspersed
            outputFinalStream.write((lineFastq1 + "\n" + lineFastq2 + "\n").getBytes());

            // Read the next lines
            lineFastq1 = brFastqFile1.readLine();
            lineFastq2 = brFastqFile2.readLine();
        }

        // Close the input and output files
        brFastqFile1.close();
        brFastqFile2.close();
        outputFinalStream.close();

        // Now it is time to read the previously created file and create the RDD
        ContentSummary cSummary = fs.getContentSummary(outputFilePath);

        long length = cSummary.getLength();

        this.totalInputLength = length;

        fs.close();

        // In case the user wants partitioning
        if (this.options.getPartitionNumber() != 0) {

            // These options are set to indicate the split size and get the correct number of partitions
            this.conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));
            this.conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));

            LOG.info("JMAbuin partitioning from HDFS:: "
                    + String.valueOf((length) / this.options.getPartitionNumber()));

            // Using the FastqInputFormatDouble class we get values from the HDFS file. After that, these values are stored in an RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class,
                    String.class, this.conf).mapPartitions(new BigFastq2RDDPartitionsDouble(), true);

        } else {
            // Using the FastqInputFormatDouble class we get values from the HDFS file. After that, these values are stored in an RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class,
                    String.class, this.conf).map(new BigFastq2RDDDouble());
        }

    } catch (IOException e) {
        e.printStackTrace();
        LOG.error(e.toString());
        return null;
    }
}