Java tutorial
package uni.bielefeld.cmg.sparkhit.io; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import uni.bielefeld.cmg.sparkhit.util.InfoDumper; import java.io.*; /** * Created by rhinempi on 24/01/16. * * SparkHit * * Copyright (c) 2015-2015 * Liren Huang <huanglr at cebitec.uni-bielefeld.de> * * SparkHit is free software: you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; Without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more detail. * * You should have received a copy of the GNU General Public License along * with this program. If not, see <http://www.gnu.org/licenses>. */ public class TextFileBufferInput implements InputFileManager { private String inputFile; private File inputPath; public FileInputStream inputFileStream; public BZip2CompressorInputStream inputBZip2Stream; public GzipCompressorInputStream inputGzipStream; public TarArchiveInputStream inputTarArchiveStream; public InputStreamReader inputStreamReader; public BufferedReader inputBufferReader; public int fileFormatIntMark; private InfoDumper info = new InfoDumper(); public TextFileBufferInput() { /** * Read an input path and set an input buffered reader * * For synchronized input buffer, send to each thread for * parallel reading text file * */ } /** * * @return */ public BufferedReader getBufferReader() { return inputBufferReader; } /** * * @return */ public InputStreamReader getInputStreamReader() { return inputStreamReader; } /** * * @return */ public FileInputStream getInputFileStream() { return inputFileStream; } /** * */ public void setBufferReader() { this.inputBufferReader = new BufferedReader(inputStreamReader); } /** * */ private void setInputStreamReader() { if (fileFormatIntMark == 0) { this.inputStreamReader = new InputStreamReader(inputFileStream); } else if (fileFormatIntMark == 1) { setGzipCompressorInputStream(); this.inputStreamReader = new InputStreamReader(inputGzipStream); } else if (fileFormatIntMark == 2) { setBZip2CompressorInputStream(); this.inputStreamReader = new InputStreamReader(inputBZip2Stream); } else if (fileFormatIntMark == 3) { setGzipCompressorInputStream(); setTarArchiveInputStreamFromGzipStream(); try { TarArchiveEntry currentEntry = inputTarArchiveStream.getNextTarEntry(); info.readMessage("Reading the first TarArchive Entry : " + currentEntry.getName()); info.screenDump(); } catch (IOException e) { e.printStackTrace(); } this.inputStreamReader = new InputStreamReader(inputTarArchiveStream); } else if (fileFormatIntMark == 4) { setBZip2CompressorInputStream(); setTarArchiveInputStreamFromBZipStream(); try { TarArchiveEntry currentEntry = inputTarArchiveStream.getNextTarEntry(); info.readMessage("Reading the first TarArchive Entry : " + currentEntry.getName()); info.screenDump(); } catch (IOException e) { e.printStackTrace(); } this.inputStreamReader = new InputStreamReader(inputTarArchiveStream); } else { info.readMessage( "fileFormatIntMark should be 0-4: 0 for uncompressed file; 1 for .gz file; 2 for .bz2 file; 3 for .tar.gz file; 4 for .tar.bz2 file."); info.screenDump(); } } public void setTarArchiveInputStreamFromGzipStream() { this.inputTarArchiveStream = new TarArchiveInputStream(inputGzipStream); } private void setGzipCompressorInputStream() { try { this.inputGzipStream = new GzipCompressorInputStream(inputFileStream); } catch (IOException e) { e.printStackTrace(); } } public boolean setNextTarArchiveEntry() { try { TarArchiveEntry currentEntry; if ((currentEntry = inputTarArchiveStream.getNextTarEntry()) != null) { info.readMessage("Reading next TarArchive Entry : " + currentEntry.getName()); info.screenDump(); this.inputStreamReader = new InputStreamReader(inputTarArchiveStream); setBufferReader(); return true; } else { return false; } } catch (IOException e) { e.printStackTrace(); } return false; } public void setTarArchiveInputStreamFromBZipStream() { this.inputTarArchiveStream = new TarArchiveInputStream(inputBZip2Stream); } private void setBZip2CompressorInputStream() { try { this.inputBZip2Stream = new BZip2CompressorInputStream(inputFileStream); } catch (IOException e) { e.printStackTrace(); } } /** * */ private void setFileInputStream() { try { this.inputFileStream = new FileInputStream(inputPath); } catch (FileNotFoundException e) { e.fillInStackTrace(); System.exit(0); } } /** * */ private void setInputFile() { this.inputPath = new File(inputFile); } public int checkFile(String cFile) { if (cFile.startsWith("s3")) { info.readMessage("Input file is located in S3 bucket : "); info.screenDump(); info.readMessage("\t" + cFile); info.screenDump(); info.readMessage("Reading file using Map-Reduce FASTA reader"); info.screenDump(); } else if (cFile.startsWith("hdfs")) { info.readMessage("Input file is lacated in HDFS : "); info.screenDump(); info.readMessage("\t" + cFile); info.screenDump(); info.readMessage("Reading file using hadoop file reader"); info.screenDump(); } else { info.readMessage("Input file is a local file : "); info.screenDump(); info.readMessage("\t" + cFile); info.screenDump(); File inputcFile = new File(cFile).getAbsoluteFile(); if (!inputcFile.exists()) { info.readMessage("However, it is not there, please check it again"); info.screenDump(); System.exit(0); } else if (cFile.endsWith("tar.gz")) { info.readMessage("Reading fastq file using BufferedReader"); info.screenDump(); info.readMessage("Uncompress tar.gz file using GzipCompressor and TarArchive"); info.screenDump(); return 3; } else if (cFile.endsWith("tar.bz2")) { info.readMessage("Reading fastq file using BufferedReader"); info.screenDump(); info.readMessage("Uncompress tar.bz2 file using GzipCompressor and TarArchive"); info.screenDump(); return 4; } else if (cFile.endsWith("gz")) { info.readMessage("Reading fastq file using BufferedReader"); info.screenDump(); info.readMessage("Uncompress gz file using GzipCompressor"); info.screenDump(); return 1; } else if (cFile.endsWith("bz2")) { info.readMessage("Reading fastq file using BufferedReader"); info.screenDump(); info.readMessage("Uncompress bz2 file using BZip2Compressor"); info.screenDump(); return 2; } else { info.readMessage("Reading fastq file using FastqUnitBuffer"); info.screenDump(); } } return 0; } /** * * @param inputFile */ public void setInput(String inputFile) { this.inputFile = inputFile; this.fileFormatIntMark = checkFile(inputFile); setInputFile(); setFileInputStream(); setInputStreamReader(); setBufferReader(); } /** * * @param inputFile is the input text file in String */ public void bufferInputFile(String inputFile) { this.inputFile = inputFile; } /** * * @param OutputFile */ public void bufferOutputFile(String OutputFile) { /** * this method is invalid in this class */ } }