List of usage examples for the java.util.zip.GZIPInputStream constructor GZIPInputStream(InputStream)
public GZIPInputStream(InputStream in) throws IOException
From source file:com.act.utils.parser.GenbankInterpreter.java
/**
 * Reads every sequence from the Genbank file into {@code this.sequences}.
 *
 * <p>The file is parsed as protein or DNA depending on {@code seq_type}. A file whose name
 * ends in {@code ".gz"} is decompressed on the fly before parsing.
 *
 * @throws Exception if reading or parsing the file fails
 * @throws RuntimeException if {@code seq_type} is neither {@code PROTEIN} nor {@code DNA}
 */
public void init() throws Exception {
    final boolean proteinMode = seq_type.equals(PROTEIN);

    // Reject unknown sequence types before touching the file.
    if (!proteinMode && !seq_type.equals(DNA)) {
        String msg = "No proper sequence type given; must be either DNA or Protein";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }

    final boolean gzipped = protFile.getName().endsWith(".gz");

    if (proteinMode) {
        Map<String, ProteinSequence> parsed;
        if (!gzipped) {
            parsed = GenbankReaderHelper.readGenbankProteinSequence(protFile);
        } else {
            try (InputStream compressed = new GZIPInputStream(new FileInputStream(protFile))) {
                parsed = GenbankReaderHelper.readGenbankProteinSequence(compressed);
            }
        }
        for (AbstractSequence entry : parsed.values()) {
            this.sequences.add(entry);
        }
        return;
    }

    // DNA: the AmbiguityDNACompoundSet is required because the parsed DNA sequences may
    // contain ambiguous (non-ATCG) nucleotide compounds.
    Map<String, DNASequence> parsed;
    if (!gzipped) {
        GenbankReader reader = new GenbankReader(protFile, new GenericGenbankHeaderParser<>(),
                new DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet()));
        parsed = reader.process();
    } else {
        try (InputStream compressed = new GZIPInputStream(new FileInputStream(protFile))) {
            GenbankReader reader = new GenbankReader(compressed, new GenericGenbankHeaderParser<>(),
                    new DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet()));
            parsed = reader.process();
        }
    }
    for (AbstractSequence entry : parsed.values()) {
        this.sequences.add(entry);
    }
}
From source file:it.unimi.dsi.sux4j.mph.VLLcpMonotoneMinimalPerfectHashFunction.java
public static void main(final String[] arg) throws NoSuchMethodException, IOException, JSAPException { final SimpleJSAP jsap = new SimpleJSAP(VLLcpMonotoneMinimalPerfectHashFunction.class.getName(), "Builds a variable-length LCP-based monotone minimal perfect hash function reading a newline-separated list of strings.", new Parameter[] { new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding."), new Switch("huTucker", 'h', "hu-tucker", "Use Hu-Tucker coding to reduce string length."), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new Switch("utf32", JSAP.NO_SHORTFLAG, "utf-32", "Use UTF-32 internally (handles surrogate pairs)."), new Switch("zipped", 'z', "zipped", "The string list is compressed in gzip format."), new UnflaggedOption("function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised monotone minimal perfect hash function."), new UnflaggedOption("stringFile", JSAP.STRING_PARSER, "-", JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The name of a file containing a newline-separated list of strings, or - for standard input; in the first case, strings will not be loaded into core memory."), }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;//from w ww . 
ja va 2s .c om final String functionName = jsapResult.getString("function"); final String stringFile = jsapResult.getString("stringFile"); final Charset encoding = (Charset) jsapResult.getObject("encoding"); final boolean zipped = jsapResult.getBoolean("zipped"); final boolean iso = jsapResult.getBoolean("iso"); final boolean utf32 = jsapResult.getBoolean("utf32"); final boolean huTucker = jsapResult.getBoolean("huTucker"); final Collection<MutableString> collection; if ("-".equals(stringFile)) { final ProgressLogger pl = new ProgressLogger(LOGGER); pl.displayLocalSpeed = true; pl.displayFreeMemory = true; pl.start("Loading strings..."); collection = new LineIterator( new FastBufferedReader( new InputStreamReader(zipped ? new GZIPInputStream(System.in) : System.in, encoding)), pl).allLines(); pl.done(); } else collection = new FileLinesCollection(stringFile, encoding.toString(), zipped); final TransformationStrategy<CharSequence> transformationStrategy = huTucker ? new HuTuckerTransformationStrategy(collection, true) : iso ? TransformationStrategies.prefixFreeIso() : utf32 ? TransformationStrategies.prefixFreeUtf32() : TransformationStrategies.prefixFreeUtf16(); BinIO.storeObject( new VLLcpMonotoneMinimalPerfectHashFunction<CharSequence>(collection, transformationStrategy), functionName); LOGGER.info("Completed."); }
From source file:de.tudarmstadt.ukp.dkpro.core.clearnlp.ClearNlpParser.java
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); parserProvider = new ModelProviderBase<AbstractDEPParser>(this, "clearnlp", "parser") { @Override//w w w . ja v a 2 s . c o m protected AbstractDEPParser produceResource(URL aUrl) throws IOException { InputStream is = null; BufferedInputStream bis = null; ObjectInputStream ois = null; GZIPInputStream gis = null; try { is = aUrl.openStream(); String language = getAggregatedProperties().getProperty(LANGUAGE); gis = new GZIPInputStream(is); bis = new BufferedInputStream(gis); ois = new ObjectInputStream(bis); AbstractDEPParser parser = NLPGetter.getDEPParser(ois, language); Properties metadata = getResourceMetaData(); SingletonTagset depTags = new SingletonTagset(Dependency.class, metadata.getProperty("dependency.tagset")); try { for (StringModel model : parser.getModels()) { for (String label : model.getLabels()) { String[] fields = label.split("_"); if (fields.length == 3) { depTags.add(fields[2]); } // else { // getContext().getLogger().log(WARNING, // "Unknown label format: [" + label + "]"); // } } } } catch (Exception e) { getContext().getLogger().log(WARNING, "Unable to find tagset information."); } addTagset(depTags); if (printTagSet) { getContext().getLogger().log(INFO, getTagset().toString()); } return parser; } catch (Exception e) { throw new IOException(e); } finally { closeQuietly(ois); closeQuietly(bis); closeQuietly(gis); closeQuietly(is); } } }; }
From source file:edu.umn.cs.spatialHadoop.visualization.FrequencyMap.java
@Override public void readFields(DataInput in) throws IOException { super.readFields(in); int length = in.readInt(); byte[] serializedData = new byte[length]; in.readFully(serializedData);/* ww w . j a v a 2 s .com*/ ByteArrayInputStream bais = new ByteArrayInputStream(serializedData); GZIPInputStream gzis = new GZIPInputStream(bais); byte[] buffer = new byte[8]; gzis.read(buffer); ByteBuffer bbuffer = ByteBuffer.wrap(buffer); int width = bbuffer.getInt(); int height = bbuffer.getInt(); // Reallocate memory only if needed if (width != this.getWidth() || height != this.getHeight()) frequencies = new float[width][height]; buffer = new byte[getHeight() * 4]; for (int x = 0; x < getWidth(); x++) { int size = 0; while (size < buffer.length) { size += gzis.read(buffer, size, buffer.length - size); } bbuffer = ByteBuffer.wrap(buffer); for (int y = 0; y < getHeight(); y++) { frequencies[x][y] = bbuffer.getFloat(); } } }
From source file:it.unimi.dsi.sux4j.mph.ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.java
public static void main(final String[] arg) throws NoSuchMethodException, IOException, JSAPException { final SimpleJSAP jsap = new SimpleJSAP( ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.class.getName(), "Builds a monotone minimal perfect hash using a probabilistic z-fast trie as a distributor reading a newline-separated list of strings.", new Parameter[] { new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding."), new FlaggedOption("tempDir", FileStringParser.getParser(), JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'T', "temp-dir", "A directory for temporary files."), new Switch("huTucker", 'h', "hu-tucker", "Use Hu-Tucker coding to reduce string length."), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new Switch("utf32", JSAP.NO_SHORTFLAG, "utf-32", "Use UTF-32 internally (handles surrogate pairs)."), new FlaggedOption("signatureWidth", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "signature-width", "If specified, the signature width in bits; if negative, the generated function will be a dictionary."), new Switch("zipped", 'z', "zipped", "The string list is compressed in gzip format."), new FlaggedOption("log2bucket", JSAP.INTEGER_PARSER, "-1", JSAP.NOT_REQUIRED, 'b', "log2bucket", "The base 2 logarithm of the bucket size (mainly for testing)."), new UnflaggedOption("function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised monotone minimal perfect hash function."), new UnflaggedOption("stringFile", JSAP.STRING_PARSER, "-", JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The name of a file containing a newline-separated list of strings, or - for standard input; in the first case, strings will not be loaded into core memory."), }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;//ww w .ja va2 s. 
c om final String functionName = jsapResult.getString("function"); final String stringFile = jsapResult.getString("stringFile"); final int log2BucketSize = jsapResult.getInt("log2bucket"); final Charset encoding = (Charset) jsapResult.getObject("encoding"); final File tempDir = jsapResult.getFile("tempDir"); final boolean zipped = jsapResult.getBoolean("zipped"); final boolean iso = jsapResult.getBoolean("iso"); final boolean utf32 = jsapResult.getBoolean("utf32"); final boolean huTucker = jsapResult.getBoolean("huTucker"); final int signatureWidth = jsapResult.getInt("signatureWidth", 0); final Collection<MutableString> collection; if ("-".equals(stringFile)) { final ProgressLogger pl = new ProgressLogger(LOGGER); pl.displayLocalSpeed = true; pl.displayFreeMemory = true; pl.start("Loading strings..."); collection = new LineIterator( new FastBufferedReader( new InputStreamReader(zipped ? new GZIPInputStream(System.in) : System.in, encoding)), pl).allLines(); pl.done(); } else collection = new FileLinesCollection(stringFile, encoding.toString(), zipped); final TransformationStrategy<CharSequence> transformationStrategy = huTucker ? new HuTuckerTransformationStrategy(collection, true) : iso ? TransformationStrategies.prefixFreeIso() : utf32 ? TransformationStrategies.prefixFreeUtf32() : TransformationStrategies.prefixFreeUtf16(); BinIO.storeObject(new ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<CharSequence>(collection, transformationStrategy, log2BucketSize, signatureWidth, tempDir), functionName); LOGGER.info("Completed."); }
From source file:co.cask.tigon.sql.internal.StreamBinaryGenerator.java
private void unzipFile(File libZip) throws IOException, ArchiveException { String path = libZip.toURI().getPath(); String outDir = libZip.getParentFile().getPath(); TarArchiveInputStream archiveInputStream = new TarArchiveInputStream( new GZIPInputStream(new FileInputStream(path))); try {//w w w.j av a2s . c o m TarArchiveEntry entry = archiveInputStream.getNextTarEntry(); while (entry != null) { File destFile = new File(outDir, entry.getName()); destFile.getParentFile().mkdirs(); if (!entry.isDirectory()) { ByteStreams.copy(archiveInputStream, Files.newOutputStreamSupplier(destFile)); //TODO: Set executable permission based on entry.getMode() destFile.setExecutable(true, false); } entry = archiveInputStream.getNextTarEntry(); } } finally { archiveInputStream.close(); } }
From source file:org.grameenfoundation.consulteca.utils.HttpHelpers.java
private static InputStream getInputStream(HttpResponse httpResponse) throws IllegalStateException, IOException { InputStream inputStream = httpResponse.getEntity().getContent(); Header contentEncoding = httpResponse.getFirstHeader("Content-Encoding"); if (contentEncoding != null && contentEncoding.getValue().equalsIgnoreCase("gzip")) { inputStream = new GZIPInputStream(inputStream); }// w w w. j a v a2s.co m return inputStream; }
From source file:com.t3.model.AssetLoader.java
protected List<String> decode(byte[] indexData) throws IOException { BufferedReader reader = new BufferedReader( new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(indexData)))); List<String> list = new ArrayList<String>(); String line = null;//from w w w . j a v a 2 s .c o m while ((line = reader.readLine()) != null) { list.add(line); } return list; }
From source file:com.googlecode.android_scripting.ZipExtractorTask.java
private long unzip() throws Exception { long extractedSize = 0l; Enumeration<? extends ZipEntry> entries; if (mInput.isFile() && mInput.getName().contains(".gz")) { InputStream stream = new FileInputStream(mInput); GZIPInputStream gzipStream = new GZIPInputStream(stream); InputSource is = new InputSource(gzipStream); InputStream input = new BufferedInputStream(is.getByteStream()); File destination = new File(mOutput, "php"); ByteArrayBuffer baf = new ByteArrayBuffer(255000); int current = 0; while ((current = input.read()) != -1) { baf.append((byte) current); }//from ww w .j av a 2 s . c o m FileOutputStream output = new FileOutputStream(destination); output.write(baf.toByteArray()); output.close(); Log.d("written!"); return baf.toByteArray().length; } ZipFile zip = new ZipFile(mInput); long uncompressedSize = getOriginalSize(zip); publishProgress(0, (int) uncompressedSize); entries = zip.entries(); try { while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); if (entry.isDirectory()) { // Not all zip files actually include separate directory entries. // We'll just ignore them // and create them as necessary for each actual entry. continue; } File destination = new File(mOutput, entry.getName()); if (!destination.getParentFile().exists()) { destination.getParentFile().mkdirs(); } if (destination.exists() && mContext != null && !mReplaceAll) { Replace answer = showDialog(entry.getName()); switch (answer) { case YES: break; case NO: continue; case YESTOALL: mReplaceAll = true; break; default: return extractedSize; } } ProgressReportingOutputStream outStream = new ProgressReportingOutputStream(destination); extractedSize += IoUtils.copy(zip.getInputStream(entry), outStream); outStream.close(); } } finally { try { zip.close(); } catch (Exception e) { // swallow this exception, we are only interested in the original one } } Log.v("Extraction is complete."); return extractedSize; }
From source file:com.slytechs.capture.StreamFactory.java
public <T extends InputCapture<? extends FilePacket>> T newInput(final Class<T> t, final File file, Filter<ProtocolFilterTarget> filter) throws IOException { final BufferedInputStream b = new BufferedInputStream(new FileInputStream(file)); b.mark(1024); // Buffer first 1K of stream so we can rewind /*/* w w w .ja v a 2 s .c o m*/ * Check the stream, without decompression first */ if (formatType(Channels.newChannel(b)) != null) { b.close(); /* * This is a plain uncompressed file, open up a FileChannel. It will be * much faster */ return newInput(t, new RandomAccessFile(file, "rw").getChannel(), filter); } /* * Try with gunziped stream, second */ b.reset(); // Rewind if (formatType(Channels.newChannel(new GZIPInputStream(b))) != null) { b.close(); /* * Now reopen the same file, but this time without the buffered * inputstream in the middle. Try to make things as efficient as possible. * TODO: implement much faster channel based GZIP decompression algorithm */ return newInput(t, Channels.newChannel(new GZIPInputStream(new FileInputStream(file))), filter); } throw new IllegalArgumentException( "File is not any compressed or decompressed known format [" + file.getName() + "]"); }