List of usage examples for java.io StringBufferInputStream close
public void close() throws IOException
From source file: com.knowgate.dfs.FileSystem.java
/** * Download an HTML page and all its referenced files into a ZIP * @param sBasePath String Base path for page and its referenced files * @param sFilePath String File path from sBasePath * @param oOutStrm OutputStream where ZIP is written * @param sDefaultEncoding Character encoding of file to be downloaded * @throws IOException// www . j a va 2 s.co m * @since 7.0 */ public void downloadhtmlpage(String sBasePath, String sFilePath, OutputStream oOutStrm, String sDefaultEncoding) throws IOException { if (DebugFile.trace) { DebugFile.writeln("Begin FileSystem.downloadhtmlpage(" + sBasePath + "," + sFilePath + ",[OutputStream]," + sDefaultEncoding + ")"); DebugFile.incIdent(); } String sEncoding = sDefaultEncoding; String sBaseHref = ""; boolean bAutoDetectEncoding = (sDefaultEncoding == null); TreeSet<String> oFiles = new TreeSet<String>(); TreeSet<String> oEntries = new TreeSet<String>(); Perl5Matcher oMatcher = new Perl5Matcher(); Perl5Matcher oReplacer = new Perl5Matcher(); Perl5Compiler oCompiler = new Perl5Compiler(); if (sDefaultEncoding == null) sDefaultEncoding = "ASCII"; try { String sHtml = readfilestr(sBasePath + sFilePath, sDefaultEncoding); if (null == sHtml) { if (DebugFile.trace) { DebugFile.writeln("Could not read file " + sBasePath + sFilePath); DebugFile.decIdent(); throw new IOException("Could not read file " + sBasePath + sFilePath); } } if (DebugFile.trace) { DebugFile.writeln( String.valueOf(sHtml.length()) + " characters readed from file " + sBasePath + sFilePath); } if (bAutoDetectEncoding) { if (oMatcher.contains(sHtml, oCompiler.compile( "<meta\\x20+http-equiv=(\"|')?Content-Type(\"|')?\\x20+content=(\"|')?text/html;\\x20+charset=(\\w|-){3,32}(\"|')?>", Perl5Compiler.CASE_INSENSITIVE_MASK))) { if (DebugFile.trace) DebugFile.writeln("<meta http-equiv> tag found"); String sHttpEquiv = oMatcher.getMatch().toString(); int iCharset = Gadgets.indexOfIgnoreCase(sHttpEquiv, "charset="); if (iCharset > 0) { int iQuoute = sHttpEquiv.indexOf('"', 
iCharset); if (iQuoute < 0) iQuoute = sHttpEquiv.indexOf((char) 39, iCharset); if (iQuoute < 0) { bAutoDetectEncoding = true; } else { sEncoding = sHttpEquiv.substring(iCharset + 8, iQuoute); if (DebugFile.trace) DebugFile.writeln("setting charset encoding to " + sEncoding); bAutoDetectEncoding = false; try { byte[] aTest = new String("Test").getBytes(sEncoding); } catch (UnsupportedEncodingException uex) { bAutoDetectEncoding = true; } } } else { bAutoDetectEncoding = true; } } else { bAutoDetectEncoding = true; } } if (bAutoDetectEncoding) { if (DebugFile.trace) DebugFile.writeln("Autodetecting encoding"); ByteArrayInputStream oHtmlStrm = new ByteArrayInputStream(sHtml.getBytes(sDefaultEncoding)); sEncoding = new CharacterSetDetector().detect(oHtmlStrm, sDefaultEncoding); oHtmlStrm.close(); if (DebugFile.trace) DebugFile.writeln("Encoding set to " + sEncoding); } Pattern oPattern = oCompiler.compile("<base(\\x20)+href=(\"|')?([^'\"\\r\\n]+)(\"|')?(\\x20)*/?>", Perl5Compiler.CASE_INSENSITIVE_MASK); if (oMatcher.contains(sHtml, oPattern)) { sBaseHref = Gadgets.chomp(oMatcher.getMatch().group(3), "/"); if (DebugFile.trace) DebugFile.writeln("<base href=" + sBaseHref + ">"); } PatternMatcherInput oMatchInput = new PatternMatcherInput(sHtml); oPattern = oCompiler.compile( "\\x20(src=|background=|background-image:url\\x28)(\"|')?([^'\"\\r\\n]+)(\"|')?(\\x20|\\x29|/|>)", Perl5Compiler.CASE_INSENSITIVE_MASK); StringSubstitution oSrcSubs = new StringSubstitution(); int nMatches = 0; while (oMatcher.contains(oMatchInput, oPattern)) { nMatches++; String sMatch = oMatcher.getMatch().toString(); String sAttr = oMatcher.getMatch().group(1); String sQuo = oMatcher.getMatch().group(2); if (sQuo == null) sQuo = ""; String sSrc = oMatcher.getMatch().group(3); if (DebugFile.trace) DebugFile.writeln("Source file found at " + sSrc); String sEnd = oMatcher.getMatch().group(5); if (!oFiles.contains(sSrc)) oFiles.add(sSrc); String sFilename = sSrc.substring(sSrc.replace('\\', 
'/').lastIndexOf('/') + 1); if (DebugFile.trace) DebugFile.writeln("StringSubstitution.setSubstitution(" + sMatch + " replace with " + sMatch.substring(0, sAttr.length() + 1) + sQuo + sFilename + sQuo + sEnd + ")"); oSrcSubs.setSubstitution(sMatch.substring(0, sAttr.length() + 1) + sQuo + sFilename + sQuo + sEnd); sHtml = Util.substitute(oReplacer, oCompiler.compile(sMatch), oSrcSubs, sHtml, Util.SUBSTITUTE_ALL); } //wend oMatchInput = new PatternMatcherInput(sHtml); oPattern = oCompiler.compile( "<link\\x20+(rel=(\"|')?stylesheet(\"|')?\\x20+)?(type=(\"|')?text/css(\"|')?\\x20+)?href=(\"|')?([^'\"\\r\\n]+)(\"|')?"); while (oMatcher.contains(oMatchInput, oPattern)) { nMatches++; String sMatch = oMatcher.getMatch().toString(); String sSrc = oMatcher.getMatch().group(8); String sFilename = sSrc.substring(sSrc.replace('\\', '/').lastIndexOf('/') + 1); if (!oFiles.contains(sSrc)) oFiles.add(sSrc); if (DebugFile.trace) DebugFile.writeln("StringSubstitution.setSubstitution(" + sMatch + " replace with " + Gadgets.replace(sMatch, sSrc, sFilename) + ")"); oSrcSubs.setSubstitution(Gadgets.replace(sMatch, sSrc, sFilename)); sHtml = Util.substitute(oReplacer, oCompiler.compile(sMatch), oSrcSubs, sHtml); } // wend if (DebugFile.trace) { DebugFile.writeln(String.valueOf(nMatches) + " matches found"); DebugFile.write("\n" + sHtml + "\n"); } ZipOutputStream oZOut = new ZipOutputStream(oOutStrm); String sLocalName = sFilePath.substring(sFilePath.replace('\\', '/').lastIndexOf('/') + 1); int iDot = sLocalName.lastIndexOf('.'); if (iDot > 0) sLocalName = Gadgets.ASCIIEncode(sLocalName.substring(0, iDot)).toLowerCase() + ".html"; else sLocalName = Gadgets.ASCIIEncode(sLocalName).toLowerCase(); oEntries.add(sLocalName); if (DebugFile.trace) DebugFile.writeln("Putting entry " + sLocalName + " into ZIP"); oZOut.putNextEntry(new ZipEntry(sLocalName)); StringBufferInputStream oHtml = new StringBufferInputStream(sHtml); new StreamPipe().between(oHtml, oZOut); oHtml.close(); 
oZOut.closeEntry(); for (String sName : oFiles) { String sZipEntryName = sName.substring(sName.replace('\\', '/').lastIndexOf('/') + 1); if (!oEntries.contains(sZipEntryName)) { oEntries.add(sZipEntryName); if (DebugFile.trace) DebugFile.writeln("Putting entry " + sZipEntryName + " into ZIP"); oZOut.putNextEntry(new ZipEntry(sZipEntryName)); if (sName.startsWith("http://") || sName.startsWith("https://") || sName.startsWith("file://") || sBaseHref.length() > 0) { try { new StreamPipe().between(new ByteArrayInputStream(readfilebin(sBaseHref + sName)), oZOut); } catch (IOException ioe) { if (DebugFile.trace) { DebugFile.decIdent(); DebugFile.writeln("Could not download file " + sName); } } } else { try { byte[] aFile = readfilebin( sBasePath + (sName.startsWith("/") ? sName.substring(1) : sName)); if (null != aFile) { if (aFile.length > 0) new StreamPipe().between(new ByteArrayInputStream(aFile), oZOut); } else { DebugFile.writeln("Could not find file " + sBasePath + (sName.startsWith("/") ? sName.substring(1) : sName)); } } catch (IOException ioe) { if (DebugFile.trace) { DebugFile.decIdent(); DebugFile.writeln("Could not download file " + sBasePath + (sName.startsWith("/") ? sName.substring(1) : sName)); } } } oZOut.closeEntry(); } // fi (sName!=sLocalName) } // next oZOut.close(); } catch (MalformedPatternException mpe) { } catch (FTPException ftpe) { } if (DebugFile.trace) { DebugFile.decIdent(); DebugFile.writeln("End FileSystem.downloadhtmlpage()"); } }