List of usage examples for java.io.InputStream.reset()
public synchronized void reset() throws IOException
Repositions this stream to the position at the time the mark method was last called on this input stream.
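Before the per-project examples, here is a minimal sketch of the typical mark()/reset() pattern (not taken from any of the sources below; the file name and read limit are illustrative assumptions): wrap the stream in a BufferedInputStream so that markSupported() returns true, mark before peeking, then reset to re-read from the marked position.

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

public class MarkResetSketch {
    public static void main(String[] args) throws IOException {
        // A plain FileInputStream does not support mark/reset; BufferedInputStream does.
        try (InputStream in = new BufferedInputStream(new FileInputStream("example.bin"))) {
            in.mark(16);                 // remember this position; valid while at most 16 bytes are read
            byte[] header = new byte[4];
            int n = in.read(header);     // peek at the first few bytes, e.g. for format detection
            System.out.println("peeked " + n + " byte(s): " + Arrays.toString(header));
            in.reset();                  // rewind to the marked position
            // ... pass 'in' on to code that expects an unread stream
        }
    }
}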
From source file:com.ezdi.rtf.testRTFParser.RTFObjDataParser.java
private byte[] handleEmbeddedPOIFS(InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount)
        throws IOException {
    byte[] ret = null;
    try (NPOIFSFileSystem fs = new NPOIFSFileSystem(is)) {
        DirectoryNode root = fs.getRoot();
        if (root == null) {
            return ret;
        }
        if (root.hasEntry("Package")) {
            Entry ooxml = root.getEntry("Package");
            TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml));
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            IOUtils.copy(stream, out);
            ret = out.toByteArray();
        } else {
            // try poifs
            POIFSDocumentType type = POIFSDocumentType.detectType(root);
            if (type == POIFSDocumentType.OLE10_NATIVE) {
                try {
                    // Try to un-wrap the OLE10Native record:
                    Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
                    ret = ole.getDataBuffer();
                } catch (Ole10NativeException ex) {
                    // Not a valid OLE10Native record, skip it
                }
            } else if (type == POIFSDocumentType.COMP_OBJ) {
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    contentsEntry = (DocumentEntry) root.getEntry("Contents");
                }
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    ret = new byte[contentsEntry.getSize()];
                    inp.readFully(ret);
                }
            } else {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                is.reset();
                IOUtils.copy(is, out);
                ret = out.toByteArray();
                metadata.set(Metadata.RESOURCE_NAME_KEY,
                        "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
                metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            }
        }
    }
    return ret;
}
From source file:org.apache.tika.parser.rtf.RTFObjDataParser.java
private byte[] handleEmbeddedPOIFS(InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount)
        throws IOException {
    byte[] ret = null;
    try (NPOIFSFileSystem fs = new NPOIFSFileSystem(is)) {
        DirectoryNode root = fs.getRoot();
        if (root == null) {
            return ret;
        }
        if (root.hasEntry("Package")) {
            Entry ooxml = root.getEntry("Package");
            TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml));
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            IOUtils.copy(stream, out);
            ret = out.toByteArray();
        } else {
            // try poifs
            POIFSDocumentType type = POIFSDocumentType.detectType(root);
            if (type == POIFSDocumentType.OLE10_NATIVE) {
                try {
                    // Try to un-wrap the OLE10Native record:
                    Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
                    ret = ole.getDataBuffer();
                } catch (Ole10NativeException ex) {
                    // Not a valid OLE10Native record, skip it
                }
            } else if (type == POIFSDocumentType.COMP_OBJ) {
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    contentsEntry = (DocumentEntry) root.getEntry("Contents");
                }
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    ret = new byte[contentsEntry.getSize()];
                    inp.readFully(ret);
                }
            } else {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                is.reset();
                IOUtils.copy(is, out);
                ret = out.toByteArray();
                metadata.set(Metadata.RESOURCE_NAME_KEY,
                        "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
                metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            }
        }
    }
    return ret;
}
From source file:org.apache.tika.parser.pkg.PackageParser.java
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // lazily load the MediaTypeRegistry at parse time
    // only want to call getDefaultConfig() once, and can't
    // load statically because of the ForkParser
    TikaConfig config = context.get(TikaConfig.class);
    MediaTypeRegistry mediaTypeRegistry = null;
    if (config != null) {
        mediaTypeRegistry = config.getMediaTypeRegistry();
    } else {
        if (bufferedMediaTypeRegistry == null) {
            // buffer this for next time.
            synchronized (lock) {
                // now that we're locked, check again
                if (bufferedMediaTypeRegistry == null) {
                    bufferedMediaTypeRegistry = TikaConfig.getDefaultConfig().getMediaTypeRegistry();
                }
            }
        }
        mediaTypeRegistry = bufferedMediaTypeRegistry;
    }

    // Ensure that the stream supports the mark feature
    if (!stream.markSupported()) {
        stream = new BufferedInputStream(stream);
    }

    TemporaryResources tmp = new TemporaryResources();
    ArchiveInputStream ais = null;
    try {
        ArchiveStreamFactory factory = context.get(ArchiveStreamFactory.class, new ArchiveStreamFactory());
        // At the end we want to close the archive stream to release
        // any associated resources, but the underlying document stream
        // should not be closed
        ais = factory.createArchiveInputStream(new CloseShieldInputStream(stream));
    } catch (StreamingNotSupportedException sne) {
        // Most archive formats work on streams, but a few need files
        if (sne.getFormat().equals(ArchiveStreamFactory.SEVEN_Z)) {
            // Rework as a file, and wrap
            stream.reset();
            TikaInputStream tstream = TikaInputStream.get(stream, tmp);

            // Seven Zip supports passwords, was one given?
            String password = null;
            PasswordProvider provider = context.get(PasswordProvider.class);
            if (provider != null) {
                password = provider.getPassword(metadata);
            }

            SevenZFile sevenz;
            if (password == null) {
                sevenz = new SevenZFile(tstream.getFile());
            } else {
                sevenz = new SevenZFile(tstream.getFile(), password.getBytes("UnicodeLittleUnmarked"));
            }

            // Pending a fix for COMPRESS-269 / TIKA-1525, this bit is a little nasty
            ais = new SevenZWrapper(sevenz);
        } else {
            tmp.close();
            throw new TikaException("Unknown non-streaming format " + sne.getFormat(), sne);
        }
    } catch (ArchiveException e) {
        tmp.close();
        throw new TikaException("Unable to unpack document stream", e);
    }

    updateMediaType(ais, mediaTypeRegistry, metadata);

    // Use the delegate parser to parse the contained document
    EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    try {
        ArchiveEntry entry = ais.getNextEntry();
        while (entry != null) {
            if (!entry.isDirectory()) {
                parseEntry(ais, entry, extractor, metadata, xhtml);
            }
            entry = ais.getNextEntry();
        }
    } catch (UnsupportedZipFeatureException zfe) {
        // If it's an encrypted document of unknown password, report as such
        if (zfe.getFeature() == Feature.ENCRYPTION) {
            throw new EncryptedDocumentException(zfe);
        }
        // Otherwise throw the exception
        throw new TikaException("UnsupportedZipFeature", zfe);
    } catch (PasswordRequiredException pre) {
        throw new EncryptedDocumentException(pre);
    } finally {
        ais.close();
        tmp.close();
    }

    xhtml.endDocument();
}
From source file:org.openspotlight.storage.test.AbstractStorageSessionTest.java
@Test
public void shouldWorkWithInputStreamPropertiesOnExplicitFlush() throws Exception {
    final StorageSession session = explicitFlushInjector.getInstance(StorageSession.class);
    final StorageNode newNode = session.withPartition(ExamplePartition.DEFAULT).createNodeWithType("newNode1")
            .withSimpleKey("sequence", "1").withSimpleKey("name", "name").andCreate();
    final InputStream stream = new ByteArrayInputStream("streamValue".getBytes());
    newNode.setSimpleProperty(session, "streamProperty", stream);
    final StorageNode nullNode = session.withPartition(ExamplePartition.DEFAULT).createCriteria()
            .withNodeType("newNode1").withProperty("sequence").equalsTo("1").withProperty("name")
            .equalsTo("name").buildCriteria().andSearchUnique(session);
    assertThat(nullNode, is(nullValue()));
    session.flushTransient();
    final StorageNode loadedNode = session.withPartition(ExamplePartition.DEFAULT).createCriteria()
            .withNodeType("newNode1").withProperty("sequence").equalsTo("1").withProperty("name")
            .equalsTo("name").buildCriteria().andSearchUnique(session);
    stream.reset();
    assertThat(IOUtils.contentEquals(newNode.getPropertyValueAsStream(session, "streamProperty"), stream),
            is(true));
    final InputStream loaded1 = loadedNode.getPropertyValueAsStream(session, "streamProperty");
    final ByteArrayOutputStream temporary1 = new ByteArrayOutputStream();
    IOUtils.copy(loaded1, temporary1);
    final String asString1 = new String(temporary1.toByteArray());
    final ByteArrayOutputStream temporary2 = new ByteArrayOutputStream();
    final InputStream loaded2 = loadedNode.getPropertyValueAsStream(session, "streamProperty");
    IOUtils.copy(loaded2, temporary2);
    final String asString2 = new String(temporary2.toByteArray());
    assertThat(asString1, is("streamValue"));
    assertThat(asString2, is("streamValue"));
}
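A side note on the test above (not part of the original source): the bare stream.reset() works here because ByteArrayInputStream supports mark/reset natively and its mark position defaults to the start of the buffer, so no BufferedInputStream wrapping is needed. A minimal sketch of that behaviour:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

public class ByteArrayResetSketch {
    public static void main(String[] args) throws IOException {
        InputStream in = new ByteArrayInputStream("streamValue".getBytes(StandardCharsets.UTF_8));
        String first = new String(in.readAllBytes(), StandardCharsets.UTF_8);   // consumes the buffer (readAllBytes is Java 9+)
        in.reset();                                                             // rewinds to the default mark position (offset 0)
        String second = new String(in.readAllBytes(), StandardCharsets.UTF_8);  // reads the same bytes again
        System.out.println(first.equals(second));                               // prints true
    }
}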
From source file:org.exist.collections.Collection.java
/**
 * Stores an XML document in the database. {@link #validateXMLResourceInternal(org.exist.storage.txn.Txn,
 * org.exist.storage.DBBroker, org.exist.xmldb.XmldbURI, CollectionConfiguration, org.exist.collections.Collection.ValidateBlock)}
 * should have been called previously in order to acquire a write lock for the document. Launches the finish trigger.
 *
 * @param transaction
 * @param broker
 * @param info
 * @param source
 * @param privileged
 *
 * @throws EXistException
 * @throws PermissionDeniedException
 * @throws TriggerException
 * @throws SAXException
 * @throws LockException
 */
public void store(final Txn transaction, final DBBroker broker, final IndexInfo info, final InputSource source,
        boolean privileged)
        throws EXistException, PermissionDeniedException, TriggerException, SAXException, LockException {
    storeXMLInternal(transaction, broker, info, privileged, new StoreBlock() {
        @Override
        public void run() throws EXistException, SAXException {
            try {
                final InputStream is = source.getByteStream();
                if (is != null && is.markSupported()) {
                    is.reset();
                } else {
                    final Reader cs = source.getCharacterStream();
                    if (cs != null && cs.markSupported()) {
                        cs.reset();
                    }
                }
            } catch (final IOException e) {
                // mark is not supported: exception is expected, do nothing
                LOG.debug("InputStream or CharacterStream underlying the InputSource does not support marking and therefore cannot be re-read.");
            }
            final XMLReader reader = getReader(broker, false, info.getCollectionConfig());
            info.setReader(reader, null);
            try {
                reader.parse(source);
            } catch (final IOException e) {
                throw new EXistException(e);
            } finally {
                releaseReader(broker, info, reader);
            }
        }
    });
}
From source file:org.opentox.toxotis.client.http.PostHttpClient.java
private void postMultiPart() throws ServiceInvocationException {
    String charset = "UTF-8";
    String LINE_FEED = "\r\n";
    InputStream is;
    try {
        getPostLock().lock(); // LOCK
        if (fileContentToPost != null) {
            is = new FileInputStream(fileContentToPost);
        } else {
            is = inputStream;
        }
        // creates a unique boundary based on time stamp
        long boundaryTs = System.currentTimeMillis();
        String boundary = "===" + boundaryTs + "===";
        HttpURLConnection httpConn = connect(getUri().toURI());
        httpConn.setUseCaches(false);
        httpConn.setDoOutput(true); // indicates POST method
        httpConn.setDoInput(true);
        httpConn.setRequestProperty("Content-Type", "multipart/form-data;boundary=\"" + boundary + "\"");
        httpConn.setRequestProperty("User-Agent", "CodeJava Agent");
        httpConn.setRequestProperty("Test", "Bonjour");
        OutputStream outputStream = httpConn.getOutputStream();
        PrintWriter writer = new PrintWriter(new OutputStreamWriter(outputStream, charset), true);
        final int nParams = postParameters.size();
        if (nParams > 0) {
            for (Map.Entry<String, List<String>> e : postParameters.entrySet()) {
                List<String> values = e.getValue();
                for (String value : values) {
                    writer.append("--" + boundary).append(LINE_FEED);
                    writer.append("Content-Disposition: form-data; name=\"" + e.getKey() + "\"").append(LINE_FEED);
                    writer.append("Content-Type: text/plain; charset=" + charset).append(LINE_FEED);
                    writer.append(LINE_FEED);
                    writer.append(value).append(LINE_FEED);
                    writer.flush();
                }
            }
        }
        if (is.read() > 0) {
            is.reset();
            writer.append("--" + boundary).append(LINE_FEED);
            writer.append("Content-Disposition: form-data; name=\"" + fileUploadFieldName + "\"; filename=\""
                    + fileUploadFilename + "\"").append(LINE_FEED);
            writer.append("Content-Type: " + java.net.URLConnection.guessContentTypeFromName(fileUploadFilename))
                    .append(LINE_FEED);
            writer.append("Content-Transfer-Encoding: binary").append(LINE_FEED);
            writer.append(LINE_FEED);
            writer.flush();
            byte[] buffer = new byte[4096];
            int bytesRead = -1;
            while ((bytesRead = is.read(buffer)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
            outputStream.flush();
            is.close();
            writer.append(LINE_FEED);
            writer.flush();
            writer.append(LINE_FEED).flush();
        }
        writer.append("--" + boundary + "--").append(LINE_FEED);
        writer.close();
    } catch (final IOException ex) {
        ConnectionException postException = new ConnectionException(
                "Exception caught while posting the parameters to the " + "remote web service located at '"
                        + getUri() + "'",
                ex);
        postException.setActor(getUri() != null ? getUri().toString() : "N/A");
        throw postException;
    } finally {
        getPostLock().unlock(); // UNLOCK
    }
}
From source file:nl.minbzk.dwr.zoeken.enricher.processor.TikaProcessor.java
/**
 * Introduce encoding hints into the metadata if possible, and return the detected (or default) stream encoding
 * through the following process:
 *
 * In case of text/html or application/xhtml+xml:
 *
 * 1) Take the Content-Type as the mediaType + referenceEncoding, if it was given. Otherwise use the
 *    envelopeEncoding, if it was given.
 * 2) Process it through the ROME-derived XmlReader, using DEFAULT_ENCODING as the fall-back default.
 *
 * In case of anything else:
 *
 * 1) Use the reference encoding if it was given. Otherwise use the envelopeEncoding, if it was given.
 * 2) If no encoding hints were given, attempt to detect it using ICU4J's CharsetDetector and match it against a
 *    set of plausible encodings.
 * 3) If no plausible encoding was found, return the DEFAULT_ENCODING as the fall-back default.
 *
 * Additionally, if a language was given as an envelope field (specified in FetchSettings) we also add it to the
 * metadata.
 *
 * @param bufferedStream
 * @param metadata
 * @param envelopeEncoding
 * @param referenceEncoding
 * @param envelopeLanguage
 * @param mediaType
 * @param job
 * @return Charset
 * @throws IOException
 */
public static Charset introduceEncodingHints(final InputStream bufferedStream, final Metadata metadata,
        final String envelopeEncoding, final String referenceEncoding, final String envelopeLanguage,
        final MediaType mediaType, final EnricherJob job) throws IOException {
    Charset inputEncoding = Charset.forName(DEFAULT_ENCODING);

    boolean isLanguageDetectionEnabled = job.getLanguageDetectionParameter() != null;

    // The encoding is either the reference encoding, or the language / envelope encoding if null
    String[] optionalLanguageEncoding = envelopeLanguage != null ? retrieveLanguageEncoding(envelopeLanguage)
            : envelopeEncoding != null ? retrieveLanguageEncoding(envelopeEncoding) : new String[] {};
    String preferenceEncoding = referenceEncoding != null ? referenceEncoding
            : optionalLanguageEncoding.length == 2 ? optionalLanguageEncoding[1] : null;

    // If language detection has been disabled, don't do anything;
    // If a language was derived from the language-encoding parameter (typically DRELANGUAGE) use it;
    // If a default language has been requested, prefer it;
    String language = null;
    if (isLanguageDetectionEnabled) {
        if (job.getLanguageDetectionDefault() != null)
            language = job.getLanguageDetectionDefault();
        else if (optionalLanguageEncoding.length == 2) {
            if (job.getLanguageDetectionSupported() != null
                    && !job.getLanguageDetectionSupported().contains(optionalLanguageEncoding[0]))
                logger.warn("The envelope or encoding-hint derived language (" + optionalLanguageEncoding[0]
                        + ") does not fall within the list of supported languages for this job - resorting to META and n-gram detection");
            else
                language = optionalLanguageEncoding[0];
        }
    }

    if (mediaType != null
            && (mediaType.getSubtype().startsWith("html") || mediaType.getSubtype().startsWith("xhtml"))) {
        XmlHtmlReader reader = new XmlHtmlReader(bufferedStream,
                mediaType + (preferenceEncoding != null ? "; charset=" + preferenceEncoding : ""), true,
                DEFAULT_ENCODING);

        // Always reset the stream, as XmlHtmlReader has already consumed some bytes
        try {
            bufferedStream.reset();
        } catch (IOException e) {
            // Do nothing
        }

        // At the cost of which, we can now derive the encoding
        inputEncoding = Charset.forName(reader.getEncoding());

        // If the language encoding wasn't specified within the envelope, it might have been specified in the META tags
        if (isLanguageDetectionEnabled && job.getLanguageDetectionDefault() == null && language == null
                && reader.getLanguages() != null && reader.getLanguages().size() > 0) {
            if (job.getLanguageDetectionSupported() != null
                    && !job.getLanguageDetectionSupported().contains(reader.getLanguages().get(0)))
                logger.warn("The detected META language (" + reader.getLanguages().get(0)
                        + ") does not fall within the list of supported languages for this job - resorting to n-gram detection");
            else
                language = reader.getLanguages().get(0);
        }
    } else {
        if (preferenceEncoding != null)
            inputEncoding = Charset.forName(preferenceEncoding);
        else {
            try {
                CharsetDetector detector = new CharsetDetector().setText(bufferedStream);

                boolean isPlausible = false;

                CharsetMatch[] matches = detector.detectAll();

                if (language != null && PLAUSIBLE_ENCODING_MAPPING.containsKey(language))
                    for (CharsetMatch match : matches) {
                        String encodingName = match.getName();

                        // Give preference to certain character sets based on a possibly given language
                        if (PLAUSIBLE_ENCODING_MAPPING.get(language).contains(encodingName)) {
                            inputEncoding = Charset.forName(encodingName);
                            isPlausible = true;
                            break;
                        }
                    }
                else if (job.getLanguageDetectionParameter() != null && job.getLanguageDetectionSupported() != null
                        && job.getLanguageDetectionSupported().size() > 0) {
                    // Detract all non-plausible encodings
                    List<String> nonPlausibleEncodings = new ArrayList<String>();
                    for (Map.Entry<String, List<String>> entry : PLAUSIBLE_ENCODING_MAPPING.entrySet())
                        if (!job.getLanguageDetectionSupported().contains(entry.getKey()))
                            nonPlausibleEncodings.addAll(entry.getValue());
                    nonPlausibleEncodings.remove("UTF-8");
                    nonPlausibleEncodings.remove("UTF-16");

                    // Then detect against those
                    for (CharsetMatch match : matches) {
                        String encodingName = match.getName();
                        if (!nonPlausibleEncodings.contains(encodingName)) {
                            inputEncoding = Charset.forName(encodingName);
                            isPlausible = true;
                            break;
                        }
                    }
                }

                // Use the first encoding from the matches if no plausible one was found
                if (!isPlausible && matches.length > 0)
                    inputEncoding = Charset.forName(matches[0].getName());
            } catch (Exception e) {
                // Use the default encoding
            }

            bufferedStream.reset();
        }
    }

    // Add the Content-Type to the metadata
    metadata.set(HttpHeaders.CONTENT_TYPE,
            mediaType + (inputEncoding != null ? "; charset=" + inputEncoding : ""));
    metadata.set(HttpHeaders.CONTENT_ENCODING, inputEncoding.toString());

    // Add the Content-Language to the metadata
    if (language != null)
        try {
            LanguageMappingType languageMapping = LanguageMappingType.valueOf(language);
            metadata.set(HttpHeaders.CONTENT_LANGUAGE, languageMapping.language);
        } catch (IllegalArgumentException e) {
            metadata.set(HttpHeaders.CONTENT_LANGUAGE, language);
        }

    if (logger.isDebugEnabled())
        logger.debug("Given import envelope or reference encoding is " + preferenceEncoding
                + " - stream is thought to contain " + inputEncoding
                + (language != null ? " - using original language hint " + language : ""));

    return inputEncoding;
}
From source file:io.fabric8.tooling.archetype.commands.ArchetypeCreateAction.java
@Override
protected Object doExecute() throws Exception {
    Archetype archetype = archetypeService.getArchetype(archetypeGAV);
    if (archetype != null) {
        System.out.println(String.format("Generating %s:%s in %s", archetype.groupId, archetype.artifactId,
                target.getCanonicalPath()));
        InputStream archetypeInputStream = fetchArchetype(archetype);
        if (archetypeInputStream == null) {
            System.err.println("No archetype found for \"" + archetypeGAV + "\" coordinates");
            return null;
        }
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        IOUtils.copy(archetypeInputStream, baos);
        IOUtils.closeQuietly(archetypeInputStream);
        InputStream stream = new ByteArrayInputStream(baos.toByteArray());

        String defaultGroupId = "io.fabric8";
        String defaultArtifactId = archetype.artifactId + "-example";
        String defaultVersion = "1.0-SNAPSHOT";

        System.out.println("----- Configure archetype -----");
        String groupId = ShellUtils.readLine(session,
                String.format("Define value for property 'groupId' (%s):", defaultGroupId), false);
        String artifactId = ShellUtils.readLine(session,
                String.format("Define value for property 'artifactId' (%s):", defaultArtifactId), false);
        String version = ShellUtils.readLine(session,
                String.format("Define value for property 'version' (%s):", defaultVersion), false);

        groupId = groupId == null || groupId.trim().equals("") ? defaultGroupId : groupId;
        artifactId = artifactId == null || artifactId.trim().equals("") ? defaultArtifactId : artifactId;
        version = version == null || version.trim().equals("") ? defaultVersion : version;

        String defaultPackageName = (groupId + "." + artifactId).replaceAll("-", ".");
        String packageName = ShellUtils.readLine(session,
                String.format("Define value for property 'package' (%s):", defaultPackageName), false);
        packageName = packageName == null || packageName.trim().equals("") ? defaultPackageName : packageName;

        ArchetypeHelper helper = new ArchetypeHelper(stream, target, groupId, artifactId, version);
        helper.setPackageName(packageName);

        Map<String, String> properties = helper.parseProperties();
        // ask for replacement properties suggesting the defaults
        if (!properties.isEmpty()) {
            System.out.println("----- Configure additional properties -----");
            for (String key : properties.keySet()) {
                String p = ShellUtils.readLine(session,
                        String.format("Define value for property '%s' (%s):", key, properties.get(key)), false);
                p = p == null || p.trim().equals("") ? properties.get(key) : p;
                properties.put(key, p);
            }
        }
        helper.setOverrideProperties(properties);

        stream.reset();
        helper.execute();
    } else {
        System.err.println("No archetype found for \"" + archetypeGAV + "\" coordinates");
    }
    return null;
}
From source file:Main.java
/**
 * detectEncoding.java - Returns the character encoding of an input stream containing an XML file.<br/>
 * Copyright (c) 2009 Alexander Hristov.
 *
 * Licensed under the LGPL License - http://www.gnu.org/licenses/lgpl.txt
 *
 * The encoding is detected using the guidelines specified in the
 * <a href='http://www.w3.org/TR/xml/#sec-guessing'>XML W3C Specification</a>,
 * and the method was designed to be as fast as possible, without extensive
 * string operations or regular expressions<br/> <br/>
 *
 * <code>
 * A sample use would be<br/><br/>
 * InputStream in = ...; <br/>
 * String encoding = detectEncoding(in);<br/>
 * BufferedReader reader = new BufferedReader(new InputStreamReader(in,encoding)); <br/>
 * </code><br/>
 *
 * and from that point you can happily read text from the input stream.
 *
 * @param in
 *            Stream containing the data to be read. The stream must support
 *            mark()/reset(), otherwise the caller should wrap that stream in a
 *            {@link BufferedInputStream} before invoking the method. After the
 *            call, the stream is positioned at the < character (this means
 *            that if there were any byte-order-marks, they are skipped).
 *
 * @return Detected encoding, using the canonical name in java.io (see <a href=
 *         'http://java.sun.com/j2se/1.4.2/docs/guide/intl/encoding.doc.html'>Supported
 *         Encodings</a>).
 *
 * @author Alexander Hristov
 */
public static String detectEncoding(InputStream in) throws IOException {
    String encoding = null;
    in.mark(400);
    int ignoreBytes = 0;
    boolean readEncoding = false;
    byte[] buffer = new byte[400];
    int read = in.read(buffer, 0, 4);
    switch (buffer[0]) {
    case (byte) 0x00:
        if (buffer[1] == (byte) 0x00 && buffer[2] == (byte) 0xFE && buffer[3] == (byte) 0xFF) {
            ignoreBytes = 4;
            encoding = "UTF_32BE";
        } else if (buffer[1] == (byte) 0x00 && buffer[2] == (byte) 0x00 && buffer[3] == (byte) 0x3C) {
            encoding = "UTF_32BE";
            readEncoding = true;
        } else if (buffer[1] == (byte) 0x3C && buffer[2] == (byte) 0x00 && buffer[3] == (byte) 0x3F) {
            encoding = "UnicodeBigUnmarked";
            readEncoding = true;
        }
        break;
    case (byte) 0xFF:
        if (buffer[1] == (byte) 0xFE && buffer[2] == (byte) 0x00 && buffer[3] == (byte) 0x00) {
            ignoreBytes = 4;
            encoding = "UTF_32LE";
        } else if (buffer[1] == (byte) 0xFE) {
            ignoreBytes = 2;
            encoding = "UnicodeLittleUnmarked";
        }
        break;
    case (byte) 0x3C:
        readEncoding = true;
        if (buffer[1] == (byte) 0x00 && buffer[2] == (byte) 0x00 && buffer[3] == (byte) 0x00) {
            encoding = "UTF_32LE";
        } else if (buffer[1] == (byte) 0x00 && buffer[2] == (byte) 0x3F && buffer[3] == (byte) 0x00) {
            encoding = "UnicodeLittleUnmarked";
        } else if (buffer[1] == (byte) 0x3F && buffer[2] == (byte) 0x78 && buffer[3] == (byte) 0x6D) {
            encoding = "ASCII";
        }
        break;
    case (byte) 0xFE:
        if (buffer[1] == (byte) 0xFF) {
            encoding = "UnicodeBigUnmarked";
            ignoreBytes = 2;
        }
        break;
    case (byte) 0xEF:
        if (buffer[1] == (byte) 0xBB && buffer[2] == (byte) 0xBF) {
            encoding = "UTF8";
            ignoreBytes = 3;
        }
        break;
    case (byte) 0x4C:
        if (buffer[1] == (byte) 0x6F && buffer[2] == (byte) 0xA7 && buffer[3] == (byte) 0x94) {
            encoding = "CP037";
        }
        break;
    }
    if (encoding == null) {
        encoding = System.getProperty("file.encoding");
    }
    if (readEncoding) {
        read = in.read(buffer, 4, buffer.length - 4);
        Charset cs = Charset.forName(encoding);
        String s = new String(buffer, 4, read, cs);
        int pos = s.indexOf("encoding");
        if (pos == -1) {
            encoding = System.getProperty("file.encoding");
        } else {
            int limit = s.indexOf("?>");
            char delim;
            int start = s.indexOf(delim = '\'', pos);
            if (start == -1 || start >= limit)
                start = s.indexOf(delim = '"', pos);
            if (start == -1 || start >= limit)
                throw (new IOException("Encoding error " + buffer));
            int end = s.indexOf(delim, start + 1);
            if (end == -1 || end >= limit)
                throw (new IOException("Encoding error " + buffer));
            encoding = s.substring(start + 1, end);
        }
    }
    in.reset();
    while (ignoreBytes-- > 0)
        in.read();
    return encoding;
}