List of usage examples for java.io BufferedInputStream markSupported
public boolean markSupported()
mark
and reset
methods. From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReaderSpi.java
@Override public boolean canDecodeInput(BufferedInputStream stream) throws IOException { if (stream == null) { throw new IllegalArgumentException("stream == null!"); }/*from w w w . j av a 2 s. c o m*/ dbgLog.fine("applying the dta test\n"); byte[] b = new byte[DTA_HEADER_SIZE]; if (stream.markSupported()) { stream.mark(0); } int nbytes = stream.read(b, 0, DTA_HEADER_SIZE); if (nbytes == 0) { throw new IOException(); } //printHexDump(b, "hex dump of the byte-array"); if (stream.markSupported()) { stream.reset(); } boolean DEBUG = false; dbgLog.info("hex dump: 1st 4bytes =>" + new String(Hex.encodeHex(b)) + "<-"); if (b[2] != 1) { dbgLog.fine("3rd byte is not 1: given file is not stata-dta type"); return false; } else if ((b[1] != 1) && (b[1] != 2)) { dbgLog.fine("2nd byte is neither 0 nor 1: this file is not stata-dta type"); return false; } else if (!DTAFileReaderSpi.stataReleaseNumber.containsKey(b[0])) { dbgLog.fine("1st byte (" + b[0] + ") is not within the ingestable range [rel. 3-10]:" + "this file is NOT stata-dta type"); return false; } else { dbgLog.fine("this file is stata-dta type: " + DTAFileReaderSpi.stataReleaseNumber.get(b[0]) + "(No in HEX=" + b[0] + ")"); return true; } }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReaderSpi.java
@Override public boolean canDecodeInput(Object source) throws IOException { if (!(source instanceof BufferedInputStream)) { return false; }//w w w. ja v a 2s . c o m if (source == null) { throw new IllegalArgumentException("source == null!"); } BufferedInputStream stream = (BufferedInputStream) source; dbgLog.fine("applying the por test\n"); byte[] b = new byte[POR_HEADER_SIZE]; if (stream.markSupported()) { stream.mark(0); } int nbytes = stream.read(b, 0, POR_HEADER_SIZE); //printHexDump(b, "hex dump of the byte-array"); if (nbytes == 0) { throw new IOException(); } else if (nbytes < 491) { // size test dbgLog.fine("this file is NOT spss-por type"); return false; } if (stream.markSupported()) { stream.reset(); } boolean DEBUG = false; //windows [0D0A]=> [1310] = [CR/LF] //unix [0A] => [10] //mac [0D] => [13] // 3char [0D0D0A]=> [131310] spss for windows rel 15 // expected results // unix case: [0A] : [80], [161], [242], [323], [404], [485] // windows case: [0D0A] : [81], [163], [245], [327], [409], [491] // : [0D0D0A] : [82], [165], [248], [331], [414], [495] // convert b into a ByteBuffer ByteBuffer buff = ByteBuffer.wrap(b); byte[] nlch = new byte[36]; int pos1; int pos2; int pos3; int ucase = 0; int wcase = 0; int mcase = 0; int three = 0; int nolines = 6; int nocols = 80; for (int i = 0; i < nolines; ++i) { int baseBias = nocols * (i + 1); // 1-char case pos1 = baseBias + i; buff.position(pos1); dbgLog.finer("\tposition(1)=" + buff.position()); int j = 6 * i; nlch[j] = buff.get(); if (nlch[j] == 10) { ucase++; } else if (nlch[j] == 13) { mcase++; } // 2-char case pos2 = baseBias + 2 * i; buff.position(pos2); dbgLog.finer("\tposition(2)=" + buff.position()); nlch[j + 1] = buff.get(); nlch[j + 2] = buff.get(); // 3-char case pos3 = baseBias + 3 * i; buff.position(pos3); dbgLog.finer("\tposition(3)=" + buff.position()); nlch[j + 3] = buff.get(); nlch[j + 4] = buff.get(); nlch[j + 5] = buff.get(); dbgLog.finer(i + "-th iteration position =" + nlch[j] + "\t" + nlch[j + 1] + "\t" + nlch[j + 2]); dbgLog.finer(i + "-th iteration position =" + nlch[j + 3] + "\t" + nlch[j + 4] + "\t" + nlch[j + 5]); if ((nlch[j + 3] == 13) && (nlch[j + 4] == 13) && (nlch[j + 5] == 10)) { three++; } else if ((nlch[j + 1] == 13) && (nlch[j + 2] == 10)) { wcase++; } buff.rewind(); } if (three == nolines) { dbgLog.fine("0D0D0A case"); windowsNewLine = false; } else if ((ucase == nolines) && (wcase < nolines)) { dbgLog.fine("0A case"); windowsNewLine = false; } else if ((ucase < nolines) && (wcase == nolines)) { dbgLog.fine("0D0A case"); } else if ((mcase == nolines) && (wcase < nolines)) { dbgLog.fine("0D case"); windowsNewLine = false; } buff.rewind(); int PORmarkPosition = POR_MARK_POSITION_DEFAULT; if (windowsNewLine) { PORmarkPosition = PORmarkPosition + 5; } else if (three == nolines) { PORmarkPosition = PORmarkPosition + 10; } byte[] pormark = new byte[8]; buff.position(PORmarkPosition); buff.get(pormark, 0, 8); String pormarks = new String(pormark); dbgLog.fine( "pormark[hex: 53 50 53 53 50 4F 52 54 == SPSSPORT] =>" + new String(Hex.encodeHex(pormark)) + "<-"); if (pormarks.equals(POR_MARK)) { dbgLog.fine("this file is spss-por type"); return true; } else { dbgLog.fine("this file is NOT spss-por type"); } return false; }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReaderSpi.java
@Override public boolean canDecodeInput(BufferedInputStream stream) throws IOException { if (stream == null) { throw new IllegalArgumentException("file == null!"); }//from ww w . j a va 2 s.c o m dbgLog.fine("applying the por test\n"); byte[] b = new byte[POR_HEADER_SIZE]; if (stream.markSupported()) { stream.mark(0); } int nbytes = stream.read(b, 0, POR_HEADER_SIZE); //printHexDump(b, "hex dump of the byte-array"); if (nbytes == 0) { throw new IOException(); } else if (nbytes < 491) { // size test dbgLog.fine("this file is NOT spss-por type"); return false; } if (stream.markSupported()) { stream.reset(); } boolean DEBUG = false; //windows [0D0A]=> [1310] = [CR/LF] //unix [0A] => [10] //mac [0D] => [13] // 3char [0D0D0A]=> [131310] spss for windows rel 15 // expected results // unix case: [0A] : [80], [161], [242], [323], [404], [485] // windows case: [0D0A] : [81], [163], [245], [327], [409], [491] // : [0D0D0A] : [82], [165], [248], [331], [414], [495] // convert b into a ByteBuffer ByteBuffer buff = ByteBuffer.wrap(b); byte[] nlch = new byte[36]; int pos1; int pos2; int pos3; int ucase = 0; int wcase = 0; int mcase = 0; int three = 0; int nolines = 6; int nocols = 80; for (int i = 0; i < nolines; ++i) { int baseBias = nocols * (i + 1); // 1-char case pos1 = baseBias + i; buff.position(pos1); dbgLog.finer("\tposition(1)=" + buff.position()); int j = 6 * i; nlch[j] = buff.get(); if (nlch[j] == 10) { ucase++; } else if (nlch[j] == 13) { mcase++; } // 2-char case pos2 = baseBias + 2 * i; buff.position(pos2); dbgLog.finer("\tposition(2)=" + buff.position()); nlch[j + 1] = buff.get(); nlch[j + 2] = buff.get(); // 3-char case pos3 = baseBias + 3 * i; buff.position(pos3); dbgLog.finer("\tposition(3)=" + buff.position()); nlch[j + 3] = buff.get(); nlch[j + 4] = buff.get(); nlch[j + 5] = buff.get(); dbgLog.finer(i + "-th iteration position =" + nlch[j] + "\t" + nlch[j + 1] + "\t" + nlch[j + 2]); dbgLog.finer(i + "-th iteration position =" + nlch[j + 3] + "\t" + nlch[j + 4] + "\t" + nlch[j + 5]); if ((nlch[j + 3] == 13) && (nlch[j + 4] == 13) && (nlch[j + 5] == 10)) { three++; } else if ((nlch[j + 1] == 13) && (nlch[j + 2] == 10)) { wcase++; } buff.rewind(); } if (three == nolines) { dbgLog.fine("0D0D0A case"); windowsNewLine = false; } else if ((ucase == nolines) && (wcase < nolines)) { dbgLog.fine("0A case"); windowsNewLine = false; } else if ((ucase < nolines) && (wcase == nolines)) { dbgLog.fine("0D0A case"); } else if ((mcase == nolines) && (wcase < nolines)) { dbgLog.fine("0D case"); windowsNewLine = false; } buff.rewind(); int PORmarkPosition = POR_MARK_POSITION_DEFAULT; if (windowsNewLine) { PORmarkPosition = PORmarkPosition + 5; } else if (three == nolines) { PORmarkPosition = PORmarkPosition + 10; } byte[] pormark = new byte[8]; buff.position(PORmarkPosition); buff.get(pormark, 0, 8); String pormarks = new String(pormark); //dbgLog.fine("pormark =>" + pormarks + "<-"); dbgLog.fine( "pormark[hex: 53 50 53 53 50 4F 52 54 == SPSSPORT] =>" + new String(Hex.encodeHex(pormark)) + "<-"); if (pormarks.equals(POR_MARK)) { dbgLog.fine("this file is spss-por type"); return true; } else { dbgLog.fine("this file is NOT spss-por type"); } return false; }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.por.PORFileReader.java
private File decodeHeader(BufferedInputStream stream) throws IOException { File tempPORfile = null;//from w w w .jav a 2s . c o m if (stream == null) { throw new IllegalArgumentException("file == null!"); } byte[] headerByes = new byte[POR_HEADER_SIZE]; if (stream.markSupported()) { stream.mark(1000); } int nbytes = stream.read(headerByes, 0, POR_HEADER_SIZE); //printHexDump(headerByes, "hex dump of the byte-array"); if (nbytes == 0) { throw new IOException("decodeHeader: reading failure"); } else if (nbytes < 491) { // Size test: by defnition, it must have at least // 491-byte header, i.e., the file size less than this threshold // is not a POR file dbgLog.fine("this file is NOT spss-por type"); throw new IllegalArgumentException("file is not spss-por type"); } // rewind the current reading position back to the beginning if (stream.markSupported()) { stream.reset(); } // line-terminating characters are usually one or two by defnition // however, a POR file saved by a genuine SPSS for Windows // had a three-character line terminator, i.e., failed to remove the // original file's one-character terminator when it was opened, and // saved it with the default two-character terminator without // removing original terminators. So we have to expect such a rare // case // // terminator // windows [0D0A]=> [1310] = [CR/LF] // unix [0A] => [10] // mac [0D] => [13] // 3char [0D0D0A]=> [131310] spss for windows rel 15 // // terminating characters should be found at the following // column positions[counting from 0]: // unix case: [0A] : [80], [161], [242], [323], [404], [485] // windows case: [0D0A] : [81], [163], [245], [327], [409], [491] // : [0D0D0A] : [82], [165], [248], [331], [414], [495] // convert b into a ByteBuffer ByteBuffer buff = ByteBuffer.wrap(headerByes); byte[] nlch = new byte[36]; int pos1; int pos2; int pos3; int ucase = 0; int wcase = 0; int mcase = 0; int three = 0; int nolines = 6; int nocols = 80; for (int i = 0; i < nolines; ++i) { int baseBias = nocols * (i + 1); // 1-char case pos1 = baseBias + i; buff.position(pos1); dbgLog.finer("\tposition(1)=" + buff.position()); int j = 6 * i; nlch[j] = buff.get(); if (nlch[j] == 10) { ucase++; } else if (nlch[j] == 13) { mcase++; } // 2-char case pos2 = baseBias + 2 * i; buff.position(pos2); dbgLog.finer("\tposition(2)=" + buff.position()); nlch[j + 1] = buff.get(); nlch[j + 2] = buff.get(); // 3-char case pos3 = baseBias + 3 * i; buff.position(pos3); dbgLog.finer("\tposition(3)=" + buff.position()); nlch[j + 3] = buff.get(); nlch[j + 4] = buff.get(); nlch[j + 5] = buff.get(); dbgLog.finer(i + "-th iteration position =" + nlch[j] + "\t" + nlch[j + 1] + "\t" + nlch[j + 2]); dbgLog.finer(i + "-th iteration position =" + nlch[j + 3] + "\t" + nlch[j + 4] + "\t" + nlch[j + 5]); if ((nlch[j + 3] == 13) && (nlch[j + 4] == 13) && (nlch[j + 5] == 10)) { three++; } else if ((nlch[j + 1] == 13) && (nlch[j + 2] == 10)) { wcase++; } buff.rewind(); } boolean windowsNewLine = true; if (three == nolines) { windowsNewLine = false; // lineTerminator = "0D0D0A" } else if ((ucase == nolines) && (wcase < nolines)) { windowsNewLine = false; // lineTerminator = "0A" } else if ((ucase < nolines) && (wcase == nolines)) { windowsNewLine = true; //lineTerminator = "0D0A" } else if ((mcase == nolines) && (wcase < nolines)) { windowsNewLine = false; //lineTerminator = "0D" } buff.rewind(); int PORmarkPosition = POR_MARK_POSITION_DEFAULT; if (windowsNewLine) { PORmarkPosition = PORmarkPosition + 5; } else if (three == nolines) { PORmarkPosition = PORmarkPosition + 10; } byte[] pormark = new byte[8]; buff.position(PORmarkPosition); buff.get(pormark, 0, 8); String pormarks = new String(pormark); //dbgLog.fine("pormark =>" + pormarks + "<-"); dbgLog.fine( "pormark[hex: 53 50 53 53 50 4F 52 54 == SPSSPORT] =>" + new String(Hex.encodeHex(pormark)) + "<-"); if (pormarks.equals(POR_MARK)) { dbgLog.fine("POR ID toke test: Passed"); init(); smd.getFileInformation().put("mimeType", MIME_TYPE); smd.getFileInformation().put("fileFormat", MIME_TYPE); } else { dbgLog.fine("this file is NOT spss-por type"); throw new IllegalArgumentException("decodeHeader: POR ID token was not found"); } // save the POR file without new line characters FileOutputStream fileOutPOR = null; Writer fileWriter = null; // Scanner class can handle three-character line-terminator Scanner porScanner = null; try { tempPORfile = File.createTempFile("tempPORfile.", ".por"); fileOutPOR = new FileOutputStream(tempPORfile); fileWriter = new BufferedWriter(new OutputStreamWriter(fileOutPOR, "utf8")); porScanner = new Scanner(stream); // Because 64-bit and 32-bit machines decode POR's first 40-byte // sequence differently, the first 5 leader lines are skipped from // the new-line-stripped file int lineCounter = 0; while (porScanner.hasNextLine()) { lineCounter++; if (lineCounter <= 5) { String line = porScanner.nextLine().toString(); dbgLog.fine("line=" + lineCounter + ":" + line.length() + ":" + line); } else { fileWriter.write(porScanner.nextLine().toString()); } } } finally { try { if (fileWriter != null) { fileWriter.close(); } } catch (IOException ex) { ex.printStackTrace(); } if (porScanner != null) { porScanner.close(); } } return tempPORfile; }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReader.java
private File decodeHeader(BufferedInputStream stream) throws IOException { dbgLog.fine("decodeHeader(): start"); File tempPORfile = null;//from ww w . j a v a 2 s . c o m if (stream == null) { throw new IllegalArgumentException("file == null!"); } byte[] headerByes = new byte[POR_HEADER_SIZE]; if (stream.markSupported()) { stream.mark(1000); } int nbytes = stream.read(headerByes, 0, POR_HEADER_SIZE); //printHexDump(headerByes, "hex dump of the byte-array"); if (nbytes == 0) { throw new IOException("decodeHeader: reading failure"); } else if (nbytes < 491) { // Size test: by defnition, it must have at least // 491-byte header, i.e., the file size less than this threshold // is not a POR file dbgLog.fine("this file is NOT spss-por type"); throw new IllegalArgumentException("file is not spss-por type"); } // rewind the current reading position back to the beginning if (stream.markSupported()) { stream.reset(); } // line-terminating characters are usually one or two by defnition // however, a POR file saved by a genuine SPSS for Windows // had a three-character line terminator, i.e., failed to remove the // original file's one-character terminator when it was opened, and // saved it with the default two-character terminator without // removing original terminators. So we have to expect such a rare // case // // terminator // windows [0D0A]=> [1310] = [CR/LF] // unix [0A] => [10] // mac [0D] => [13] // 3char [0D0D0A]=> [131310] spss for windows rel 15 // // terminating characters should be found at the following // column positions[counting from 0]: // unix case: [0A] : [80], [161], [242], [323], [404], [485] // windows case: [0D0A] : [81], [163], [245], [327], [409], [491] // : [0D0D0A] : [82], [165], [248], [331], [414], [495] // convert b into a ByteBuffer ByteBuffer buff = ByteBuffer.wrap(headerByes); byte[] nlch = new byte[36]; int pos1; int pos2; int pos3; int ucase = 0; int wcase = 0; int mcase = 0; int three = 0; int nolines = 6; int nocols = 80; for (int i = 0; i < nolines; ++i) { int baseBias = nocols * (i + 1); // 1-char case pos1 = baseBias + i; buff.position(pos1); dbgLog.finer("\tposition(1)=" + buff.position()); int j = 6 * i; nlch[j] = buff.get(); if (nlch[j] == 10) { ucase++; } else if (nlch[j] == 13) { mcase++; } // 2-char case pos2 = baseBias + 2 * i; buff.position(pos2); dbgLog.finer("\tposition(2)=" + buff.position()); nlch[j + 1] = buff.get(); nlch[j + 2] = buff.get(); // 3-char case pos3 = baseBias + 3 * i; buff.position(pos3); dbgLog.finer("\tposition(3)=" + buff.position()); nlch[j + 3] = buff.get(); nlch[j + 4] = buff.get(); nlch[j + 5] = buff.get(); dbgLog.finer(i + "-th iteration position =" + nlch[j] + "\t" + nlch[j + 1] + "\t" + nlch[j + 2]); dbgLog.finer(i + "-th iteration position =" + nlch[j + 3] + "\t" + nlch[j + 4] + "\t" + nlch[j + 5]); if ((nlch[j + 3] == 13) && (nlch[j + 4] == 13) && (nlch[j + 5] == 10)) { three++; } else if ((nlch[j + 1] == 13) && (nlch[j + 2] == 10)) { wcase++; } buff.rewind(); } boolean windowsNewLine = true; if (three == nolines) { windowsNewLine = false; // lineTerminator = "0D0D0A" } else if ((ucase == nolines) && (wcase < nolines)) { windowsNewLine = false; // lineTerminator = "0A" } else if ((ucase < nolines) && (wcase == nolines)) { windowsNewLine = true; //lineTerminator = "0D0A" } else if ((mcase == nolines) && (wcase < nolines)) { windowsNewLine = false; //lineTerminator = "0D" } buff.rewind(); int PORmarkPosition = POR_MARK_POSITION_DEFAULT; if (windowsNewLine) { PORmarkPosition = PORmarkPosition + 5; } else if (three == nolines) { PORmarkPosition = PORmarkPosition + 10; } byte[] pormark = new byte[8]; buff.position(PORmarkPosition); buff.get(pormark, 0, 8); String pormarks = new String(pormark); //dbgLog.fine("pormark =>" + pormarks + "<-"); dbgLog.fine( "pormark[hex: 53 50 53 53 50 4F 52 54 == SPSSPORT] =>" + new String(Hex.encodeHex(pormark)) + "<-"); if (pormarks.equals(POR_MARK)) { dbgLog.fine("POR ID toke test: Passed"); init(); dataTable.setOriginalFileFormat(MIME_TYPE); dataTable.setUnf("UNF:6:NOTCALCULATED"); } else { dbgLog.fine("this file is NOT spss-por type"); throw new IllegalArgumentException("decodeHeader: POR ID token was not found"); } // save the POR file without new line characters FileOutputStream fileOutPOR = null; Writer fileWriter = null; // Scanner class can handle three-character line-terminator Scanner porScanner = null; try { tempPORfile = File.createTempFile("tempPORfile.", ".por"); fileOutPOR = new FileOutputStream(tempPORfile); fileWriter = new BufferedWriter(new OutputStreamWriter(fileOutPOR, "utf8")); porScanner = new Scanner(stream); // Because 64-bit and 32-bit machines decode POR's first 40-byte // sequence differently, the first 5 leader lines are skipped from // the new-line-stripped file int lineCounter = 0; while (porScanner.hasNextLine()) { lineCounter++; if (lineCounter <= 5) { String line = porScanner.nextLine(); dbgLog.fine("line=" + lineCounter + ":" + line.length() + ":" + line); } else { fileWriter.write(porScanner.nextLine()); } } } finally { try { if (fileWriter != null) { fileWriter.close(); } } catch (IOException ex) { ex.printStackTrace(); } if (porScanner != null) { porScanner.close(); } } return tempPORfile; }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeHeader(BufferedInputStream stream) throws IOException { dbgLog.fine("decodeHeader(): start"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }//w w w .j a v a 2 s . c om // the length of the magic number is 4 (1-byte character * 4) // its value is expected to be $FL2 byte[] b = new byte[SAV_MAGIC_NUMBER_LENGTH]; try { if (stream.markSupported()) { stream.mark(100); } int nbytes = stream.read(b, 0, SAV_MAGIC_NUMBER_LENGTH); if (nbytes == 0) { throw new IOException(); } } catch (IOException ex) { //ex.printStackTrace(); throw ex; } //printHexDump(b, "hex dump of the byte-array"); String hdr4sav = new String(b); dbgLog.fine("from string=" + hdr4sav); if (hdr4sav.equals(SAV_FILE_SIGNATURE)) { dbgLog.fine("this file is spss-sav type"); // initialize version-specific parameter init(); dataTable.setOriginalFileFormat(MIME_TYPE[0]); dataTable.setUnf("UNF:6:"); } else { dbgLog.fine("this file is NOT spss-sav type"); throw new IllegalArgumentException("given file is not spss-sav type"); } dbgLog.fine("***** decodeHeader(): end *****"); }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
void decodeHeader(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeHeader(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }// w w w . j a v a 2 s . c o m // the length of the magic number is 4 (1-byte character * 4) // its value is expected to be $FL2 byte[] b = new byte[SAV_MAGIC_NUMBER_LENGTH]; try { if (stream.markSupported()) { stream.mark(100); } int nbytes = stream.read(b, 0, SAV_MAGIC_NUMBER_LENGTH); if (nbytes == 0) { throw new IOException(); } } catch (IOException ex) { //ex.printStackTrace(); throw ex; } //printHexDump(b, "hex dump of the byte-array"); String hdr4sav = new String(b); dbgLog.fine("from string=" + hdr4sav); if (hdr4sav.equals(SAV_FILE_SIGNATURE)) { dbgLog.fine("this file is spss-sav type"); // initialize version-specific parameter init(); smd.getFileInformation().put("mimeType", MIME_TYPE[0]); smd.getFileInformation().put("fileFormat", MIME_TYPE[0]); } else { dbgLog.fine("this file is NOT spss-sav type"); throw new IllegalArgumentException("given file is not spss-sav type"); } smd.getFileInformation().put("charset", defaultCharSet); dbgLog.fine("smd dump:" + smd.toString()); dbgLog.fine("***** decodeHeader(): end *****"); }
From source file:org.apache.flex.compiler.filespecs.CombinedFile.java
/** * Get the BOM tag of a stream./*from ww w . j a va 2s .c o m*/ * * @param strm BufferedInputStream to be checked. * @return {@link BOM} type. * @throws IOException Error. */ public static BOM getBOM(BufferedInputStream strm) throws IOException { assert (strm.markSupported()) : "getBOM call on stream which does not support mark"; // Peek the first 4 bytes. final byte[] peek = new byte[4]; strm.mark(4); strm.read(peek); strm.reset(); // Try matching 4-byte BOM tags. final byte[] quadruplet = Arrays.copyOf(peek, 4); if (Arrays.equals(BOM.UTF_32_BE.pattern, quadruplet)) return BOM.UTF_32_BE; else if (Arrays.equals(BOM.UTF_32_LE.pattern, quadruplet)) return BOM.UTF_32_LE; // Try matching 3-byte BOM tags. final byte[] triplet = Arrays.copyOf(peek, 3); if (Arrays.equals(BOM.UTF_8.pattern, triplet)) return BOM.UTF_8; // Try matching 2-byte BOM tags. final byte[] twin = Arrays.copyOf(peek, 2); if (Arrays.equals(BOM.UTF_16_BE.pattern, twin)) return BOM.UTF_16_BE; else if (Arrays.equals(BOM.UTF_16_LE.pattern, twin)) return BOM.UTF_16_LE; // No BOM tag. return BOM.NONE; }
From source file:org.opf_labs.fmts.fidget.TikaIdentifier.java
static final IdentificationResult fromStream(final MimeTypes mimeRepo, final InputStream stream, URI loc) { // Get a buffered input stream that supports marks BufferedInputStream mrkStr = new BufferedInputStream(stream); assert (mrkStr.markSupported()); // put the mark at the begining, and should be comfortable for Tika id // length/* w w w. j av a2 s. co m*/ mrkStr.mark(mimeRepo.getMinLength() * 2); // identify and time long start = new Date().getTime(); MediaType mime = identify(mimeRepo, stream, loc); long duration = new Date().getTime() - start; // Now reset the stream and hash IdentificationResult result; try { mrkStr.reset(); result = new IdentificationResult(hash64K(stream), loc, mime, duration); } catch (IOException excep) { // OK couldn't read or hash stream, record error and what we have result = new IdentificationResult("", IdentificationResult.ERROR_LOC, mime, duration); } return result; }