Java tutorial
package com.fasterxml.jackson.dataformat.smile; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.lang.ref.SoftReference; import java.math.BigDecimal; import java.math.BigInteger; import java.util.Arrays; import com.fasterxml.jackson.core.*; import com.fasterxml.jackson.core.base.ParserBase; import com.fasterxml.jackson.core.io.IOContext; import com.fasterxml.jackson.core.sym.BytesToNameCanonicalizer; import com.fasterxml.jackson.core.sym.Name; import static com.fasterxml.jackson.dataformat.smile.SmileConstants.BYTE_MARKER_END_OF_STRING; public class SmileParser extends ParserBase { /** * Enumeration that defines all togglable features for Smile generators. */ public enum Feature { /** * Feature that determines whether 4-byte Smile header is mandatory in input, * or optional. If enabled, it means that only input that starts with the header * is accepted as valid; if disabled, header is optional. In latter case,r * settings for content are assumed to be defaults. */ REQUIRE_HEADER(true); final boolean _defaultState; final int _mask; /** * Method that calculates bit set (flags) of all features that * are enabled by default. */ public static int collectDefaults() { int flags = 0; for (Feature f : values()) { if (f.enabledByDefault()) { flags |= f.getMask(); } } return flags; } private Feature(boolean defaultState) { _defaultState = defaultState; _mask = (1 << ordinal()); } public boolean enabledByDefault() { return _defaultState; } public int getMask() { return _mask; } } private final static int[] NO_INTS = new int[0]; private final static String[] NO_STRINGS = new String[0]; /* /********************************************************** /* Configuration /********************************************************** */ /** * Codec used for data binding when (if) requested. */ protected ObjectCodec _objectCodec; /** * Flag that indicates whether content can legally have raw (unquoted) * binary data. Since this information is included both in header and * in actual binary data blocks there is redundancy, and we want to * ensure settings are compliant. Using application may also want to * know this setting in case it does some direct (random) access. */ protected boolean _mayContainRawBinary; /** * Helper object used for low-level recycling of Smile-generator * specific buffers. */ final protected SmileBufferRecycler<String> _smileBufferRecycler; /* /********************************************************** /* Input source config, state (from ex StreamBasedParserBase) /********************************************************** */ /** * Input stream that can be used for reading more content, if one * in use. May be null, if input comes just as a full buffer, * or if the stream has been closed. */ protected InputStream _inputStream; /** * Current buffer from which data is read; generally data is read into * buffer from input source, but in some cases pre-loaded buffer * is handed to the parser. */ protected byte[] _inputBuffer; /** * Flag that indicates whether the input buffer is recycable (and * needs to be returned to recycler once we are done) or not. *<p> * If it is not, it also means that parser can NOT modify underlying * buffer. */ protected boolean _bufferRecyclable; /* /********************************************************** /* Additional parsing state /********************************************************** */ /** * Flag that indicates that the current token has not yet * been fully processed, and needs to be finished for * some access (or skipped to obtain the next token) */ protected boolean _tokenIncomplete = false; /** * Type byte of the current token */ protected int _typeByte; /** * Specific flag that is set when we encountered a 32-bit * floating point value; needed since numeric super classes do * not track distinction between float and double, but Smile * format does, and we want to retain that separation. */ protected boolean _got32BitFloat; /* /********************************************************** /* Symbol handling, decoding /********************************************************** */ /** * Symbol table that contains field names encountered so far */ final protected BytesToNameCanonicalizer _symbols; /** * Temporary buffer used for name parsing. */ protected int[] _quadBuffer = NO_INTS; /** * Quads used for hash calculation */ protected int _quad1, _quad2; /** * Array of recently seen field names, which may be back referenced * by later fields. * Defaults set to enable handling even if no header found. */ protected String[] _seenNames = NO_STRINGS; protected int _seenNameCount = 0; /** * Array of recently seen field names, which may be back referenced * by later fields * Defaults set to disable handling if no header found. */ protected String[] _seenStringValues = null; protected int _seenStringValueCount = -1; /* /********************************************************** /* Thread-local recycling /********************************************************** */ /** * <code>ThreadLocal</code> contains a {@link java.lang.ref.SoftReference} * to a buffer recycler used to provide a low-cost * buffer recycling for Smile-specific buffers. */ final protected static ThreadLocal<SoftReference<SmileBufferRecycler<String>>> _smileRecyclerRef = new ThreadLocal<SoftReference<SmileBufferRecycler<String>>>(); /* /********************************************************** /* Life-cycle /********************************************************** */ public SmileParser(IOContext ctxt, int parserFeatures, int smileFeatures, ObjectCodec codec, BytesToNameCanonicalizer sym, InputStream in, byte[] inputBuffer, int start, int end, boolean bufferRecyclable) { super(ctxt, parserFeatures); _objectCodec = codec; _symbols = sym; _inputStream = in; _inputBuffer = inputBuffer; _inputPtr = start; _inputEnd = end; _bufferRecyclable = bufferRecyclable; _tokenInputRow = -1; _tokenInputCol = -1; _smileBufferRecycler = _smileBufferRecycler(); } @Override public ObjectCodec getCodec() { return _objectCodec; } @Override public void setCodec(ObjectCodec c) { _objectCodec = c; } /** * Helper method called when it looks like input might contain the signature; * and it is necessary to detect and handle signature to get configuration * information it might have. * * @return True if valid signature was found and handled; false if not */ protected boolean handleSignature(boolean consumeFirstByte, boolean throwException) throws IOException, JsonParseException { if (consumeFirstByte) { ++_inputPtr; } if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } if (_inputBuffer[_inputPtr] != SmileConstants.HEADER_BYTE_2) { if (throwException) { _reportError("Malformed content: signature not valid, starts with 0x3a but followed by 0x" + Integer.toHexString(_inputBuffer[_inputPtr]) + ", not 0x29"); } return false; } if (++_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } if (_inputBuffer[_inputPtr] != SmileConstants.HEADER_BYTE_3) { if (throwException) { _reportError("Malformed content: signature not valid, starts with 0x3a, 0x29, but followed by 0x" + Integer.toHexString(_inputBuffer[_inputPtr]) + ", not 0xA"); } return false; } // Good enough; just need version info from 4th byte... if (++_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int ch = _inputBuffer[_inputPtr++]; int versionBits = (ch >> 4) & 0x0F; // but failure with version number is fatal, can not ignore if (versionBits != SmileConstants.HEADER_VERSION_0) { _reportError("Header version number bits (0x" + Integer.toHexString(versionBits) + ") indicate unrecognized version; only 0x0 handled by parser"); } // can avoid tracking names, if explicitly disabled if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_NAMES) == 0) { _seenNames = null; _seenNameCount = -1; } // conversely, shared string values must be explicitly enabled if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_STRING_VALUES) != 0) { _seenStringValues = NO_STRINGS; _seenStringValueCount = 0; } _mayContainRawBinary = ((ch & SmileConstants.HEADER_BIT_HAS_RAW_BINARY) != 0); return true; } protected final static SmileBufferRecycler<String> _smileBufferRecycler() { SoftReference<SmileBufferRecycler<String>> ref = _smileRecyclerRef.get(); SmileBufferRecycler<String> br = (ref == null) ? null : ref.get(); if (br == null) { br = new SmileBufferRecycler<String>(); _smileRecyclerRef.set(new SoftReference<SmileBufferRecycler<String>>(br)); } return br; } /* /********************************************************** /* Versioned /********************************************************** */ @Override public Version version() { return PackageVersion.VERSION; } /* /********************************************************** /* Former StreamBasedParserBase methods /********************************************************** */ @Override public int releaseBuffered(OutputStream out) throws IOException { int count = _inputEnd - _inputPtr; if (count < 1) { return 0; } // let's just advance ptr to end int origPtr = _inputPtr; out.write(_inputBuffer, origPtr, count); return count; } @Override public Object getInputSource() { return _inputStream; } /** * Overridden since we do not really have character-based locations, * but we do have byte offset to specify. */ @Override public JsonLocation getTokenLocation() { // token location is correctly managed... return new JsonLocation(_ioContext.getSourceReference(), _tokenInputTotal, // bytes -1, -1, (int) _tokenInputTotal); // char offset, line, column } /** * Overridden since we do not really have character-based locations, * but we do have byte offset to specify. */ @Override public JsonLocation getCurrentLocation() { final long offset = _currInputProcessed + _inputPtr; return new JsonLocation(_ioContext.getSourceReference(), offset, // bytes -1, -1, (int) offset); // char offset, line, column } /* /********************************************************** /* Low-level reading, other /********************************************************** */ @Override protected final boolean loadMore() throws IOException { _currInputProcessed += _inputEnd; //_currInputRowStart -= _inputEnd; if (_inputStream != null) { int count = _inputStream.read(_inputBuffer, 0, _inputBuffer.length); if (count > 0) { _inputPtr = 0; _inputEnd = count; return true; } // End of input _closeInput(); // Should never return 0, so let's fail if (count == 0) { throw new IOException("InputStream.read() returned 0 characters when trying to read " + _inputBuffer.length + " bytes"); } } return false; } /** * Helper method that will try to load at least specified number bytes in * input buffer, possible moving existing data around if necessary * * @since 1.6 */ protected final boolean _loadToHaveAtLeast(int minAvailable) throws IOException { // No input stream, no leading (either we are closed, or have non-stream input source) if (_inputStream == null) { return false; } // Need to move remaining data in front? int amount = _inputEnd - _inputPtr; if (amount > 0 && _inputPtr > 0) { _currInputProcessed += _inputPtr; //_currInputRowStart -= _inputPtr; System.arraycopy(_inputBuffer, _inputPtr, _inputBuffer, 0, amount); _inputEnd = amount; } else { _inputEnd = 0; } _inputPtr = 0; while (_inputEnd < minAvailable) { int count = _inputStream.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd); if (count < 1) { // End of input _closeInput(); // Should never return 0, so let's fail if (count == 0) { throw new IOException( "InputStream.read() returned 0 characters when trying to read " + amount + " bytes"); } return false; } _inputEnd += count; } return true; } @Override protected void _closeInput() throws IOException { /* 25-Nov-2008, tatus: As per [JACKSON-16] we are not to call close() * on the underlying InputStream, unless we "own" it, or auto-closing * feature is enabled. */ if (_inputStream != null) { if (_ioContext.isResourceManaged() || isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE)) { _inputStream.close(); } _inputStream = null; } } /* /********************************************************** /* Overridden methods /********************************************************** */ @Override protected void _finishString() throws IOException, JsonParseException { // should never be called; but must be defined for superclass _throwInternal(); } @Override public void close() throws IOException { super.close(); // Merge found symbols, if any: _symbols.release(); } @Override public boolean hasTextCharacters() { if (_currToken == JsonToken.VALUE_STRING) { // yes; is or can be made available efficiently as char[] return _textBuffer.hasTextAsCharacters(); } if (_currToken == JsonToken.FIELD_NAME) { // not necessarily; possible but: return _nameCopied; } // other types, no benefit from accessing as char[] return false; } /** * Method called to release internal buffers owned by the base * reader. This may be called along with {@link #_closeInput} (for * example, when explicitly closing this reader instance), or * separately (if need be). */ @Override protected void _releaseBuffers() throws IOException { super._releaseBuffers(); if (_bufferRecyclable) { byte[] buf = _inputBuffer; if (buf != null) { _inputBuffer = null; _ioContext.releaseReadIOBuffer(buf); } } { String[] nameBuf = _seenNames; if (nameBuf != null && nameBuf.length > 0) { _seenNames = null; /* 28-Jun-2011, tatu: With 1.9, caller needs to clear the buffer; * but we only need to clear up to count as it is not a hash area */ if (_seenNameCount > 0) { Arrays.fill(nameBuf, 0, _seenNameCount, null); } _smileBufferRecycler.releaseSeenNamesBuffer(nameBuf); } } { String[] valueBuf = _seenStringValues; if (valueBuf != null && valueBuf.length > 0) { _seenStringValues = null; /* 28-Jun-2011, tatu: With 1.9, caller needs to clear the buffer; * but we only need to clear up to count as it is not a hash area */ if (_seenStringValueCount > 0) { Arrays.fill(valueBuf, 0, _seenStringValueCount, null); } _smileBufferRecycler.releaseSeenStringValuesBuffer(valueBuf); } } } /* /********************************************************** /* Extended API /********************************************************** */ public boolean mayContainRawBinary() { return _mayContainRawBinary; } /* /********************************************************** /* JsonParser impl /********************************************************** */ @Override public JsonToken nextToken() throws IOException, JsonParseException { _numTypesValid = NR_UNKNOWN; // For longer tokens (text, binary), we'll only read when requested if (_tokenIncomplete) { _skipIncomplete(); } _tokenInputTotal = _currInputProcessed + _inputPtr; // also: clear any data retained so far _binaryValue = null; // Two main modes: values, and field names. if (_parsingContext.inObject() && _currToken != JsonToken.FIELD_NAME) { return (_currToken = _handleFieldName()); } if (_inputPtr >= _inputEnd) { if (!loadMore()) { _handleEOF(); /* NOTE: here we can and should close input, release buffers, * since this is "hard" EOF, not a boundary imposed by * header token. */ close(); return (_currToken = null); } } int ch = _inputBuffer[_inputPtr++]; _typeByte = ch; switch ((ch >> 5) & 0x7) { case 0: // short shared string value reference if (ch == 0) { // important: this is invalid, don't accept _reportError("Invalid token byte 0x00"); } return _handleSharedString(ch - 1); case 1: // simple literals, numbers { int typeBits = ch & 0x1F; if (typeBits < 4) { switch (typeBits) { case 0x00: _textBuffer.resetWithEmpty(); return (_currToken = JsonToken.VALUE_STRING); case 0x01: return (_currToken = JsonToken.VALUE_NULL); case 0x02: // false return (_currToken = JsonToken.VALUE_FALSE); default: // 0x03 == true return (_currToken = JsonToken.VALUE_TRUE); } } // next 3 bytes define subtype if (typeBits < 8) { // VInt (zigzag), BigInteger if ((typeBits & 0x3) <= 0x2) { // 0x3 reserved (should never occur) _tokenIncomplete = true; _numTypesValid = 0; return (_currToken = JsonToken.VALUE_NUMBER_INT); } break; } if (typeBits < 12) { // floating-point int subtype = typeBits & 0x3; if (subtype <= 0x2) { // 0x3 reserved (should never occur) _tokenIncomplete = true; _numTypesValid = 0; _got32BitFloat = (subtype == 0); return (_currToken = JsonToken.VALUE_NUMBER_FLOAT); } break; } if (typeBits == 0x1A) { // == 0x3A == ':' -> possibly header signature for next chunk? if (handleSignature(false, false)) { /* Ok, now; end-marker and header both imply doc boundary and a * 'null token'; but if both are seen, they are collapsed. * We can check this by looking at current token; if it's null, * need to get non-null token */ if (_currToken == null) { return nextToken(); } return (_currToken = null); } } _reportError("Unrecognized token byte 0x3A (malformed segment header?"); } // and everything else is reserved, for now break; case 2: // tiny ASCII // fall through case 3: // short ASCII // fall through case 4: // tiny Unicode // fall through case 5: // short Unicode // No need to decode, unless we have to keep track of back-references (for shared string values) _currToken = JsonToken.VALUE_STRING; if (_seenStringValueCount >= 0) { // shared text values enabled _addSeenStringValue(); } else { _tokenIncomplete = true; } return _currToken; case 6: // small integers; zigzag encoded _numberInt = SmileUtil.zigzagDecode(ch & 0x1F); _numTypesValid = NR_INT; return (_currToken = JsonToken.VALUE_NUMBER_INT); case 7: // binary/long-text/long-shared/start-end-markers switch (ch & 0x1F) { case 0x00: // long variable length ASCII case 0x04: // long variable length unicode _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_STRING); case 0x08: // binary, 7-bit (0xE8) _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_EMBEDDED_OBJECT); case 0x0C: // long shared string (0xEC) case 0x0D: case 0x0E: case 0x0F: if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } return _handleSharedString(((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF)); case 0x18: // START_ARRAY _parsingContext = _parsingContext.createChildArrayContext(-1, -1); return (_currToken = JsonToken.START_ARRAY); case 0x19: // END_ARRAY if (!_parsingContext.inArray()) { _reportMismatchedEndMarker(']', '}'); } _parsingContext = _parsingContext.getParent(); return (_currToken = JsonToken.END_ARRAY); case 0x1A: // START_OBJECT _parsingContext = _parsingContext.createChildObjectContext(-1, -1); return (_currToken = JsonToken.START_OBJECT); case 0x1B: // not used in this mode; would be END_OBJECT _reportError("Invalid type marker byte 0xFB in value mode (would be END_OBJECT in key mode)"); case 0x1D: // binary, raw _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_EMBEDDED_OBJECT); case 0x1F: // 0xFF, end of content return (_currToken = null); } break; } // If we get this far, type byte is corrupt _reportError("Invalid type marker byte 0x" + Integer.toHexString(ch & 0xFF) + " for expected value token"); return null; } private final JsonToken _handleSharedString(int index) throws IOException, JsonParseException { if (index >= _seenStringValueCount) { _reportInvalidSharedStringValue(index); } _textBuffer.resetWithString(_seenStringValues[index]); return (_currToken = JsonToken.VALUE_STRING); } private final void _addSeenStringValue() throws IOException, JsonParseException { _finishToken(); if (_seenStringValueCount < _seenStringValues.length) { // !!! TODO: actually only store char[], first time around? _seenStringValues[_seenStringValueCount++] = _textBuffer.contentsAsString(); return; } _expandSeenStringValues(); } private final void _expandSeenStringValues() { String[] oldShared = _seenStringValues; int len = oldShared.length; String[] newShared; if (len == 0) { newShared = _smileBufferRecycler.allocSeenStringValuesBuffer(); if (newShared == null) { newShared = new String[SmileBufferRecycler.DEFAULT_STRING_VALUE_BUFFER_LENGTH]; } } else if (len == SmileConstants.MAX_SHARED_STRING_VALUES) { // too many? Just flush... newShared = oldShared; _seenStringValueCount = 0; // could also clear, but let's not yet bother } else { int newSize = (len == SmileBufferRecycler.DEFAULT_NAME_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_STRING_VALUES; newShared = new String[newSize]; System.arraycopy(oldShared, 0, newShared, 0, oldShared.length); } _seenStringValues = newShared; _seenStringValues[_seenStringValueCount++] = _textBuffer.contentsAsString(); } // base impl is fine: //public String getCurrentName() throws IOException, JsonParseException @Override public NumberType getNumberType() throws IOException, JsonParseException { if (_got32BitFloat) { return NumberType.FLOAT; } return super.getNumberType(); } /* /********************************************************** /* Public API, traversal, nextXxxValue/nextFieldName /********************************************************** */ @Override public boolean nextFieldName(SerializableString str) throws IOException, JsonParseException { // Two parsing modes; can only succeed if expecting field name, so handle that first: if (_parsingContext.inObject() && _currToken != JsonToken.FIELD_NAME) { byte[] nameBytes = str.asQuotedUTF8(); final int byteLen = nameBytes.length; // need room for type byte, name bytes, possibly end marker, so: if ((_inputPtr + byteLen + 1) < _inputEnd) { // maybe... int ptr = _inputPtr; int ch = _inputBuffer[ptr++]; _typeByte = ch; main_switch: switch ((ch >> 6) & 3) { case 0: // misc, including end marker switch (ch) { case 0x20: // empty String as name, legal if unusual _currToken = JsonToken.FIELD_NAME; _inputPtr = ptr; _parsingContext.setCurrentName(""); return (byteLen == 0); case 0x30: // long shared case 0x31: case 0x32: case 0x33: { int index = ((ch & 0x3) << 8) + (_inputBuffer[ptr++] & 0xFF); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } String name = _seenNames[index]; _parsingContext.setCurrentName(name); _inputPtr = ptr; _currToken = JsonToken.FIELD_NAME; return (name.equals(str.getValue())); } //case 0x34: // long ASCII/Unicode name; let's not even try... } break; case 1: // short shared, can fully process { int index = (ch & 0x3F); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } _parsingContext.setCurrentName(_seenNames[index]); String name = _seenNames[index]; _parsingContext.setCurrentName(name); _inputPtr = ptr; _currToken = JsonToken.FIELD_NAME; return (name.equals(str.getValue())); } case 2: // short ASCII { int len = 1 + (ch & 0x3f); if (len == byteLen) { int i = 0; for (; i < len; ++i) { if (nameBytes[i] != _inputBuffer[ptr + i]) { break main_switch; } } // yes, does match... _inputPtr = ptr + len; final String name = str.getValue(); if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return true; } } break; case 3: // short Unicode // all valid, except for 0xFF { int len = (ch & 0x3F); if (len > 0x37) { if (len == 0x3B) { _currToken = JsonToken.END_OBJECT; if (!_parsingContext.inObject()) { _reportMismatchedEndMarker('}', ']'); } _inputPtr = ptr; _parsingContext = _parsingContext.getParent(); return false; } // error, but let's not worry about that here break; } len += 2; // values from 2 to 57... if (len == byteLen) { int i = 0; for (; i < len; ++i) { if (nameBytes[i] != _inputBuffer[ptr + i]) { break main_switch; } } // yes, does match... _inputPtr = ptr + len; final String name = str.getValue(); if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return true; } } break; } } // otherwise fall back to default processing: JsonToken t = _handleFieldName(); _currToken = t; return (t == JsonToken.FIELD_NAME) && str.getValue().equals(_parsingContext.getCurrentName()); } // otherwise just fall back to default handling; should occur rarely return (nextToken() == JsonToken.FIELD_NAME) && str.getValue().equals(getCurrentName()); } @Override public String nextTextValue() throws IOException, JsonParseException { // can't get text value if expecting name, so if (!_parsingContext.inObject() || _currToken == JsonToken.FIELD_NAME) { if (_tokenIncomplete) { _skipIncomplete(); } int ptr = _inputPtr; if (ptr >= _inputEnd) { if (!loadMore()) { _handleEOF(); close(); _currToken = null; return null; } ptr = _inputPtr; } int ch = _inputBuffer[ptr++]; _tokenInputTotal = _currInputProcessed + _inputPtr; // also: clear any data retained so far _binaryValue = null; _typeByte = ch; switch ((ch >> 5) & 0x7) { case 0: // short shared string value reference if (ch == 0) { // important: this is invalid, don't accept _reportError("Invalid token byte 0x00"); } // _handleSharedString... { --ch; if (ch >= _seenStringValueCount) { _reportInvalidSharedStringValue(ch); } _inputPtr = ptr; String text = _seenStringValues[ch]; _textBuffer.resetWithString(text); _currToken = JsonToken.VALUE_STRING; return text; } case 1: // simple literals, numbers { int typeBits = ch & 0x1F; if (typeBits == 0x00) { _inputPtr = ptr; _textBuffer.resetWithEmpty(); _currToken = JsonToken.VALUE_STRING; return ""; } } break; case 2: // tiny ASCII // fall through case 3: // short ASCII _currToken = JsonToken.VALUE_STRING; _inputPtr = ptr; _decodeShortAsciiValue(1 + (ch & 0x3F)); { // No need to decode, unless we have to keep track of back-references (for shared string values) String text; if (_seenStringValueCount >= 0) { // shared text values enabled if (_seenStringValueCount < _seenStringValues.length) { text = _textBuffer.contentsAsString(); _seenStringValues[_seenStringValueCount++] = text; } else { _expandSeenStringValues(); text = _textBuffer.contentsAsString(); } } else { text = _textBuffer.contentsAsString(); } return text; } case 4: // tiny Unicode // fall through case 5: // short Unicode _currToken = JsonToken.VALUE_STRING; _inputPtr = ptr; _decodeShortUnicodeValue(2 + (ch & 0x3F)); { // No need to decode, unless we have to keep track of back-references (for shared string values) String text; if (_seenStringValueCount >= 0) { // shared text values enabled if (_seenStringValueCount < _seenStringValues.length) { text = _textBuffer.contentsAsString(); _seenStringValues[_seenStringValueCount++] = text; } else { _expandSeenStringValues(); text = _textBuffer.contentsAsString(); } } else { text = _textBuffer.contentsAsString(); } return text; } case 6: // small integers; zigzag encoded break; case 7: // binary/long-text/long-shared/start-end-markers // TODO: support longer strings too? /* switch (ch & 0x1F) { case 0x00: // long variable length ASCII case 0x04: // long variable length unicode _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_STRING); case 0x08: // binary, 7-bit break main; case 0x0C: // long shared string case 0x0D: case 0x0E: case 0x0F: if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } return _handleSharedString(((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF)); } break; */ break; } } // otherwise fall back to generic handling: return (nextToken() == JsonToken.VALUE_STRING) ? getText() : null; } @Override public int nextIntValue(int defaultValue) throws IOException, JsonParseException { if (nextToken() == JsonToken.VALUE_NUMBER_INT) { return getIntValue(); } return defaultValue; } @Override public long nextLongValue(long defaultValue) throws IOException, JsonParseException { if (nextToken() == JsonToken.VALUE_NUMBER_INT) { return getLongValue(); } return defaultValue; } @Override public Boolean nextBooleanValue() throws IOException, JsonParseException { switch (nextToken()) { case VALUE_TRUE: return Boolean.TRUE; case VALUE_FALSE: return Boolean.FALSE; default: return null; } } /* /********************************************************** /* Public API, access to token information, text /********************************************************** */ /** * Method for accessing textual representation of the current event; * if no current event (before first call to {@link #nextToken}, or * after encountering end-of-input), returns null. * Method can be called for any event. */ @Override public String getText() throws IOException, JsonParseException { if (_tokenIncomplete) { _tokenIncomplete = false; // Let's inline part of "_finishToken", common case int tb = _typeByte; int type = (tb >> 5) & 0x7; if (type == 2 || type == 3) { // tiny & short ASCII _decodeShortAsciiValue(1 + (tb & 0x3F)); return _textBuffer.contentsAsString(); } if (type == 4 || type == 5) { // tiny & short Unicode // short unicode; note, lengths 2 - 65 (off-by-one compared to ASCII) _decodeShortUnicodeValue(2 + (tb & 0x3F)); return _textBuffer.contentsAsString(); } _finishToken(); } if (_currToken == JsonToken.VALUE_STRING) { return _textBuffer.contentsAsString(); } JsonToken t = _currToken; if (t == null) { // null only before/after document return null; } if (t == JsonToken.FIELD_NAME) { return _parsingContext.getCurrentName(); } if (t.isNumeric()) { // TODO: optimize? return getNumberValue().toString(); } return _currToken.asString(); } @Override public char[] getTextCharacters() throws IOException, JsonParseException { if (_currToken != null) { // null only before/after document if (_tokenIncomplete) { _finishToken(); } switch (_currToken) { case VALUE_STRING: return _textBuffer.getTextBuffer(); case FIELD_NAME: if (!_nameCopied) { String name = _parsingContext.getCurrentName(); int nameLen = name.length(); if (_nameCopyBuffer == null) { _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen); } else if (_nameCopyBuffer.length < nameLen) { _nameCopyBuffer = new char[nameLen]; } name.getChars(0, nameLen, _nameCopyBuffer, 0); _nameCopied = true; } return _nameCopyBuffer; // fall through case VALUE_NUMBER_INT: case VALUE_NUMBER_FLOAT: // TODO: optimize return getNumberValue().toString().toCharArray(); default: return _currToken.asCharArray(); } } return null; } @Override public int getTextLength() throws IOException, JsonParseException { if (_currToken != null) { // null only before/after document if (_tokenIncomplete) { _finishToken(); } switch (_currToken) { case VALUE_STRING: return _textBuffer.size(); case FIELD_NAME: return _parsingContext.getCurrentName().length(); // fall through case VALUE_NUMBER_INT: case VALUE_NUMBER_FLOAT: // TODO: optimize return getNumberValue().toString().length(); default: return _currToken.asCharArray().length; } } return 0; } @Override public int getTextOffset() throws IOException, JsonParseException { return 0; } @Override public String getValueAsString() throws IOException, JsonParseException { if (_currToken != JsonToken.VALUE_STRING) { if (_currToken == null || _currToken == JsonToken.VALUE_NULL || !_currToken.isScalarValue()) { return null; } } return getText(); } @Override public String getValueAsString(String defaultValue) throws IOException, JsonParseException { if (_currToken != JsonToken.VALUE_STRING) { if (_currToken == null || _currToken == JsonToken.VALUE_NULL || !_currToken.isScalarValue()) { return defaultValue; } } return getText(); } /* /********************************************************** /* Public API, access to token information, binary /********************************************************** */ @Override public byte[] getBinaryValue(Base64Variant b64variant) throws IOException, JsonParseException { if (_tokenIncomplete) { _finishToken(); } if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT) { // Todo, maybe: support base64 for text? _reportError("Current token (" + _currToken + ") not VALUE_EMBEDDED_OBJECT, can not access as binary"); } return _binaryValue; } @Override public Object getEmbeddedObject() throws IOException, JsonParseException { if (_tokenIncomplete) { _finishToken(); } if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT) { return _binaryValue; } return null; } @Override public int readBinaryValue(Base64Variant b64variant, OutputStream out) throws IOException, JsonParseException { if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT) { // Todo, maybe: support base64 for text? _reportError("Current token (" + _currToken + ") not VALUE_EMBEDDED_OBJECT, can not access as binary"); } // Ok, first, unlikely (but legal?) case where someone already requested binary data: if (!_tokenIncomplete) { if (_binaryValue == null) { // most likely already read... return 0; } final int len = _binaryValue.length; out.write(_binaryValue, 0, len); return len; } // otherwise, handle, mark as complete // first, raw inlined binary data (simple) if (_typeByte == SmileConstants.TOKEN_MISC_BINARY_RAW) { final int totalCount = _readUnsignedVInt(); int left = totalCount; while (left > 0) { int avail = _inputEnd - _inputPtr; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); avail = _inputEnd - _inputPtr; } int count = Math.min(avail, left); out.write(_inputBuffer, _inputPtr, count); _inputPtr += count; left -= count; } _tokenIncomplete = false; return totalCount; } if (_typeByte != SmileConstants.TOKEN_MISC_BINARY_7BIT) { _throwInternal(); } // or, alternative, 7-bit encoded stuff: final int totalCount = _readUnsignedVInt(); byte[] encodingBuffer = _ioContext.allocBase64Buffer(); try { _readBinaryEncoded(out, totalCount, encodingBuffer); } finally { _ioContext.releaseBase64Buffer(encodingBuffer); } _tokenIncomplete = false; return totalCount; } private void _readBinaryEncoded(OutputStream out, int length, byte[] buffer) throws IOException, JsonParseException { int outPtr = 0; final int lastSafeOut = buffer.length - 7; // first handle all full 7/8 units while (length > 7) { if ((_inputEnd - _inputPtr) < 8) { _loadToHaveAtLeast(8); } int i1 = (_inputBuffer[_inputPtr++] << 25) + (_inputBuffer[_inputPtr++] << 18) + (_inputBuffer[_inputPtr++] << 11) + (_inputBuffer[_inputPtr++] << 4); int x = _inputBuffer[_inputPtr++]; i1 += x >> 3; int i2 = ((x & 0x7) << 21) + (_inputBuffer[_inputPtr++] << 14) + (_inputBuffer[_inputPtr++] << 7) + _inputBuffer[_inputPtr++]; // Ok: got our 7 bytes, just need to split, copy buffer[outPtr++] = (byte) (i1 >> 24); buffer[outPtr++] = (byte) (i1 >> 16); buffer[outPtr++] = (byte) (i1 >> 8); buffer[outPtr++] = (byte) i1; buffer[outPtr++] = (byte) (i2 >> 16); buffer[outPtr++] = (byte) (i2 >> 8); buffer[outPtr++] = (byte) i2; length -= 7; // ensure there's always room for at least 7 bytes more after looping: if (outPtr > lastSafeOut) { out.write(buffer, 0, outPtr); outPtr = 0; } } // and then leftovers: n+1 bytes to decode n bytes if (length > 0) { if ((_inputEnd - _inputPtr) < (length + 1)) { _loadToHaveAtLeast(length + 1); } int value = _inputBuffer[_inputPtr++]; for (int i = 1; i < length; ++i) { value = (value << 7) + _inputBuffer[_inputPtr++]; buffer[outPtr++] = (byte) (value >> (7 - i)); } // last byte is different, has remaining 1 - 6 bits, right-aligned value <<= length; buffer[outPtr++] = (byte) (value + _inputBuffer[_inputPtr++]); } if (outPtr > 0) { out.write(buffer, 0, outPtr); } } /* /********************************************************** /* Internal methods, field name parsing /********************************************************** */ /** * Method that handles initial token type recognition for token * that has to be either FIELD_NAME or END_OBJECT. */ protected final JsonToken _handleFieldName() throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int ch = _inputBuffer[_inputPtr++]; // is this needed? _typeByte = ch; switch ((ch >> 6) & 3) { case 0: // misc, including end marker switch (ch) { case 0x20: // empty String as name, legal if unusual _parsingContext.setCurrentName(""); return JsonToken.FIELD_NAME; case 0x30: // long shared case 0x31: case 0x32: case 0x33: { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int index = ((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } _parsingContext.setCurrentName(_seenNames[index]); } return JsonToken.FIELD_NAME; case 0x34: // long ASCII/Unicode name _handleLongFieldName(); return JsonToken.FIELD_NAME; } break; case 1: // short shared, can fully process { int index = (ch & 0x3F); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } _parsingContext.setCurrentName(_seenNames[index]); } return JsonToken.FIELD_NAME; case 2: // short ASCII { int len = 1 + (ch & 0x3f); String name; Name n = _findDecodedFromSymbols(len); if (n != null) { name = n.getName(); _inputPtr += len; } else { name = _decodeShortAsciiName(len); name = _addDecodedToSymbols(len, name); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); } return JsonToken.FIELD_NAME; case 3: // short Unicode // all valid, except for 0xFF ch &= 0x3F; { if (ch > 0x37) { if (ch == 0x3B) { if (!_parsingContext.inObject()) { _reportMismatchedEndMarker('}', ']'); } _parsingContext = _parsingContext.getParent(); return JsonToken.END_OBJECT; } } else { final int len = ch + 2; // values from 2 to 57... String name; Name n = _findDecodedFromSymbols(len); if (n != null) { name = n.getName(); _inputPtr += len; } else { name = _decodeShortUnicodeName(len); name = _addDecodedToSymbols(len, name); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); return JsonToken.FIELD_NAME; } } break; } // Other byte values are illegal _reportError("Invalid type marker byte 0x" + Integer.toHexString(_typeByte) + " for expected field name (or END_OBJECT marker)"); return null; } /** * Method called to try to expand shared name area to fit one more potentially * shared String. If area is already at its biggest size, will just clear * the area (by setting next-offset to 0) */ private final String[] _expandSeenNames(String[] oldShared) { int len = oldShared.length; String[] newShared; if (len == 0) { newShared = _smileBufferRecycler.allocSeenNamesBuffer(); if (newShared == null) { newShared = new String[SmileBufferRecycler.DEFAULT_NAME_BUFFER_LENGTH]; } } else if (len == SmileConstants.MAX_SHARED_NAMES) { // too many? Just flush... newShared = oldShared; _seenNameCount = 0; // could also clear, but let's not yet bother } else { int newSize = (len == SmileBufferRecycler.DEFAULT_STRING_VALUE_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_NAMES; newShared = new String[newSize]; System.arraycopy(oldShared, 0, newShared, 0, oldShared.length); } return newShared; } private final String _addDecodedToSymbols(int len, String name) { if (len < 5) { return _symbols.addName(name, _quad1, 0).getName(); } if (len < 9) { return _symbols.addName(name, _quad1, _quad2).getName(); } int qlen = (len + 3) >> 2; return _symbols.addName(name, _quadBuffer, qlen).getName(); } private final String _decodeShortAsciiName(int len) throws IOException, JsonParseException { // note: caller ensures we have enough bytes available char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int outPtr = 0; final byte[] inBuf = _inputBuffer; int inPtr = _inputPtr; // loop unrolling seems to help here: for (int inEnd = inPtr + len - 3; inPtr < inEnd;) { outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; } int left = (len & 3); if (left > 0) { outBuf[outPtr++] = (char) inBuf[inPtr++]; if (left > 1) { outBuf[outPtr++] = (char) inBuf[inPtr++]; if (left > 2) { outBuf[outPtr++] = (char) inBuf[inPtr++]; } } } _inputPtr = inPtr; _textBuffer.setCurrentLength(len); return _textBuffer.contentsAsString(); } /** * Helper method used to decode short Unicode string, length for which actual * length (in bytes) is known * * @param len Length between 1 and 64 */ private final String _decodeShortUnicodeName(int len) throws IOException, JsonParseException { // note: caller ensures we have enough bytes available int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int inPtr = _inputPtr; _inputPtr += len; final int[] codes = SmileConstants.sUtf8UnitLengths; final byte[] inBuf = _inputBuffer; for (int end = inPtr + len; inPtr < end;) { int i = inBuf[inPtr++] & 0xFF; int code = codes[i]; if (code != 0) { // trickiest one, need surrogate handling switch (code) { case 1: i = ((i & 0x1F) << 6) | (inBuf[inPtr++] & 0x3F); break; case 2: i = ((i & 0x0F) << 12) | ((inBuf[inPtr++] & 0x3F) << 6) | (inBuf[inPtr++] & 0x3F); break; case 3: i = ((i & 0x07) << 18) | ((inBuf[inPtr++] & 0x3F) << 12) | ((inBuf[inPtr++] & 0x3F) << 6) | (inBuf[inPtr++] & 0x3F); // note: this is the codepoint value; need to split, too i -= 0x10000; outBuf[outPtr++] = (char) (0xD800 | (i >> 10)); i = 0xDC00 | (i & 0x3FF); break; default: // invalid _reportError("Invalid byte " + Integer.toHexString(i) + " in short Unicode text block"); } } outBuf[outPtr++] = (char) i; } _textBuffer.setCurrentLength(outPtr); return _textBuffer.contentsAsString(); } // note: slightly edited copy of UTF8StreamParser.addName() private final Name _decodeLongUnicodeName(int[] quads, int byteLen, int quadLen) throws IOException, JsonParseException { int lastQuadBytes = byteLen & 3; // Ok: must decode UTF-8 chars. No other validation SHOULD be needed (except bounds checks?) /* Note: last quad is not correctly aligned (leading zero bytes instead * need to shift a bit, instead of trailing). Only need to shift it * for UTF-8 decoding; need revert for storage (since key will not * be aligned, to optimize lookup speed) */ int lastQuad; if (lastQuadBytes < 4) { lastQuad = quads[quadLen - 1]; // 8/16/24 bit left shift quads[quadLen - 1] = (lastQuad << ((4 - lastQuadBytes) << 3)); } else { lastQuad = 0; } char[] cbuf = _textBuffer.emptyAndGetCurrentSegment(); int cix = 0; for (int ix = 0; ix < byteLen;) { int ch = quads[ix >> 2]; // current quad, need to shift+mask int byteIx = (ix & 3); ch = (ch >> ((3 - byteIx) << 3)) & 0xFF; ++ix; if (ch > 127) { // multi-byte int needed; if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF) ch &= 0x1F; needed = 1; } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF) ch &= 0x0F; needed = 2; } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all... ch &= 0x07; needed = 3; } else { // 5- and 6-byte chars not valid chars _reportInvalidInitial(ch); needed = ch = 1; // never really gets this far } if ((ix + needed) > byteLen) { _reportInvalidEOF(" in long field name"); } // Ok, always need at least one more: int ch2 = quads[ix >> 2]; // current quad, need to shift+mask byteIx = (ix & 3); ch2 = (ch2 >> ((3 - byteIx) << 3)); ++ix; if ((ch2 & 0xC0) != 0x080) { _reportInvalidOther(ch2); } ch = (ch << 6) | (ch2 & 0x3F); if (needed > 1) { ch2 = quads[ix >> 2]; byteIx = (ix & 3); ch2 = (ch2 >> ((3 - byteIx) << 3)); ++ix; if ((ch2 & 0xC0) != 0x080) { _reportInvalidOther(ch2); } ch = (ch << 6) | (ch2 & 0x3F); if (needed > 2) { // 4 bytes? (need surrogates on output) ch2 = quads[ix >> 2]; byteIx = (ix & 3); ch2 = (ch2 >> ((3 - byteIx) << 3)); ++ix; if ((ch2 & 0xC0) != 0x080) { _reportInvalidOther(ch2 & 0xFF); } ch = (ch << 6) | (ch2 & 0x3F); } } if (needed > 2) { // surrogate pair? once again, let's output one here, one later on ch -= 0x10000; // to normalize it starting with 0x0 if (cix >= cbuf.length) { cbuf = _textBuffer.expandCurrentSegment(); } cbuf[cix++] = (char) (0xD800 + (ch >> 10)); ch = 0xDC00 | (ch & 0x03FF); } } if (cix >= cbuf.length) { cbuf = _textBuffer.expandCurrentSegment(); } cbuf[cix++] = (char) ch; } // Ok. Now we have the character array, and can construct the String String baseName = new String(cbuf, 0, cix); // And finally, un-align if necessary if (lastQuadBytes < 4) { quads[quadLen - 1] = lastQuad; } return _symbols.addName(baseName, quads, quadLen); } private final void _handleLongFieldName() throws IOException, JsonParseException { // First: gather quads we need, looking for end marker final byte[] inBuf = _inputBuffer; int quads = 0; int bytes = 0; int q = 0; while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } byte b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 0; break; } q = ((int) b) & 0xFF; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 1; break; } q = (q << 8) | (b & 0xFF); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 2; break; } q = (q << 8) | (b & 0xFF); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 3; break; } q = (q << 8) | (b & 0xFF); if (quads >= _quadBuffer.length) { _quadBuffer = _growArrayTo(_quadBuffer, _quadBuffer.length + 256); // grow by 1k } _quadBuffer[quads++] = q; } // and if we have more bytes, append those too int byteLen = (quads << 2); if (bytes > 0) { if (quads >= _quadBuffer.length) { _quadBuffer = _growArrayTo(_quadBuffer, _quadBuffer.length + 256); } _quadBuffer[quads++] = q; byteLen += bytes; } // Know this name already? String name; Name n = _symbols.findName(_quadBuffer, quads); if (n != null) { name = n.getName(); } else { name = _decodeLongUnicodeName(_quadBuffer, byteLen, quads).getName(); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); } /** * Helper method for trying to find specified encoded UTF-8 byte sequence * from symbol table; if successful avoids actual decoding to String */ private final Name _findDecodedFromSymbols(int len) throws IOException, JsonParseException { if ((_inputEnd - _inputPtr) < len) { _loadToHaveAtLeast(len); } // First: maybe we already have this name decoded? if (len < 5) { int inPtr = _inputPtr; final byte[] inBuf = _inputBuffer; int q = inBuf[inPtr] & 0xFF; if (--len > 0) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); if (--len > 0) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); if (--len > 0) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); } } } _quad1 = q; return _symbols.findName(q); } if (len < 9) { int inPtr = _inputPtr; final byte[] inBuf = _inputBuffer; // First quadbyte is easy int q1 = (inBuf[inPtr] & 0xFF) << 8; q1 += (inBuf[++inPtr] & 0xFF); q1 <<= 8; q1 += (inBuf[++inPtr] & 0xFF); q1 <<= 8; q1 += (inBuf[++inPtr] & 0xFF); int q2 = (inBuf[++inPtr] & 0xFF); len -= 5; if (len > 0) { q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); if (--len > 0) { q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); if (--len > 0) { q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); } } } _quad1 = q1; _quad2 = q2; return _symbols.findName(q1, q2); } return _findDecodedMedium(len); } /** * Method for locating names longer than 8 bytes (in UTF-8) */ private final Name _findDecodedMedium(int len) throws IOException, JsonParseException { // first, need enough buffer to store bytes as ints: { int bufLen = (len + 3) >> 2; if (bufLen > _quadBuffer.length) { _quadBuffer = _growArrayTo(_quadBuffer, bufLen); } } // then decode, full quads first int offset = 0; int inPtr = _inputPtr; final byte[] inBuf = _inputBuffer; do { int q = (inBuf[inPtr++] & 0xFF) << 8; q |= inBuf[inPtr++] & 0xFF; q <<= 8; q |= inBuf[inPtr++] & 0xFF; q <<= 8; q |= inBuf[inPtr++] & 0xFF; _quadBuffer[offset++] = q; } while ((len -= 4) > 3); // and then leftovers if (len > 0) { int q = inBuf[inPtr] & 0xFF; if (--len > 0) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); if (--len > 0) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); } } _quadBuffer[offset++] = q; } return _symbols.findName(_quadBuffer, offset); } private static int[] _growArrayTo(int[] arr, int minSize) { int[] newArray = new int[minSize + 4]; if (arr != null) { // !!! TODO: JDK 1.6, Arrays.copyOf System.arraycopy(arr, 0, newArray, 0, arr.length); } return newArray; } /* /********************************************************** /* Internal methods, secondary parsing /********************************************************** */ @Override protected void _parseNumericValue(int expType) throws IOException, JsonParseException { if (_tokenIncomplete) { int tb = _typeByte; // ensure we got a numeric type with value that is lazily parsed if (((tb >> 5) & 0x7) != 1) { _reportError("Current token (" + _currToken + ") not numeric, can not use numeric value accessors"); } _tokenIncomplete = false; _finishNumberToken(tb); } } /** * Method called to finish parsing of a token so that token contents * are retriable */ protected void _finishToken() throws IOException, JsonParseException { _tokenIncomplete = false; int tb = _typeByte; int type = ((tb >> 5) & 0x7); if (type == 1) { // simple literals, numbers _finishNumberToken(tb); return; } if (type <= 3) { // tiny & short ASCII _decodeShortAsciiValue(1 + (tb & 0x3F)); return; } if (type <= 5) { // tiny & short Unicode // short unicode; note, lengths 2 - 65 (off-by-one compared to ASCII) _decodeShortUnicodeValue(2 + (tb & 0x3F)); return; } if (type == 7) { tb &= 0x1F; // next 3 bytes define subtype switch (tb >> 2) { case 0: // long variable length ASCII _decodeLongAscii(); return; case 1: // long variable length Unicode _decodeLongUnicode(); return; case 2: // binary, 7-bit _binaryValue = _read7BitBinaryWithLength(); return; case 7: // binary, raw _finishRawBinary(); return; } } // sanity check _throwInternal(); } protected final void _finishNumberToken(int tb) throws IOException, JsonParseException { tb &= 0x1F; int type = (tb >> 2); if (type == 1) { // VInt (zigzag) or BigDecimal int subtype = tb & 0x03; if (subtype == 0) { // (v)int _finishInt(); } else if (subtype == 1) { // (v)long _finishLong(); } else if (subtype == 2) { _finishBigInteger(); } else { _throwInternal(); } return; } if (type == 2) { // other numbers switch (tb & 0x03) { case 0: // float _finishFloat(); return; case 1: // double _finishDouble(); return; case 2: // big-decimal _finishBigDecimal(); return; } } _throwInternal(); } /* /********************************************************** /* Internal methods, secondary Number parsing /********************************************************** */ private final void _finishInt() throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int value = _inputBuffer[_inputPtr++]; int i; if (value < 0) { // 6 bits value &= 0x3F; } else { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { // 13 bits value = (value << 7) + i; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { value = (value << 7) + i; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { value = (value << 7) + i; // and then we must get negative if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { _reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes"); } } } } value = (value << 6) + (i & 0x3F); } _numberInt = SmileUtil.zigzagDecode(value); _numTypesValid = NR_INT; } private final void _finishLong() throws IOException, JsonParseException { // Ok, first, will always get 4 full data bytes first; 1 was already passed long l = (long) _fourBytesToInt(); // and loop for the rest while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int value = _inputBuffer[_inputPtr++]; if (value < 0) { l = (l << 6) + (value & 0x3F); _numberLong = SmileUtil.zigzagDecode(l); _numTypesValid = NR_LONG; return; } l = (l << 7) + value; } } private final void _finishBigInteger() throws IOException, JsonParseException { byte[] raw = _read7BitBinaryWithLength(); _numberBigInt = new BigInteger(raw); _numTypesValid = NR_BIGINT; } private final void _finishFloat() throws IOException, JsonParseException { // just need 5 bytes to get int32 first; all are unsigned int i = _fourBytesToInt(); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = (i << 7) + _inputBuffer[_inputPtr++]; float f = Float.intBitsToFloat(i); _numberDouble = (double) f; _numTypesValid = NR_DOUBLE; } private final void _finishDouble() throws IOException, JsonParseException { // ok; let's take two sets of 4 bytes (each is int) long hi = _fourBytesToInt(); long value = (hi << 28) + (long) _fourBytesToInt(); // and then remaining 2 bytes if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } value = (value << 7) + _inputBuffer[_inputPtr++]; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } value = (value << 7) + _inputBuffer[_inputPtr++]; _numberDouble = Double.longBitsToDouble(value); _numTypesValid = NR_DOUBLE; } private final int _fourBytesToInt() throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int i = _inputBuffer[_inputPtr++]; // first 7 bits if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = (i << 7) + _inputBuffer[_inputPtr++]; // 14 bits if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = (i << 7) + _inputBuffer[_inputPtr++]; // 21 if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } return (i << 7) + _inputBuffer[_inputPtr++]; } private final void _finishBigDecimal() throws IOException, JsonParseException { int scale = SmileUtil.zigzagDecode(_readUnsignedVInt()); byte[] raw = _read7BitBinaryWithLength(); _numberBigDecimal = new BigDecimal(new BigInteger(raw), scale); _numTypesValid = NR_BIGDECIMAL; } private final int _readUnsignedVInt() throws IOException, JsonParseException { int value = 0; while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int i = _inputBuffer[_inputPtr++]; if (i < 0) { // last byte value = (value << 6) + (i & 0x3F); return value; } value = (value << 7) + i; } } private final byte[] _read7BitBinaryWithLength() throws IOException, JsonParseException { int byteLen = _readUnsignedVInt(); byte[] result = new byte[byteLen]; int ptr = 0; int lastOkPtr = byteLen - 7; // first, read all 7-by-8 byte chunks while (ptr <= lastOkPtr) { if ((_inputEnd - _inputPtr) < 8) { _loadToHaveAtLeast(8); } int i1 = (_inputBuffer[_inputPtr++] << 25) + (_inputBuffer[_inputPtr++] << 18) + (_inputBuffer[_inputPtr++] << 11) + (_inputBuffer[_inputPtr++] << 4); int x = _inputBuffer[_inputPtr++]; i1 += x >> 3; int i2 = ((x & 0x7) << 21) + (_inputBuffer[_inputPtr++] << 14) + (_inputBuffer[_inputPtr++] << 7) + _inputBuffer[_inputPtr++]; // Ok: got our 7 bytes, just need to split, copy result[ptr++] = (byte) (i1 >> 24); result[ptr++] = (byte) (i1 >> 16); result[ptr++] = (byte) (i1 >> 8); result[ptr++] = (byte) i1; result[ptr++] = (byte) (i2 >> 16); result[ptr++] = (byte) (i2 >> 8); result[ptr++] = (byte) i2; } // and then leftovers: n+1 bytes to decode n bytes int toDecode = (result.length - ptr); if (toDecode > 0) { if ((_inputEnd - _inputPtr) < (toDecode + 1)) { _loadToHaveAtLeast(toDecode + 1); } int value = _inputBuffer[_inputPtr++]; for (int i = 1; i < toDecode; ++i) { value = (value << 7) + _inputBuffer[_inputPtr++]; result[ptr++] = (byte) (value >> (7 - i)); } // last byte is different, has remaining 1 - 6 bits, right-aligned value <<= toDecode; result[ptr] = (byte) (value + _inputBuffer[_inputPtr++]); } return result; } /* /********************************************************** /* Internal methods, secondary String parsing /********************************************************** */ protected final void _decodeShortAsciiValue(int len) throws IOException, JsonParseException { if ((_inputEnd - _inputPtr) < len) { _loadToHaveAtLeast(len); } // Note: we count on fact that buffer must have at least 'len' (<= 64) empty char slots final char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int outPtr = 0; final byte[] inBuf = _inputBuffer; int inPtr = _inputPtr; // loop unrolling SHOULD be faster (as with _decodeShortAsciiName), but somehow // is NOT; as per testing, benchmarking... very weird. /* for (int inEnd = inPtr + len - 3; inPtr < inEnd; ) { outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; } int left = (len & 3); if (left > 0) { outBuf[outPtr++] = (char) inBuf[inPtr++]; if (left > 1) { outBuf[outPtr++] = (char) inBuf[inPtr++]; if (left > 2) { outBuf[outPtr++] = (char) inBuf[inPtr++]; } } } */ // meaning: regular tight loop is no slower, typically faster here: for (final int end = inPtr + len; inPtr < end; ++inPtr) { outBuf[outPtr++] = (char) inBuf[inPtr]; } _inputPtr = inPtr; _textBuffer.setCurrentLength(len); } protected final void _decodeShortUnicodeValue(int len) throws IOException, JsonParseException { if ((_inputEnd - _inputPtr) < len) { _loadToHaveAtLeast(len); } int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int inPtr = _inputPtr; _inputPtr += len; final int[] codes = SmileConstants.sUtf8UnitLengths; final byte[] inputBuf = _inputBuffer; for (int end = inPtr + len; inPtr < end;) { int i = inputBuf[inPtr++] & 0xFF; int code = codes[i]; if (code != 0) { // trickiest one, need surrogate handling switch (code) { case 1: i = ((i & 0x1F) << 6) | (inputBuf[inPtr++] & 0x3F); break; case 2: i = ((i & 0x0F) << 12) | ((inputBuf[inPtr++] & 0x3F) << 6) | (inputBuf[inPtr++] & 0x3F); break; case 3: i = ((i & 0x07) << 18) | ((inputBuf[inPtr++] & 0x3F) << 12) | ((inputBuf[inPtr++] & 0x3F) << 6) | (inputBuf[inPtr++] & 0x3F); // note: this is the codepoint value; need to split, too i -= 0x10000; outBuf[outPtr++] = (char) (0xD800 | (i >> 10)); i = 0xDC00 | (i & 0x3FF); break; default: // invalid _reportError("Invalid byte " + Integer.toHexString(i) + " in short Unicode text block"); } } outBuf[outPtr++] = (char) i; } _textBuffer.setCurrentLength(outPtr); } private final void _decodeLongAscii() throws IOException, JsonParseException { int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); main_loop: while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int inPtr = _inputPtr; int left = _inputEnd - inPtr; if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } left = Math.min(left, outBuf.length - outPtr); do { byte b = _inputBuffer[inPtr++]; if (b == SmileConstants.BYTE_MARKER_END_OF_STRING) { _inputPtr = inPtr; break main_loop; } outBuf[outPtr++] = (char) b; } while (--left > 0); _inputPtr = inPtr; } _textBuffer.setCurrentLength(outPtr); } private final void _decodeLongUnicode() throws IOException, JsonParseException { int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); final int[] codes = SmileConstants.sUtf8UnitLengths; int c; final byte[] inputBuffer = _inputBuffer; main_loop: while (true) { // First the tight ASCII loop: ascii_loop: while (true) { int ptr = _inputPtr; if (ptr >= _inputEnd) { loadMoreGuaranteed(); ptr = _inputPtr; } if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } int max = _inputEnd; { int max2 = ptr + (outBuf.length - outPtr); if (max2 < max) { max = max2; } } while (ptr < max) { c = (int) inputBuffer[ptr++] & 0xFF; if (codes[c] != 0) { _inputPtr = ptr; break ascii_loop; } outBuf[outPtr++] = (char) c; } _inputPtr = ptr; } // Ok: end marker, escape or multi-byte? if (c == SmileConstants.INT_MARKER_END_OF_STRING) { break main_loop; } switch (codes[c]) { case 1: // 2-byte UTF c = _decodeUtf8_2(c); break; case 2: // 3-byte UTF if ((_inputEnd - _inputPtr) >= 2) { c = _decodeUtf8_3fast(c); } else { c = _decodeUtf8_3(c); } break; case 3: // 4-byte UTF c = _decodeUtf8_4(c); // Let's add first part right away: outBuf[outPtr++] = (char) (0xD800 | (c >> 10)); if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } c = 0xDC00 | (c & 0x3FF); // And let the other char output down below break; default: // Is this good enough error message? _reportInvalidChar(c); } // Need more room? if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } // Ok, let's add char to output: outBuf[outPtr++] = (char) c; } _textBuffer.setCurrentLength(outPtr); } private final void _finishRawBinary() throws IOException, JsonParseException { int byteLen = _readUnsignedVInt(); _binaryValue = new byte[byteLen]; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int ptr = 0; while (true) { int toAdd = Math.min(byteLen, _inputEnd - _inputPtr); System.arraycopy(_inputBuffer, _inputPtr, _binaryValue, ptr, toAdd); _inputPtr += toAdd; ptr += toAdd; byteLen -= toAdd; if (byteLen <= 0) { return; } loadMoreGuaranteed(); } } /* /********************************************************** /* Internal methods, skipping /********************************************************** */ /** * Method called to skip remainders of an incomplete token, when * contents themselves will not be needed any more */ protected void _skipIncomplete() throws IOException, JsonParseException { _tokenIncomplete = false; int tb = _typeByte; switch ((tb >> 5) & 0x7) { case 1: // simple literals, numbers tb &= 0x1F; // next 3 bytes define subtype switch (tb >> 2) { case 1: // VInt (zigzag) // easy, just skip until we see sign bit... (should we try to limit damage?) switch (tb & 0x3) { case 1: // vlong _skipBytes(4); // min 5 bytes // fall through case 0: // vint while (true) { final int end = _inputEnd; final byte[] buf = _inputBuffer; while (_inputPtr < end) { if (buf[_inputPtr++] < 0) { return; } } loadMoreGuaranteed(); } case 2: // big-int // just has binary data _skip7BitBinary(); return; } break; case 2: // other numbers switch (tb & 0x3) { case 0: // float _skipBytes(5); return; case 1: // double _skipBytes(10); return; case 2: // big-decimal // first, skip scale _readUnsignedVInt(); // then length-prefixed binary serialization _skip7BitBinary(); return; } break; } break; case 2: // tiny ASCII // fall through case 3: // short ASCII _skipBytes(1 + (tb & 0x3F)); return; case 4: // tiny unicode // fall through case 5: // short unicode _skipBytes(2 + (tb & 0x3F)); return; case 7: tb &= 0x1F; // next 3 bytes define subtype switch (tb >> 2) { case 0: // long variable length ASCII case 1: // long variable length unicode /* Doesn't matter which one, just need to find the end marker * (note: can potentially skip invalid UTF-8 too) */ while (true) { final int end = _inputEnd; final byte[] buf = _inputBuffer; while (_inputPtr < end) { if (buf[_inputPtr++] == BYTE_MARKER_END_OF_STRING) { return; } } loadMoreGuaranteed(); } // never gets here case 2: // binary, 7-bit _skip7BitBinary(); return; case 7: // binary, raw _skipBytes(_readUnsignedVInt()); return; } } _throwInternal(); } protected void _skipBytes(int len) throws IOException, JsonParseException { while (true) { int toAdd = Math.min(len, _inputEnd - _inputPtr); _inputPtr += toAdd; len -= toAdd; if (len <= 0) { return; } loadMoreGuaranteed(); } } /** * Helper method for skipping length-prefixed binary data * section */ protected void _skip7BitBinary() throws IOException, JsonParseException { int origBytes = _readUnsignedVInt(); // Ok; 8 encoded bytes for 7 payload bytes first int chunks = origBytes / 7; int encBytes = chunks * 8; // and for last 0 - 6 bytes, last+1 (except none if no leftovers) origBytes -= 7 * chunks; if (origBytes > 0) { encBytes += 1 + origBytes; } _skipBytes(encBytes); } /* /********************************************************** /* Internal methods, UTF8 decoding /********************************************************** */ private final int _decodeUtf8_2(int c) throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } return ((c & 0x1F) << 6) | (d & 0x3F); } private final int _decodeUtf8_3(int c1) throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } c1 &= 0x0F; int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } int c = (c1 << 6) | (d & 0x3F); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = (c << 6) | (d & 0x3F); return c; } private final int _decodeUtf8_3fast(int c1) throws IOException, JsonParseException { c1 &= 0x0F; int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } int c = (c1 << 6) | (d & 0x3F); d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = (c << 6) | (d & 0x3F); return c; } /** * @return Character value <b>minus 0x10000</c>; this so that caller * can readily expand it to actual surrogates */ private final int _decodeUtf8_4(int c) throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = ((c & 0x07) << 6) | (d & 0x3F); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = (c << 6) | (d & 0x3F); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } /* note: won't change it to negative here, since caller * already knows it'll need a surrogate */ return ((c << 6) | (d & 0x3F)) - 0x10000; } /* /********************************************************** /* Internal methods, error reporting /********************************************************** */ protected void _reportInvalidSharedName(int index) throws IOException { if (_seenNames == null) { _reportError( "Encountered shared name reference, even though document header explicitly declared no shared name references are included"); } _reportError("Invalid shared name reference " + index + "; only got " + _seenNameCount + " names in buffer (invalid content)"); } protected void _reportInvalidSharedStringValue(int index) throws IOException { if (_seenStringValues == null) { _reportError( "Encountered shared text value reference, even though document header did not declared shared text value references may be included"); } _reportError("Invalid shared text value reference " + index + "; only got " + _seenStringValueCount + " names in buffer (invalid content)"); } protected void _reportInvalidChar(int c) throws JsonParseException { // Either invalid WS or illegal UTF-8 start char if (c < ' ') { _throwInvalidSpace(c); } _reportInvalidInitial(c); } protected void _reportInvalidInitial(int mask) throws JsonParseException { _reportError("Invalid UTF-8 start byte 0x" + Integer.toHexString(mask)); } protected void _reportInvalidOther(int mask) throws JsonParseException { _reportError("Invalid UTF-8 middle byte 0x" + Integer.toHexString(mask)); } protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException { _inputPtr = ptr; _reportInvalidOther(mask); } }