Java tutorial
/* Large portions of this file are copyright by the authors of Jackson under the Apache 2.0 or LGPL license. The implementation was derived from the Jackson class 'org.codehaus.jackson.impl.ReaderBasedParser' and modified for Rison. */ package com.bazaarvoice.jackson.rison; import com.fasterxml.jackson.core.Base64Variant; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.core.ObjectCodec; import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.core.base.ParserBase; import com.fasterxml.jackson.core.io.IOContext; import com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer; import com.fasterxml.jackson.core.util.ByteArrayBuilder; import com.fasterxml.jackson.core.util.TextBuffer; import java.io.IOException; import java.io.Reader; import java.io.Writer; /** * This is a concrete implementation of {@link JsonParser}, which is * based on a {@link java.io.Reader} to handle low-level character * conversion tasks. */ public class RisonParser extends ParserBase { /** * Enumeration that defines all configurable features for Rison parsers. */ public enum Feature { /** * Whether to assume that the top-level value being encoded is an object, * and therefore to omit the containing '(' and ')'. *<p> * Default setting is false. */ O_RISON(false), /** * Whether to assume that the top-level value being encoded is an array, * and therefore to omit the containing '!(' and ')'. *<p> * Default setting is false. */ A_RISON(false); final boolean _defaultState; final int _mask; /** * Method that calculates bit set (flags) of all features that * are enabled by default. */ public static int collectDefaults() { int flags = 0; for (Feature f : values()) { if (f.enabledByDefault()) { flags |= f.getMask(); } } return flags; } private Feature(boolean defaultState) { _defaultState = defaultState; _mask = (1 << ordinal()); } public boolean enabledByDefault() { return _defaultState; } public int getMask() { return _mask; } } /* /********************************************************** /* Input configuration /********************************************************** */ /** * Reader that can be used for reading more content, if one * buffer from input source, but in some cases pre-loaded buffer * is handed to the parser. */ protected Reader _reader; /** * Current buffer from which data is read; generally data is read into * buffer from input source. */ protected char[] _inputBuffer; /* /********************************************************** /* Configuration /********************************************************** */ protected ObjectCodec _objectCodec; final protected CharsToNameCanonicalizer _symbols; final protected int _risonFeatures; /* /********************************************************** /* Parsing state /********************************************************** */ /** * Flag that indicates that the current token has not yet * been fully processed, and needs to be finished for * some access (or skipped to obtain the next token) */ protected boolean _tokenIncomplete = false; /* /********************************************************** /* Life-cycle /********************************************************** */ public RisonParser(IOContext ctxt, int jsonFeatures, int risonFeatures, Reader r, ObjectCodec codec, CharsToNameCanonicalizer st) { super(ctxt, jsonFeatures); _reader = r; _risonFeatures = risonFeatures; _inputBuffer = ctxt.allocTokenBuffer(); _objectCodec = codec; _symbols = st; } @Override public Version version() { return ModuleVersion.instance.version(); } /* /********************************************************** /* Configuration /********************************************************** */ private boolean isRisonEnabled(Feature feature) { return (_risonFeatures & feature.getMask()) != 0; } /* /********************************************************** /* Base method defs, overrides /********************************************************** */ @Override public ObjectCodec getCodec() { return _objectCodec; } @Override public void setCodec(ObjectCodec c) { _objectCodec = c; } @Override public int releaseBuffered(Writer w) throws IOException { int count = _inputEnd - _inputPtr; if (count < 1) { return 0; } // let's just advance ptr to end int origPtr = _inputPtr; w.write(_inputBuffer, origPtr, count); return count; } @Override public Object getInputSource() { return _reader; } @Override protected boolean loadMore() throws IOException { _currInputProcessed += _inputEnd; _currInputRowStart -= _inputEnd; if (_reader != null) { int count = _reader.read(_inputBuffer, 0, _inputBuffer.length); if (count > 0) { _inputPtr = 0; _inputEnd = count; return true; } // End of input _closeInput(); // Should never return 0, so let's fail if (count == 0) { throw new IOException("Reader returned 0 characters when trying to read " + _inputEnd); } } return false; } protected char getNextChar(String eofMsg) throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { if (!loadMore()) { _reportInvalidEOF(eofMsg); } } return _inputBuffer[_inputPtr++]; } @Override protected void _closeInput() throws IOException { /* 25-Nov-2008, tatus: As per [JACKSON-16] we are not to call close() * on the underlying Reader, unless we "own" it, or auto-closing * feature is enabled. * One downside is that when using our optimized * Reader (granted, we only do that for UTF-32...) this * means that buffer recycling won't work correctly. */ if (_reader != null) { if (_ioContext.isResourceManaged() || isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE)) { _reader.close(); } _reader = null; } } /** * Method called to release internal buffers owned by the base * reader. This may be called along with {@link #_closeInput} (for * example, when explicitly closing this reader instance), or * separately (if need be). */ @Override protected void _releaseBuffers() throws IOException { super._releaseBuffers(); char[] buf = _inputBuffer; if (buf != null) { _inputBuffer = null; _ioContext.releaseTokenBuffer(buf); } } /* /********************************************************** /* Public API, data access /********************************************************** */ /** * Method for accessing textual representation of the current event; * if no current event (before first call to {@link #nextToken}, or * after encountering end-of-input), returns null. * Method can be called for any event. */ @Override public String getText() throws IOException, JsonParseException { JsonToken t = _currToken; if (t == JsonToken.VALUE_STRING) { if (_tokenIncomplete) { _tokenIncomplete = false; _finishString(); // only strings can be incomplete } return _textBuffer.contentsAsString(); } return _getText2(t); } protected String _getText2(JsonToken t) { if (t == null) { return null; } switch (t) { case FIELD_NAME: return _parsingContext.getCurrentName(); case VALUE_STRING: // fall through case VALUE_NUMBER_INT: case VALUE_NUMBER_FLOAT: return _textBuffer.contentsAsString(); } return t.asString(); } @Override public char[] getTextCharacters() throws IOException, JsonParseException { if (_currToken != null) { // null only before/after document switch (_currToken) { case FIELD_NAME: if (!_nameCopied) { String name = _parsingContext.getCurrentName(); int nameLen = name.length(); if (_nameCopyBuffer == null) { _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen); } else if (_nameCopyBuffer.length < nameLen) { _nameCopyBuffer = new char[nameLen]; } name.getChars(0, nameLen, _nameCopyBuffer, 0); _nameCopied = true; } return _nameCopyBuffer; case VALUE_STRING: if (_tokenIncomplete) { _tokenIncomplete = false; _finishString(); // only strings can be incomplete } // fall through case VALUE_NUMBER_INT: case VALUE_NUMBER_FLOAT: return _textBuffer.getTextBuffer(); default: return _currToken.asCharArray(); } } return null; } @Override public int getTextLength() throws IOException, JsonParseException { if (_currToken != null) { // null only before/after document switch (_currToken) { case FIELD_NAME: return _parsingContext.getCurrentName().length(); case VALUE_STRING: if (_tokenIncomplete) { _tokenIncomplete = false; _finishString(); // only strings can be incomplete } // fall through case VALUE_NUMBER_INT: case VALUE_NUMBER_FLOAT: return _textBuffer.size(); default: return _currToken.asCharArray().length; } } return 0; } @Override public int getTextOffset() throws IOException, JsonParseException { // Most have offset of 0, only some may have other values: if (_currToken != null) { switch (_currToken) { case FIELD_NAME: return 0; case VALUE_STRING: if (_tokenIncomplete) { _tokenIncomplete = false; _finishString(); // only strings can be incomplete } // fall through case VALUE_NUMBER_INT: case VALUE_NUMBER_FLOAT: return _textBuffer.getTextOffset(); } } return 0; } @Override public Object getEmbeddedObject() throws IOException, JsonParseException { return null; } @Override public byte[] getBinaryValue(Base64Variant b64variant) throws IOException, JsonParseException { if (_currToken != JsonToken.VALUE_STRING && (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT || _binaryValue == null)) { _reportError("Current token (" + _currToken + ") not VALUE_STRING or VALUE_EMBEDDED_OBJECT, can not access as binary"); } /* To ensure that we won't see inconsistent data, better clear up * state... */ if (_tokenIncomplete) { try { _binaryValue = _decodeBase64(b64variant); } catch (IllegalArgumentException iae) { throw _constructError( "Failed to decode VALUE_STRING as base64 (" + b64variant + "): " + iae.getMessage()); } /* let's clear incomplete only now; allows for accessing other * textual content in error cases */ _tokenIncomplete = false; } else { // may actually require conversion... if (_binaryValue == null) { ByteArrayBuilder builder = _getByteArrayBuilder(); _decodeBase64(getText(), builder, b64variant); _binaryValue = builder.toByteArray(); } } return _binaryValue; } /* /********************************************************** /* Public API, traversal /********************************************************** */ /** * @return Next token from the stream, if any found, or null * to indicate end-of-input */ @Override public JsonToken nextToken() throws IOException, JsonParseException { _numTypesValid = NR_UNKNOWN; if (_currToken == null && _parsingContext.inRoot()) { if (isRisonEnabled(Feature.A_RISON)) { _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); return (_currToken = JsonToken.START_ARRAY); } else if (isRisonEnabled(Feature.O_RISON)) { _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); return (_currToken = JsonToken.START_OBJECT); } } /* First: field names are special -- we will always tokenize * (part of) value along with field name to simplify * state handling. If so, can and need to use secondary token: */ if (_currToken == JsonToken.FIELD_NAME) { return _nextAfterName(); } if (_tokenIncomplete) { _skipString(); // only strings can be partial } int i = _nextChOrEnd(); if (i < 0) { // end-of-input /* 19-Feb-2009, tatu: Should actually close/release things * like input source, symbol table and recyclable buffers now. */ close(); if (_parsingContext.getParent().inRoot()) { if (isRisonEnabled(Feature.A_RISON) && _parsingContext.inArray()) { _parsingContext = _parsingContext.getParent(); return (_currToken = JsonToken.END_ARRAY); } if (isRisonEnabled(Feature.O_RISON) && _parsingContext.inObject()) { _parsingContext = _parsingContext.getParent(); return (_currToken = JsonToken.END_OBJECT); } } _handleEOF(); return (_currToken = null); } /* First, need to ensure we know the starting location of token * after skipping leading white space */ _tokenInputTotal = _currInputProcessed + _inputPtr - 1; _tokenInputRow = _currInputRow; _tokenInputCol = _inputPtr - _currInputRowStart - 1; // finally: clear any data retained so far _binaryValue = null; // Closing scope? if (i == ')') { if (_parsingContext.inArray()) { _parsingContext = _parsingContext.getParent(); return (_currToken = JsonToken.END_ARRAY); } if (_parsingContext.inObject()) { _parsingContext = _parsingContext.getParent(); return (_currToken = JsonToken.END_OBJECT); } _reportMismatchedEndMarker(i, ')'); } // Nope: do we then expect a comma? if (_parsingContext.expectComma()) { if (i != INT_COMMA) { _reportUnexpectedChar(i, "was expecting comma to separate " + _parsingContext.getTypeDesc() + " entries"); } i = _nextCh(); } /* And should we now have a name? Always true for * Object contexts, since the intermediate 'expect-value' * state is never retained. */ boolean inObject = _parsingContext.inObject(); if (inObject) { // First, field name itself: String name; if (i != INT_APOSTROPHE) { name = _parseUnquotedFieldName(i); } else { name = _parseFieldName(i); } _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; i = _nextCh(); if (i != INT_COLON) { _reportUnexpectedChar(i, "was expecting a colon to separate field name and value"); } i = _nextCh(); } // Ok: we must have a value... what is it? JsonToken t; switch (i) { case INT_APOSTROPHE: _tokenIncomplete = true; t = JsonToken.VALUE_STRING; break; case '(': if (!inObject) { _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); } t = JsonToken.START_OBJECT; break; case ')': // Error: ')' is not valid at this point; valid closers have // been handled earlier _reportUnexpectedChar(i, "expected a value"); case INT_MINUS: /* Should we have separate handling for plus? Although * it is not allowed per se, it may be erroneously used, * and could be indicate by a more specific error message. */ case INT_0: case INT_1: case INT_2: case INT_3: case INT_4: case INT_5: case INT_6: case INT_7: case INT_8: case INT_9: t = parseNumberText(i); break; case '!': i = _nextCh(); switch (i) { case '(': if (!inObject) { _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); } t = JsonToken.START_ARRAY; break; case INT_t: t = JsonToken.VALUE_TRUE; break; case INT_f: t = JsonToken.VALUE_FALSE; break; case INT_n: t = JsonToken.VALUE_NULL; break; default: t = _handleUnexpectedValue(i); break; } break; default: if (IdentifierUtils.isIdStartLenient(i)) { t = JsonToken.VALUE_STRING; _inputPtr--; // push back the first char _parseUnquotedString(); } else { t = _handleUnexpectedValue(i); } break; } if (inObject) { _nextToken = t; return _currToken; } _currToken = t; return t; } private JsonToken _nextAfterName() { _nameCopied = false; // need to invalidate if it was copied JsonToken t = _nextToken; _nextToken = null; // Also: may need to start new context? if (t == JsonToken.START_ARRAY) { _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); } else if (t == JsonToken.START_OBJECT) { _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); } return (_currToken = t); } /* @Override public boolean nextFieldName(SerializableString str) throws IOException, JsonParseException */ // note: identical to one in Utf8StreamParser @Override public String nextTextValue() throws IOException, JsonParseException { if (_currToken == JsonToken.FIELD_NAME) { // mostly copied from '_nextAfterName' _nameCopied = false; JsonToken t = _nextToken; _nextToken = null; _currToken = t; if (t == JsonToken.VALUE_STRING) { if (_tokenIncomplete) { _tokenIncomplete = false; _finishString(); } return _textBuffer.contentsAsString(); } if (t == JsonToken.START_ARRAY) { _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); } else if (t == JsonToken.START_OBJECT) { _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); } return null; } // !!! TODO: optimize this case as well return (nextToken() == JsonToken.VALUE_STRING) ? getText() : null; } // note: identical to one in Utf8StreamParser @Override public int nextIntValue(int defaultValue) throws IOException, JsonParseException { if (_currToken == JsonToken.FIELD_NAME) { _nameCopied = false; JsonToken t = _nextToken; _nextToken = null; _currToken = t; if (t == JsonToken.VALUE_NUMBER_INT) { return getIntValue(); } if (t == JsonToken.START_ARRAY) { _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); } else if (t == JsonToken.START_OBJECT) { _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); } return defaultValue; } // !!! TODO: optimize this case as well return (nextToken() == JsonToken.VALUE_NUMBER_INT) ? getIntValue() : defaultValue; } // note: identical to one in Utf8StreamParser @Override public long nextLongValue(long defaultValue) throws IOException, JsonParseException { if (_currToken == JsonToken.FIELD_NAME) { // mostly copied from '_nextAfterName' _nameCopied = false; JsonToken t = _nextToken; _nextToken = null; _currToken = t; if (t == JsonToken.VALUE_NUMBER_INT) { return getLongValue(); } if (t == JsonToken.START_ARRAY) { _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); } else if (t == JsonToken.START_OBJECT) { _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); } return defaultValue; } // !!! TODO: optimize this case as well return (nextToken() == JsonToken.VALUE_NUMBER_INT) ? getLongValue() : defaultValue; } // note: identical to one in Utf8StreamParser @Override public Boolean nextBooleanValue() throws IOException, JsonParseException { if (_currToken == JsonToken.FIELD_NAME) { // mostly copied from '_nextAfterName' _nameCopied = false; JsonToken t = _nextToken; _nextToken = null; _currToken = t; if (t == JsonToken.VALUE_TRUE) { return Boolean.TRUE; } if (t == JsonToken.VALUE_FALSE) { return Boolean.FALSE; } if (t == JsonToken.START_ARRAY) { _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); } else if (t == JsonToken.START_OBJECT) { _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); } return null; } switch (nextToken()) { case VALUE_TRUE: return Boolean.TRUE; case VALUE_FALSE: return Boolean.FALSE; } return null; } @Override public void close() throws IOException { super.close(); _symbols.release(); } /* /********************************************************** /* Internal methods, number parsing /* (note: in 1.8 and prior, part of "ReaderBasedNumericParser" /********************************************************** */ /** * Initial parsing method for number values. It needs to be able * to parse enough input to be able to determine whether the * value is to be considered a simple integer value, or a more * generic decimal value: latter of which needs to be expressed * as a floating point number. The basic rule is that if the number * has no fractional or exponential part, it is an integer; otherwise * a floating point number. *<p> * Because much of input has to be processed in any case, no partial * parsing is done: all input text will be stored for further * processing. However, actual numeric value conversion will be * deferred, since it is usually the most complicated and costliest * part of processing. */ protected JsonToken parseNumberText(int ch) throws IOException, JsonParseException { /* Although we will always be complete with respect to textual * representation (that is, all characters will be parsed), * actual conversion to a number is deferred. Thus, need to * note that no representations are valid yet */ boolean negative = (ch == INT_MINUS); int ptr = _inputPtr; int startPtr = ptr - 1; // to include sign/digit already read final int inputLen = _inputEnd; dummy_loop: do { // dummy loop, to be able to break out if (negative) { // need to read the next digit if (ptr >= _inputEnd) { break dummy_loop; } ch = _inputBuffer[ptr++]; // First check: must have a digit to follow minus sign if (ch > INT_9 || ch < INT_0) { _inputPtr = ptr; return _handleInvalidNumberStart(ch, true); } /* (note: has been checked for non-negative already, in * the dispatching code that determined it should be * a numeric value) */ } // One special case, leading zero(es): if (ch == INT_0) { break dummy_loop; } /* First, let's see if the whole number is contained within * the input buffer unsplit. This should be the common case; * and to simplify processing, we will just reparse contents * in the alternative case (number split on buffer boundary) */ int intLen = 1; // already got one // First let's get the obligatory integer part: int_loop: while (true) { if (ptr >= _inputEnd) { break dummy_loop; } ch = (int) _inputBuffer[ptr++]; if (ch < INT_0 || ch > INT_9) { break int_loop; } ++intLen; } int fractLen = 0; // And then see if we get other parts if (ch == INT_DECIMAL_POINT) { // yes, fraction fract_loop: while (true) { if (ptr >= inputLen) { break dummy_loop; } ch = (int) _inputBuffer[ptr++]; if (ch < INT_0 || ch > INT_9) { break fract_loop; } ++fractLen; } // must be followed by sequence of ints, one minimum if (fractLen == 0) { reportUnexpectedNumberChar(ch, "Decimal point not followed by a digit"); } } int expLen = 0; if (ch == INT_e || ch == INT_E) { // and/or exponent if (ptr >= inputLen) { break dummy_loop; } // Sign indicator? ch = (int) _inputBuffer[ptr++]; if (ch == INT_MINUS || ch == INT_PLUS) { // yup, skip for now if (ptr >= inputLen) { break dummy_loop; } ch = (int) _inputBuffer[ptr++]; } while (ch <= INT_9 && ch >= INT_0) { ++expLen; if (ptr >= inputLen) { break dummy_loop; } ch = (int) _inputBuffer[ptr++]; } // must be followed by sequence of ints, one minimum if (expLen == 0) { reportUnexpectedNumberChar(ch, "Exponent indicator not followed by a digit"); } } // Got it all: let's add to text buffer for parsing, access --ptr; // need to push back following separator _inputPtr = ptr; int len = ptr - startPtr; _textBuffer.resetWithShared(_inputBuffer, startPtr, len); return reset(negative, intLen, fractLen, expLen); } while (false); _inputPtr = negative ? (startPtr + 1) : startPtr; return parseNumberText2(negative); } /** * Method called to parse a number, when the primary parse * method has failed to parse it, due to it being split on * buffer boundary. As a result code is very similar, except * that it has to explicitly copy contents to the text buffer * instead of just sharing the main input buffer. */ private JsonToken parseNumberText2(boolean negative) throws IOException, JsonParseException { char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int outPtr = 0; // Need to prepend sign? if (negative) { outBuf[outPtr++] = '-'; } // This is the place to do leading-zero check(s) too: int intLen = 0; char c = (_inputPtr < _inputEnd) ? _inputBuffer[_inputPtr++] : getNextChar("No digit following minus sign"); if (c == '0') { c = _verifyNoLeadingZeroes(); } boolean eof = false; // Ok, first the obligatory integer part: int_loop: while (c >= '0' && c <= '9') { ++intLen; if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } outBuf[outPtr++] = c; if (_inputPtr >= _inputEnd && !loadMore()) { // EOF is legal for main level int values c = CHAR_NULL; eof = true; break int_loop; } c = _inputBuffer[_inputPtr++]; } // Also, integer part is not optional if (intLen == 0) { reportInvalidNumber("Missing integer part (next char " + _getCharDesc(c) + ")"); } int fractLen = 0; // And then see if we get other parts if (c == '.') { // yes, fraction outBuf[outPtr++] = c; fract_loop: while (true) { if (_inputPtr >= _inputEnd && !loadMore()) { eof = true; break fract_loop; } c = _inputBuffer[_inputPtr++]; if (c < INT_0 || c > INT_9) { break fract_loop; } ++fractLen; if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } outBuf[outPtr++] = c; } // must be followed by sequence of ints, one minimum if (fractLen == 0) { reportUnexpectedNumberChar(c, "Decimal point not followed by a digit"); } } int expLen = 0; if (c == 'e' || c == 'E') { // exponent? if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } outBuf[outPtr++] = c; // Not optional, can require that we get one more char c = (_inputPtr < _inputEnd) ? _inputBuffer[_inputPtr++] : getNextChar("expected a digit for number exponent"); // Sign indicator? if (c == '-' || c == '+') { if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } outBuf[outPtr++] = c; // Likewise, non optional: c = (_inputPtr < _inputEnd) ? _inputBuffer[_inputPtr++] : getNextChar("expected a digit for number exponent"); } exp_loop: while (c <= INT_9 && c >= INT_0) { ++expLen; if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } outBuf[outPtr++] = c; if (_inputPtr >= _inputEnd && !loadMore()) { eof = true; break exp_loop; } c = _inputBuffer[_inputPtr++]; } // must be followed by sequence of ints, one minimum if (expLen == 0) { reportUnexpectedNumberChar(c, "Exponent indicator not followed by a digit"); } } // Ok; unless we hit end-of-input, need to push last char read back if (!eof) { --_inputPtr; } _textBuffer.setCurrentLength(outPtr); // And there we have it! return reset(negative, intLen, fractLen, expLen); } /** * Method called when we have seen one zero, and want to ensure * it is not followed by another */ private char _verifyNoLeadingZeroes() throws IOException, JsonParseException { // Ok to have plain "0" if (_inputPtr >= _inputEnd && !loadMore()) { return '0'; } char ch = _inputBuffer[_inputPtr]; // if not followed by a number (probably '.'); return zero as is, to be included if (ch < '0' || ch > '9') { return '0'; } if (!isEnabled(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS)) { reportInvalidNumber("Leading zeroes not allowed"); } // if so, just need to skip either all zeroes (if followed by number); or all but one (if non-number) ++_inputPtr; // Leading zero to be skipped if (ch == INT_0) { while (_inputPtr < _inputEnd || loadMore()) { ch = _inputBuffer[_inputPtr]; if (ch < '0' || ch > '9') { // followed by non-number; retain one zero return '0'; } ++_inputPtr; // skip previous zero if (ch != '0') { // followed by other number; return break; } } } return ch; } /** * Method called if expected numeric value (due to leading sign) does not * look like a number */ protected JsonToken _handleInvalidNumberStart(int ch, boolean negative) throws IOException, JsonParseException { if (ch == 'I') { if (_inputPtr >= _inputEnd) { if (!loadMore()) { _reportInvalidEOFInValue(); } } ch = _inputBuffer[_inputPtr++]; if (ch == 'N') { String match = negative ? "-INF" : "+INF"; _matchToken(match, 3); if (isEnabled(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS)) { return resetAsNaN(match, negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY); } _reportError("Non-standard token '" + match + "': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow"); } else if (ch == 'n') { String match = negative ? "-Infinity" : "+Infinity"; _matchToken(match, 3); if (isEnabled(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS)) { return resetAsNaN(match, negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY); } _reportError("Non-standard token '" + match + "': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow"); } } reportUnexpectedNumberChar(ch, "expected digit (0-9) to follow minus sign, for valid numeric value"); return null; } /* /********************************************************** /* Internal methods, secondary parsing /********************************************************** */ protected String _parseFieldName(int i) throws IOException, JsonParseException { /* First: let's try to see if we have a simple name: one that does * not cross input buffer boundary, and does not contain escape * sequences. */ int ptr = _inputPtr; int hash = 0; final int inputLen = _inputEnd; if (ptr < inputLen) { do { int ch = _inputBuffer[ptr]; if (ch == INT_APOSTROPHE) { int start = _inputPtr; _inputPtr = ptr + 1; // to skip the quote return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash); } else if (ch == '!') { break; // escape, can't take the fast path } hash = (hash * 31) + ch; ++ptr; } while (ptr < inputLen); } int start = _inputPtr; _inputPtr = ptr; return _parseFieldName2(start, hash, INT_APOSTROPHE); } private String _parseFieldName2(int startPtr, int hash, int endChar) throws IOException, JsonParseException { _textBuffer.resetWithShared(_inputBuffer, startPtr, (_inputPtr - startPtr)); /* Output pointers; calls will also ensure that the buffer is * not shared and has room for at least one more char. */ char[] outBuf = _textBuffer.getCurrentSegment(); int outPtr = _textBuffer.getCurrentSegmentSize(); while (true) { if (_inputPtr >= _inputEnd) { if (!loadMore()) { _reportInvalidEOF(": was expecting closing '" + ((char) endChar) + "' for name"); } } char c = _inputBuffer[_inputPtr++]; if (c == endChar) { break; } else if (c == '!') { c = _decodeEscaped(); } hash = (hash * 31) + c; // Ok, let's add char to output: outBuf[outPtr++] = c; // Need more room? if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } } _textBuffer.setCurrentLength(outPtr); { TextBuffer tb = _textBuffer; char[] buf = tb.getTextBuffer(); int start = tb.getTextOffset(); int len = tb.size(); return _symbols.findSymbol(buf, start, len, hash); } } /** * Method called when we see non-white space character other * than double quote, when expecting a field name. * In standard mode will just throw an expection; but * in non-standard modes may be able to parse name. */ protected String _parseUnquotedFieldName(int i) throws IOException, JsonParseException { // Also: first char must be a valid name char, but NOT be number boolean firstOk = IdentifierUtils.isIdStartLenient(i); if (!firstOk) { _reportUnexpectedChar(i, "was expecting either valid name character (for unquoted name) or single-quote (for quoted) to start field name"); } int ptr = _inputPtr; int hash = 0; final int inputLen = _inputEnd; if (ptr < inputLen) { do { int ch = _inputBuffer[ptr]; if (!IdentifierUtils.isIdCharLenient(ch)) { int start = _inputPtr - 1; // -1 to bring back first char _inputPtr = ptr; return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash); } hash = (hash * 31) + ch; ++ptr; } while (ptr < inputLen); } int start = _inputPtr - 1; _inputPtr = ptr; return _parseUnquotedFieldName2(start, hash); } /** * Method for handling cases where first non-space character * of an expected value token is not legal for standard JSON content. */ protected JsonToken _handleUnexpectedValue(int i) throws IOException, JsonParseException { // Most likely an error, unless we are to allow single-quote-strings switch (i) { case 'N': _matchToken("NaN", 1); if (isEnabled(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS)) { return resetAsNaN("NaN", Double.NaN); } _reportError("Non-standard token 'NaN': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow"); break; case '+': // note: '-' is taken as number if (_inputPtr >= _inputEnd) { if (!loadMore()) { _reportInvalidEOFInValue(); } } return _handleInvalidNumberStart(_inputBuffer[_inputPtr++], false); } _reportUnexpectedChar(i, "expected a valid value (number, String, array, object, 'true', 'false' or 'null')"); return null; } private String _parseUnquotedFieldName2(int startPtr, int hash) throws IOException, JsonParseException { _textBuffer.resetWithShared(_inputBuffer, startPtr, (_inputPtr - startPtr)); char[] outBuf = _textBuffer.getCurrentSegment(); int outPtr = _textBuffer.getCurrentSegmentSize(); while (true) { if (_inputPtr >= _inputEnd) { if (!loadMore()) { // acceptable for now (will error out later) break; } } char c = _inputBuffer[_inputPtr]; if (!IdentifierUtils.isIdCharLenient(c)) { break; } ++_inputPtr; hash = (hash * 31) + c; // Ok, let's add char to output: outBuf[outPtr++] = c; // Need more room? if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } } _textBuffer.setCurrentLength(outPtr); { TextBuffer tb = _textBuffer; char[] buf = tb.getTextBuffer(); int start = tb.getTextOffset(); int len = tb.size(); return _symbols.findSymbol(buf, start, len, hash); } } protected void _parseUnquotedString() throws IOException, JsonParseException { /* First: let's try to see if we have simple String value: one * that does not cross input buffer boundary, and does not * contain escape sequences. */ int ptr = _inputPtr; final int inputLen = _inputEnd; if (ptr < inputLen) { do { char ch = _inputBuffer[ptr]; if (!IdentifierUtils.isIdCharLenient(ch)) { _textBuffer.resetWithShared(_inputBuffer, _inputPtr, (ptr - _inputPtr)); _inputPtr = ptr; // Yes, we got it all return; } ++ptr; } while (ptr < inputLen); } /* Either ran out of input, or bumped into an escape * sequence... */ _textBuffer.resetWithCopy(_inputBuffer, _inputPtr, (ptr - _inputPtr)); _inputPtr = ptr; _parseUnquotedString2(); } protected void _parseUnquotedString2() throws IOException, JsonParseException { char[] outBuf = _textBuffer.getCurrentSegment(); int outPtr = _textBuffer.getCurrentSegmentSize(); while (true) { if (_inputPtr >= _inputEnd && !loadMore()) { break; // eof } char c = _inputBuffer[_inputPtr]; if (!IdentifierUtils.isIdCharLenient(c)) { break; } _inputPtr++; // Need more room? if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } // Ok, let's add char to output: outBuf[outPtr++] = c; } _textBuffer.setCurrentLength(outPtr); } @Override protected void _finishString() throws IOException, JsonParseException { /* First: let's try to see if we have simple String value: one * that does not cross input buffer boundary, and does not * contain escape sequences. */ int ptr = _inputPtr; final int inputLen = _inputEnd; if (ptr < inputLen) { do { int ch = _inputBuffer[ptr]; if (ch == INT_APOSTROPHE) { _textBuffer.resetWithShared(_inputBuffer, _inputPtr, (ptr - _inputPtr)); _inputPtr = ptr + 1; // Yes, we got it all return; } else if (ch == '!') { break; // escape, can't take the fast path } ++ptr; } while (ptr < inputLen); } /* Either ran out of input, or bumped into an escape * sequence... */ _textBuffer.resetWithCopy(_inputBuffer, _inputPtr, (ptr - _inputPtr)); _inputPtr = ptr; _finishString2(); } protected void _finishString2() throws IOException, JsonParseException { char[] outBuf = _textBuffer.getCurrentSegment(); int outPtr = _textBuffer.getCurrentSegmentSize(); while (true) { if (_inputPtr >= _inputEnd) { if (!loadMore()) { _reportInvalidEOF(": was expecting closing quote for a string value"); } } char c = _inputBuffer[_inputPtr++]; if (c == INT_APOSTROPHE) { break; } else if (c == '!') { c = _decodeEscaped(); } // Need more room? if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } // Ok, let's add char to output: outBuf[outPtr++] = c; } _textBuffer.setCurrentLength(outPtr); } /** * Method called to skim through rest of unparsed String value, * if it is not needed. This can be done bit faster if contents * need not be stored for future access. */ protected void _skipString() throws IOException, JsonParseException { _tokenIncomplete = false; int inputPtr = _inputPtr; int inputLen = _inputEnd; char[] inputBuffer = _inputBuffer; while (true) { if (inputPtr >= inputLen) { _inputPtr = inputPtr; if (!loadMore()) { _reportInvalidEOF(": was expecting closing quote for a string value"); } inputPtr = _inputPtr; inputLen = _inputEnd; } char c = inputBuffer[inputPtr++]; if (c == INT_APOSTROPHE) { _inputPtr = inputPtr; break; } else if (c == '!') { _inputPtr = inputPtr; _decodeEscaped(); inputPtr = _inputPtr; inputLen = _inputEnd; } } } /* /********************************************************** /* Internal methods, other parsing /********************************************************** */ private int _nextCh() throws IOException, JsonParseException { if (_inputPtr < _inputEnd || loadMore()) { return _inputBuffer[_inputPtr++]; } throw _constructError( "Unexpected end-of-input within/between " + _parsingContext.getTypeDesc() + " entries"); } private int _nextChOrEnd() throws IOException, JsonParseException { if ((_inputPtr < _inputEnd) || loadMore()) { return _inputBuffer[_inputPtr++]; } return -1; } @Override protected char _decodeEscaped() throws IOException, JsonParseException { if (_inputPtr >= _inputEnd) { if (!loadMore()) { _reportInvalidEOF(" in character escape sequence"); } } char c = _inputBuffer[_inputPtr++]; if (c == '!' || c == '\'') { return c; } return _handleUnrecognizedCharacterEscape(c); } /** * Helper method for checking whether input matches expected token */ protected void _matchToken(String matchStr, int i) throws IOException, JsonParseException { final int len = matchStr.length(); do { if (_inputPtr >= _inputEnd) { if (!loadMore()) { _reportInvalidEOFInValue(); } } if (_inputBuffer[_inputPtr] != matchStr.charAt(i)) { _reportInvalidToken(matchStr.substring(0, i), "'null', 'true', 'false' or NaN"); } ++_inputPtr; } while (++i < len); // but let's also ensure we either get EOF, or non-alphanum char... if (_inputPtr >= _inputEnd) { if (!loadMore()) { return; } } char c = _inputBuffer[_inputPtr]; if (c < '0' || c == ')') { // expected/allowed chars return; } // if letter, it's a problem tho if (IdentifierUtils.isIdCharLenient(c)) { _reportInvalidToken(matchStr.substring(0, i), "'null', 'true', 'false' or NaN"); } } /* /********************************************************** /* Binary access /********************************************************** */ /** * Efficient handling for incremental parsing of base64-encoded * textual content. */ protected byte[] _decodeBase64(Base64Variant b64variant) throws IOException, JsonParseException { ByteArrayBuilder builder = _getByteArrayBuilder(); //main_loop: while (true) { // first, we'll skip preceding white space, if any char ch; do { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } ch = _inputBuffer[_inputPtr++]; } while (ch <= INT_SPACE); int bits = b64variant.decodeBase64Char(ch); if (bits < 0) { if (ch == INT_APOSTROPHE) { // reached the end, fair and square? return builder.toByteArray(); } bits = _decodeBase64Escape2(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; } } int decodedData = bits; // then second base64 char; can't get padding yet, nor ws if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } ch = _inputBuffer[_inputPtr++]; bits = b64variant.decodeBase64Char(ch); if (bits < 0) { bits = _decodeBase64Escape2(b64variant, ch, 1); } decodedData = (decodedData << 6) | bits; // third base64 char; can be padding, but not ws if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } ch = _inputBuffer[_inputPtr++]; bits = b64variant.decodeBase64Char(ch); // First branch: can get padding (-> 1 byte) if (bits < 0) { if (bits != Base64Variant.BASE64_VALUE_PADDING) { // as per [JACKSON-631], could also just be 'missing' padding if (ch == INT_APOSTROPHE && !b64variant.usesPadding()) { decodedData >>= 4; builder.append(decodedData); return builder.toByteArray(); } bits = _decodeBase64Escape2(b64variant, ch, 2); } if (bits == Base64Variant.BASE64_VALUE_PADDING) { // Ok, must get more padding chars, then if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } ch = _inputBuffer[_inputPtr++]; if (!b64variant.usesPaddingChar(ch)) { throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '" + b64variant.getPaddingChar() + "'"); } // Got 12 bits, only need 8, need to shift decodedData >>= 4; builder.append(decodedData); continue; } // otherwise we got escaped other char, to be processed below } // Nope, 2 or 3 bytes decodedData = (decodedData << 6) | bits; // fourth and last base64 char; can be padding, but not ws if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } ch = _inputBuffer[_inputPtr++]; bits = b64variant.decodeBase64Char(ch); if (bits < 0) { if (bits != Base64Variant.BASE64_VALUE_PADDING) { // as per [JACKSON-631], could also just be 'missing' padding if (ch == INT_APOSTROPHE && !b64variant.usesPadding()) { decodedData >>= 2; builder.appendTwoBytes(decodedData); return builder.toByteArray(); } bits = _decodeBase64Escape2(b64variant, ch, 3); } if (bits == Base64Variant.BASE64_VALUE_PADDING) { // With padding we only get 2 bytes; but we have // to shift it a bit so it is identical to triplet // case with partial output. // 3 chars gives 3x6 == 18 bits, of which 2 are // dummies, need to discard: decodedData >>= 2; builder.appendTwoBytes(decodedData); continue; } // otherwise we got escaped other char, to be processed below } // otherwise, our triplet is now complete decodedData = (decodedData << 6) | bits; builder.appendThreeBytes(decodedData); } } protected int _decodeBase64Escape2(Base64Variant b64variant, char ch, int index) throws IOException, JsonParseException { // if white space, skip if first triplet; otherwise errors if (ch <= INT_SPACE) { if (index == 0) { // whitespace only allowed to be skipped between triplets return -1; } } // 17-May-2011, tatu: As per [JACKSON-xxx], need to handle escaped chars if (ch != '!') { throw reportInvalidBase64Char(b64variant, ch, index); } char unescaped = _decodeEscaped(); // otherwise try to find actual triplet value int bits = b64variant.decodeBase64Char(unescaped); if (bits < 0) { throw reportInvalidBase64Char(b64variant, unescaped, index); } return bits; } /* /********************************************************** /* Error reporting /********************************************************** */ protected void _reportInvalidToken(String matchedPart, String msg) throws IOException, JsonParseException { StringBuilder sb = new StringBuilder(matchedPart); /* Let's just try to find what appears to be the token, using * regular Java identifier character rules. It's just a heuristic, * nothing fancy here. */ while (true) { if (_inputPtr >= _inputEnd) { if (!loadMore()) { break; } } char c = _inputBuffer[_inputPtr]; if (!IdentifierUtils.isIdCharLenient(c)) { break; } ++_inputPtr; sb.append(c); } _reportError("Unrecognized token '" + sb.toString() + "': was expecting "); } }