// Java tutorial
/* * Copyright 1999-2101 Alibaba Group Holding Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.alibaba.druid.sql.parser; import static com.alibaba.druid.sql.parser.CharTypes.isFirstIdentifierChar; import static com.alibaba.druid.sql.parser.CharTypes.isIdentifierChar; import static com.alibaba.druid.sql.parser.CharTypes.isWhitespace; import static com.alibaba.druid.sql.parser.LayoutCharacters.EOI; import static com.alibaba.druid.sql.parser.Token.COLONCOLON; import static com.alibaba.druid.sql.parser.Token.COLONEQ; import static com.alibaba.druid.sql.parser.Token.COMMA; import static com.alibaba.druid.sql.parser.Token.EOF; import static com.alibaba.druid.sql.parser.Token.ERROR; import static com.alibaba.druid.sql.parser.Token.LBRACE; import static com.alibaba.druid.sql.parser.Token.LBRACKET; import static com.alibaba.druid.sql.parser.Token.LITERAL_ALIAS; import static com.alibaba.druid.sql.parser.Token.LITERAL_CHARS; import static com.alibaba.druid.sql.parser.Token.LPAREN; import static com.alibaba.druid.sql.parser.Token.RBRACE; import static com.alibaba.druid.sql.parser.Token.RBRACKET; import static com.alibaba.druid.sql.parser.Token.RPAREN; import java.math.BigDecimal; import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import com.alibaba.druid.util.StringUtils; import org.apache.commons.lang.math.NumberUtils; /** * @author wenshao [szujobs@hotmail.com] */ public class Lexer { protected final String text; 
protected int pos; protected int mark; protected char ch; protected char[] buf; protected int bufPos; protected Token token; protected Keywords keywods = Keywords.DEFAULT_KEYWORDS; protected String stringVal; protected List<String> comments = new ArrayList<String>(2); protected boolean skipComment = true; private SavePoint savePoint = null; /* * anti sql injection */ private boolean allowComment = true; private int varIndex = -1; protected CommentHandler commentHandler; protected boolean endOfComment = false; protected boolean keepComments = false; protected int line = 0; protected int lines = 0; public Lexer(String input) { this(input, null); } public Lexer(String input, CommentHandler commentHandler) { this(input, true); this.commentHandler = commentHandler; } public boolean isKeepComments() { return keepComments; } public void setKeepComments(boolean keepComments) { this.keepComments = keepComments; } public CommentHandler getCommentHandler() { return commentHandler; } public void setCommentHandler(CommentHandler commentHandler) { this.commentHandler = commentHandler; } public final char charAt(int index) { if (index >= text.length()) { return EOI; } return text.charAt(index); } public final String addSymbol() { return subString(mark, bufPos); } public final String subString(int offset, int count) { return text.substring(offset, offset + count); } protected void initBuff(int size) { if (buf == null) { if (size < 32) { buf = new char[32]; } else { buf = new char[size + 32]; } } else if (buf.length < size) { buf = Arrays.copyOf(buf, size); } } public void arraycopy(int srcPos, char[] dest, int destPos, int length) { text.getChars(srcPos, srcPos + length, dest, destPos); } public boolean isAllowComment() { return allowComment; } public void setAllowComment(boolean allowComment) { this.allowComment = allowComment; } public int nextVarIndex() { return ++varIndex; } private static class SavePoint { int bp; int sp; int np; char ch; Token token; } public Keywords 
getKeywods() { return keywods; } public void mark() { SavePoint savePoint = new SavePoint(); savePoint.bp = pos; savePoint.sp = bufPos; savePoint.np = mark; savePoint.ch = ch; savePoint.token = token; this.savePoint = savePoint; } public void reset() { this.pos = savePoint.bp; this.bufPos = savePoint.sp; this.mark = savePoint.np; this.ch = savePoint.ch; this.token = savePoint.token; } public Lexer(String input, boolean skipComment) { this.skipComment = skipComment; this.text = input; this.pos = -1; scanChar(); } public Lexer(char[] input, int inputLength, boolean skipComment) { this(new String(input, 0, inputLength), skipComment); } protected final void scanChar() { ch = charAt(++pos); } protected void unscan() { ch = charAt(--pos); } public boolean isEOF() { return pos >= text.length(); } /** * Report an error at the given position using the provided arguments. */ protected void lexError(String key, Object... args) { token = ERROR; } /** * Return the current token, set by nextToken(). */ public final Token token() { return token; } public String info() { return this.token + " " + this.stringVal(); } public final void nextTokenComma() { if (ch == ' ') { scanChar(); } if (ch == ',' || ch == '') { scanChar(); token = COMMA; return; } if (ch == ')') { scanChar(); token = RPAREN; return; } nextToken(); } public final void nextTokenLParen() { if (ch == ' ') { scanChar(); } if (ch == '(') { scanChar(); token = LPAREN; return; } nextToken(); } public final void nextTokenValue() { if (ch == ' ') { scanChar(); } if (ch == '\'') { bufPos = 0; scanString(); return; } if (ch >= '0' && ch <= '9') { bufPos = 0; scanNumber(); return; } if (ch == '?') { scanChar(); token = Token.QUES; return; } if (isFirstIdentifierChar(ch) && ch != 'N') { scanIdentifier(); return; } nextToken(); } public final void nextToken() { bufPos = 0; if (comments != null) { comments = null; } this.lines = 0; int startLine = line; for (;;) { if (isWhitespace(ch)) { if (ch == '\n') { line++; lines = line - 
startLine; } scanChar(); continue; } if (ch == '$' && charAt(pos + 1) == '{') { scanVariable(); return; } if (isFirstIdentifierChar(ch)) { if (ch == 'N') { if (charAt(pos + 1) == '\'') { ++pos; ch = '\''; scanString(); token = Token.LITERAL_NCHARS; return; } } scanIdentifier(); return; } switch (ch) { case '0': if (charAt(pos + 1) == 'x') { scanChar(); scanChar(); scanHexaDecimal(); } else { scanNumber(); } return; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': scanNumber(); return; case ',': case '': scanChar(); token = COMMA; return; case '(': scanChar(); token = LPAREN; return; case ')': scanChar(); token = RPAREN; return; case '[': scanLBracket(); return; case ']': scanChar(); token = RBRACKET; return; case '{': scanChar(); token = LBRACE; return; case '}': scanChar(); token = RBRACE; return; case ':': scanChar(); if (ch == '=') { scanChar(); token = COLONEQ; } else if (ch == ':') { scanChar(); token = COLONCOLON; } else { if (isDigit(ch)) { unscan(); scanVariable(); } else { unscan(); scanVariable(); } } return; case '#': scanSharp(); if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT) && skipComment) { bufPos = 0; continue; } return; case '.': scanChar(); if (isDigit(ch) && !isFirstIdentifierChar(charAt(pos - 2))) { unscan(); scanNumber(); return; } else if (ch == '.') { scanChar(); if (ch == '.') { scanChar(); token = Token.DOTDOTDOT; } else { token = Token.DOTDOT; } } else { token = Token.DOT; } return; case '\'': scanString(); return; case '\"': scanAlias(); return; case '*': scanChar(); token = Token.STAR; return; case '?': scanChar(); token = Token.QUES; return; case ';': scanChar(); token = Token.SEMI; return; case '`': throw new ParserException("TODO"); // TODO case '@': scanVariable(); return; case '-': int subNextChar = charAt(pos + 1); if (subNextChar == '-') { scanComment(); if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT) && skipComment) { bufPos = 0; 
continue; } } else { scanOperator(); } return; case '/': int nextChar = charAt(pos + 1); if (nextChar == '/' || nextChar == '*') { scanComment(); if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT) && skipComment) { bufPos = 0; continue; } } else { token = Token.SLASH; scanChar(); } return; default: if (Character.isLetter(ch)) { scanIdentifier(); return; } if (isOperator(ch)) { scanOperator(); return; } // QS_TODO ? if (isEOF()) { // JLS token = EOF; } else { lexError("illegal.char", String.valueOf((int) ch)); scanChar(); } return; } } } protected void scanLBracket() { scanChar(); token = LBRACKET; } private final void scanOperator() { switch (ch) { case '+': scanChar(); token = Token.PLUS; break; case '-': scanChar(); if (ch == '>') { scanChar(); if (ch == '>') { scanChar(); token = Token.SUBGTGT; } else { token = Token.SUBGT; } } else { token = Token.SUB; } break; case '*': scanChar(); token = Token.STAR; break; case '/': scanChar(); token = Token.SLASH; break; case '&': scanChar(); if (ch == '&') { scanChar(); token = Token.AMPAMP; } else { token = Token.AMP; } break; case '|': scanChar(); if (ch == '|') { scanChar(); if (ch == '/') { scanChar(); token = Token.BARBARSLASH; } else { token = Token.BARBAR; } } else if (ch == '/') { scanChar(); token = Token.BARSLASH; } else { token = Token.BAR; } break; case '^': scanChar(); token = Token.CARET; break; case '%': scanChar(); token = Token.PERCENT; break; case '=': scanChar(); if (ch == '=') { scanChar(); token = Token.EQEQ; } else { token = Token.EQ; } break; case '>': scanChar(); if (ch == '=') { scanChar(); token = Token.GTEQ; } else if (ch == '>') { scanChar(); token = Token.GTGT; } else { token = Token.GT; } break; case '<': scanChar(); if (ch == '=') { scanChar(); if (ch == '>') { token = Token.LTEQGT; scanChar(); } else { token = Token.LTEQ; } } else if (ch == '>') { scanChar(); token = Token.LTGT; } else if (ch == '<') { scanChar(); token = Token.LTLT; } else { token = Token.LT; } 
break; case '!': scanChar(); if (ch == '=') { scanChar(); token = Token.BANGEQ; } else if (ch == '>') { scanChar(); token = Token.BANGGT; } else if (ch == '<') { scanChar(); token = Token.BANGLT; } else if (ch == '!') { scanChar(); token = Token.BANGBANG; // postsql } else { token = Token.BANG; } break; case '?': scanChar(); token = Token.QUES; break; case '~': scanChar(); token = Token.TILDE; break; default: throw new ParserException("TODO"); } } protected void scanString() { mark = pos; boolean hasSpecial = false; for (;;) { if (isEOF()) { lexError("unclosed.str.lit"); return; } ch = charAt(++pos); if (ch == '\'') { scanChar(); if (ch != '\'') { token = LITERAL_CHARS; break; } else { if (!hasSpecial) { initBuff(bufPos); arraycopy(mark + 1, buf, 0, bufPos); hasSpecial = true; } putChar('\''); continue; } } if (!hasSpecial) { bufPos++; continue; } if (bufPos == buf.length) { putChar(ch); } else { buf[bufPos++] = ch; } } if (!hasSpecial) { stringVal = subString(mark + 1, bufPos); } else { stringVal = new String(buf, 0, bufPos); } } protected final void scanString2() { { boolean hasSpecial = false; int startIndex = pos + 1; int endIndex = -1; // text.indexOf('\'', startIndex); for (int i = startIndex; i < text.length(); ++i) { final char ch = text.charAt(i); if (ch == '\\') { hasSpecial = true; continue; } if (ch == '\'') { endIndex = i; break; } } if (endIndex == -1) { throw new ParserException("unclosed str"); } String stringVal = subString(startIndex, endIndex - startIndex); // hasSpecial = stringVal.indexOf('\\') != -1; if (!hasSpecial) { this.stringVal = stringVal; int pos = endIndex + 1; char ch = charAt(pos); if (ch != '\'') { this.pos = pos; this.ch = ch; token = LITERAL_CHARS; return; } } } mark = pos; boolean hasSpecial = false; for (;;) { if (isEOF()) { lexError("unclosed.str.lit"); return; } ch = charAt(++pos); if (ch == '\\') { scanChar(); if (!hasSpecial) { initBuff(bufPos); arraycopy(mark + 1, buf, 0, bufPos); hasSpecial = true; } switch (ch) { case 
'0': putChar('\0'); break; case '\'': putChar('\''); break; case '"': putChar('"'); break; case 'b': putChar('\b'); break; case 'n': putChar('\n'); break; case 'r': putChar('\r'); break; case 't': putChar('\t'); break; case '\\': putChar('\\'); break; case 'Z': putChar((char) 0x1A); // ctrl + Z break; default: putChar(ch); break; } continue; } if (ch == '\'') { scanChar(); if (ch != '\'') { token = LITERAL_CHARS; break; } else { if (!hasSpecial) { initBuff(bufPos); arraycopy(mark + 1, buf, 0, bufPos); hasSpecial = true; } putChar('\''); continue; } } if (!hasSpecial) { bufPos++; continue; } if (bufPos == buf.length) { putChar(ch); } else { buf[bufPos++] = ch; } } if (!hasSpecial) { stringVal = subString(mark + 1, bufPos); } else { stringVal = new String(buf, 0, bufPos); } } private final void scanAlias() { mark = pos; if (buf == null) { buf = new char[32]; } boolean hasSpecial = false; for (;;) { if (isEOF()) { lexError("unclosed.str.lit"); return; } ch = charAt(++pos); if (ch == '\"' && charAt(pos - 1) != '\\') { scanChar(); token = LITERAL_ALIAS; break; } if (ch == '\\') { scanChar(); if (ch == '"') { hasSpecial = true; } else { unscan(); } } if (bufPos == buf.length) { putChar(ch); } else { buf[bufPos++] = ch; } } if (!hasSpecial) { stringVal = subString(mark + 1, bufPos); } else { stringVal = new String(buf, 0, bufPos); } //stringVal = subString(mark + 1, bufPos); } public void scanSharp() { scanVariable(); } public void scanVariable() { if (ch != '@' && ch != ':' && ch != '#' && ch != '$') { throw new ParserException("illegal variable"); } mark = pos; bufPos = 1; char ch; if (charAt(pos + 1) == '@') { ch = charAt(++pos); bufPos++; } else if (charAt(pos + 1) == '{') { pos++; bufPos++; for (;;) { ch = charAt(++pos); if (ch == '}') { break; } bufPos++; continue; } if (ch != '}') { throw new ParserException("syntax error"); } ++pos; bufPos++; this.ch = charAt(pos); stringVal = addSymbol(); token = Token.VARIANT; return; } for (;;) { ch = charAt(++pos); if 
(!isIdentifierChar(ch)) { break; } bufPos++; continue; } this.ch = charAt(pos); stringVal = addSymbol(); token = Token.VARIANT; } public void scanComment() { if (!allowComment) { throw new NotAllowCommentException(); } if ((ch == '/' && charAt(pos + 1) == '/') || (ch == '-' && charAt(pos + 1) == '-')) { scanSingleLineComment(); } else if (ch == '/' && charAt(pos + 1) == '*') { scanMultiLineComment(); } else { throw new IllegalStateException(); } } private void scanMultiLineComment() { Token lastToken = this.token; scanChar(); scanChar(); mark = pos; bufPos = 0; for (;;) { if (ch == '*' && charAt(pos + 1) == '/') { scanChar(); scanChar(); break; } // multiline comment? if (ch == EOI) { throw new ParserException("unterminated /* comment."); } scanChar(); bufPos++; } stringVal = subString(mark, bufPos); token = Token.MULTI_LINE_COMMENT; if (keepComments) { addComment(stringVal); } if (commentHandler != null && commentHandler.handle(lastToken, stringVal)) { return; } if (!isAllowComment() && !isSafeComment(stringVal)) { throw new NotAllowCommentException(); } } private void scanSingleLineComment() { Token lastToken = this.token; scanChar(); scanChar(); mark = pos - 1; bufPos = 0; for (;;) { if (ch == '\r') { if (charAt(pos + 1) == '\n') { line++; scanChar(); break; } bufPos++; break; } if (ch == '\n') { line++; scanChar(); break; } // single line comment? 
if (ch == EOI) { throw new ParserException("syntax error at end of input."); } scanChar(); bufPos++; } stringVal = subString(mark, bufPos); token = Token.LINE_COMMENT; if (keepComments) { addComment(stringVal); } if (commentHandler != null && commentHandler.handle(lastToken, stringVal)) { return; } if (!isAllowComment() && !isSafeComment(stringVal)) { throw new NotAllowCommentException(); } } public void scanIdentifier() { final char first = ch; final boolean firstFlag = isFirstIdentifierChar(first); if (!firstFlag) { throw new ParserException("illegal identifier"); } mark = pos; bufPos = 1; char ch; for (;;) { ch = charAt(++pos); if (!isIdentifierChar(ch)) { break; } bufPos++; continue; } this.ch = charAt(pos); stringVal = addSymbol(); Token tok = keywods.getKeyword(stringVal); if (tok != null) { token = tok; } else { token = Token.IDENTIFIER; } } public void scanNumber() { mark = pos; if (ch == '-') { bufPos++; ch = charAt(++pos); } for (;;) { if (ch >= '0' && ch <= '9') { bufPos++; } else { break; } ch = charAt(++pos); } boolean isDouble = false; if (ch == '.') { if (charAt(pos + 1) == '.') { token = Token.LITERAL_INT; return; } bufPos++; ch = charAt(++pos); isDouble = true; for (;;) { if (ch >= '0' && ch <= '9') { bufPos++; } else { break; } ch = charAt(++pos); } } if (ch == 'e' || ch == 'E') { bufPos++; ch = charAt(++pos); if (ch == '+' || ch == '-') { bufPos++; ch = charAt(++pos); } for (;;) { if (ch >= '0' && ch <= '9') { bufPos++; } else { break; } ch = charAt(++pos); } isDouble = true; } if (isDouble) { token = Token.LITERAL_FLOAT; } else { token = Token.LITERAL_INT; } } public void scanHexaDecimal() { mark = pos; if (ch == '-') { bufPos++; ch = charAt(++pos); } for (;;) { if (CharTypes.isHex(ch)) { bufPos++; } else { break; } ch = charAt(++pos); } token = Token.LITERAL_HEX; } public String hexString() { return subString(mark, bufPos); } public final boolean isDigit(char ch) { return ch >= '0' && ch <= '9'; } /** * Append a character to sbuf. 
*/ protected final void putChar(char ch) { if (bufPos == buf.length) { char[] newsbuf = new char[buf.length * 2]; System.arraycopy(buf, 0, newsbuf, 0, buf.length); buf = newsbuf; } buf[bufPos++] = ch; } /** * Return the current token's position: a 0-based offset from beginning of the raw input stream (before unicode * translation) */ public final int pos() { return pos; } /** * The value of a literal token, recorded as a string. For integers, leading 0x and 'l' suffixes are suppressed. */ public final String stringVal() { return stringVal; } public final List<String> readAndResetComments() { List<String> comments = this.comments; this.comments = null; return comments; } private boolean isOperator(char ch) { switch (ch) { case '!': case '%': case '&': case '*': case '+': case '-': case '<': case '=': case '>': case '^': case '|': case '~': case ';': return true; default: return false; } } private static final long MULTMIN_RADIX_TEN = Long.MIN_VALUE / 10; private static final long N_MULTMAX_RADIX_TEN = -Long.MAX_VALUE / 10; private final static int[] digits = new int[(int) '9' + 1]; static { for (int i = '0'; i <= '9'; ++i) { digits[i] = i - '0'; } } // QS_TODO negative number is invisible for lexer public Number integerValue() { long result = 0; boolean negative = false; int i = mark, max = mark + bufPos; long limit; long multmin; int digit; if (charAt(mark) == '-') { negative = true; limit = Long.MIN_VALUE; i++; } else { limit = -Long.MAX_VALUE; } multmin = negative ? 
MULTMIN_RADIX_TEN : N_MULTMAX_RADIX_TEN; if (i < max) { digit = digits[charAt(i++)]; result = -digit; } while (i < max) { // Accumulating negatively avoids surprises near MAX_VALUE digit = digits[charAt(i++)]; if (result < multmin) { return new BigInteger(numberString()); } result *= 10; if (result < limit + digit) { return new BigInteger(numberString()); } result -= digit; } if (negative) { if (i > mark + 1) { if (result >= Integer.MIN_VALUE) { return (int) result; } return result; } else { /* Only got "-" */ throw new NumberFormatException(numberString()); } } else { result = -result; if (result <= Integer.MAX_VALUE) { return (int) result; } return result; } } public int bp() { return this.pos; } public char current() { return this.ch; } public void reset(int mark, char markChar, Token token) { this.pos = mark; this.ch = markChar; this.token = token; } public final String numberString() { return subString(mark, bufPos); } public BigDecimal decimalValue() { String value = subString(mark, bufPos); if (!StringUtils.isNumber(value)) { throw new ParserException(value + " is not a number!"); } return new BigDecimal(value.toCharArray()); } public static interface CommentHandler { boolean handle(Token lastToken, String comment); } public boolean hasComment() { return comments != null; } public void skipToEOF() { pos = text.length(); this.token = Token.EOF; } public boolean isEndOfComment() { return endOfComment; } protected boolean isSafeComment(String comment) { if (comment == null) { return true; } comment = comment.toLowerCase(); if (comment.indexOf("select") != -1 // || comment.indexOf("delete") != -1 // || comment.indexOf("insert") != -1 // || comment.indexOf("update") != -1 // || comment.indexOf("into") != -1 // || comment.indexOf("where") != -1 // || comment.indexOf("or") != -1 // || comment.indexOf("and") != -1 // || comment.indexOf("union") != -1 // || comment.indexOf('\'') != -1 // || comment.indexOf('=') != -1 // || comment.indexOf('>') != -1 // || 
comment.indexOf('<') != -1 // || comment.indexOf('&') != -1 // || comment.indexOf('|') != -1 // || comment.indexOf('^') != -1 // ) { return false; } return true; } protected void addComment(String comment) { if (comments == null) { comments = new ArrayList<String>(2); } comments.add(stringVal); } public int getLine() { return line; } }