Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.phoenix.util; import java.util.Arrays; import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.phoenix.exception.UndecodableByteException; import org.apache.phoenix.schema.SortOrder; import com.google.common.base.Preconditions; public class StringUtil { public static final String EMPTY_STRING = ""; // Masks to determine how many bytes are in each character // From http://tools.ietf.org/html/rfc3629#section-3 public static final byte SPACE_UTF8 = 0x20; private static final int BYTES_1_MASK = 0xFF << 7; // 0xxxxxxx is a single byte char private static final int BYTES_2_MASK = 0xFF << 5; // 110xxxxx is a double byte char private static final int BYTES_3_MASK = 0xFF << 4; // 1110xxxx is a triple byte char private static final int BYTES_4_MASK = 0xFF << 3; // 11110xxx is a quadruple byte char public static final byte INVERTED_SPACE_UTF8 = SortOrder.invert(new byte[] { SPACE_UTF8 }, 0, new byte[1], 0, 1)[0]; public final static char SINGLE_CHAR_WILDCARD = '?'; public final static char SINGLE_CHAR_LIKE = '_'; public final static char MULTI_CHAR_WILDCARD = '*'; public final static char MULTI_CHAR_LIKE = '%'; public final static String[] LIKE_ESCAPE_SEQS = new String[] { "\\" + SINGLE_CHAR_LIKE, "\\" + MULTI_CHAR_LIKE }; public final static String[] LIKE_UNESCAPED_SEQS = new String[] { "" + SINGLE_CHAR_LIKE, "" + MULTI_CHAR_LIKE }; private StringUtil() { } /** Replace instances of character ch in String value with String replacement */ public static String replaceChar(String value, char ch, CharSequence replacement) { if (value == null) return null; int i = value.indexOf(ch); if (i == -1) return value; // nothing to do // we've got at least one character to replace StringBuilder buf = new StringBuilder(value.length() + 16); // some extra space int j = 0; while (i != -1) { buf.append(value, j, i).append(replacement); j = i + 1; i = value.indexOf(ch, j); } if (j < value.length()) buf.append(value, j, value.length()); return buf.toString(); } /** * @return the replacement of all occurrences of src[i] with target[i] in s. Src and target are not regex's so this * uses simple searching with indexOf() */ public static String replace(String s, String[] src, String[] target) { assert src != null && target != null && src.length > 0 && src.length == target.length; if (src.length == 1 && src[0].length() == 1) { return replaceChar(s, src[0].charAt(0), target[0]); } if (s == null) return null; StringBuilder sb = new StringBuilder(s.length()); int pos = 0; int limit = s.length(); int lastMatch = 0; while (pos < limit) { boolean matched = false; for (int i = 0; i < src.length; i++) { if (s.startsWith(src[i], pos) && src[i].length() > 0) { // we found a matching pattern - append the acculumation plus the replacement sb.append(s.substring(lastMatch, pos)).append(target[i]); pos += src[i].length(); lastMatch = pos; matched = true; break; } } if (!matched) { // we didn't match any patterns, so move forward 1 character pos++; } } // see if we found any matches if (lastMatch == 0) { // we didn't match anything, so return the source string return s; } // apppend the trailing portion sb.append(s.substring(lastMatch)); return sb.toString(); } public static int getBytesInChar(byte b, SortOrder sortOrder) { int ret = getBytesInCharNoException(b, sortOrder); if (ret == -1) throw new UndecodableByteException(b); return ret; } private static int getBytesInCharNoException(byte b, SortOrder sortOrder) { Preconditions.checkNotNull(sortOrder); if (sortOrder == SortOrder.DESC) { b = SortOrder.invert(b); } int c = b & 0xff; if ((c & BYTES_1_MASK) == 0) return 1; if ((c & BYTES_2_MASK) == 0xC0) return 2; if ((c & BYTES_3_MASK) == 0xE0) return 3; if ((c & BYTES_4_MASK) == 0xF0) return 4; return -1; } public static int calculateUTF8Length(byte[] bytes, int offset, int length, SortOrder sortOrder) { int i = offset, endOffset = offset + length; length = 0; while (i < endOffset) { int charLength = getBytesInChar(bytes[i], sortOrder); i += charLength; length++; } return length; } // given an array of bytes containing utf-8 encoded strings, starting from curPos, ending before // range, and return the next character offset, -1 if no next character available or // UndecodableByteException private static int calculateNextCharOffset(byte[] bytes, int curPos, int range, SortOrder sortOrder) { int ret = getBytesInCharNoException(bytes[curPos], sortOrder); if (ret == -1) return -1; ret += curPos; if (ret >= range) return -1; return ret; } // given an array of bytes containing utf-8 encoded strings, starting from offset, and return // the previous character offset , -1 if UndecodableByteException. curPos points to current // character starting offset. private static int calculatePreCharOffset(byte[] bytes, int curPos, int offset, SortOrder sortOrder) { --curPos; for (int i = 1, pos = curPos - i + 1; i <= 4 && offset <= pos; ++i, --pos) { int ret = getBytesInCharNoException(bytes[pos], sortOrder); if (ret == i) return pos; } return -1; } // return actural offsetInBytes corresponding to offsetInStr in utf-8 encoded strings bytes // containing // @param bytes an array of bytes containing utf-8 encoded strings // @param offset // @param length // @param sortOrder // @param offsetInStr offset for utf-8 encoded strings bytes array containing. Can be negative // meaning counting from the end of encoded strings // @return actural offsetInBytes corresponding to offsetInStr. -1 if offsetInStr is out of index public static int calculateUTF8Offset(byte[] bytes, int offset, int length, SortOrder sortOrder, int offsetInStr) { if (offsetInStr == 0) return offset; int ret, range = offset + length; if (offsetInStr > 0) { ret = offset; while (offsetInStr > 0) { ret = calculateNextCharOffset(bytes, ret, range, sortOrder); if (ret == -1) return -1; --offsetInStr; } } else { ret = offset + length; while (offsetInStr < 0) { ret = calculatePreCharOffset(bytes, ret, offset, sortOrder); // if calculateCurCharOffset returns -1, ret must be smaller than offset if (ret < offset) return -1; ++offsetInStr; } } return ret; } // Given an array of bytes containing encoding utf-8 encoded strings, the offset and a length // parameter, return the actual index into the byte array which would represent a substring // of <length> starting from the character at <offset>. We assume the <offset> is the start // byte of an UTF-8 character. public static int getByteLengthForUtf8SubStr(byte[] bytes, int offset, int length, SortOrder sortOrder) { int byteLength = 0; while (length > 0 && offset + byteLength < bytes.length) { int charLength = getBytesInChar(bytes[offset + byteLength], sortOrder); byteLength += charLength; length--; } return byteLength; } public static boolean hasMultiByteChars(String s) { for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c > 0x007F) { return true; } } return false; } public static int getFirstNonBlankCharIdxFromStart(byte[] string, int offset, int length, SortOrder sortOrder) { int i = offset; byte space = sortOrder == SortOrder.ASC ? SPACE_UTF8 : INVERTED_SPACE_UTF8; for (; i < offset + length; i++) { if (string[i] != space) { break; } } return i; } public static int getFirstNonBlankCharIdxFromEnd(byte[] string, int offset, int length, SortOrder sortOrder) { int i = offset + length - 1; byte space = sortOrder == SortOrder.ASC ? SPACE_UTF8 : INVERTED_SPACE_UTF8; for (; i >= offset; i--) { if (string[i] != space) { break; } } return i; } // A toBytes function backed up HBase's utility function, but would accept null input, in which // case it returns an empty byte array. public static byte[] toBytes(String input) { if (input == null) { return ByteUtil.EMPTY_BYTE_ARRAY; } return Bytes.toBytes(input); } public static String escapeLike(String s) { return replace(s, LIKE_UNESCAPED_SEQS, LIKE_ESCAPE_SEQS); } public static int getUnpaddedCharLength(byte[] b, int offset, int length, SortOrder sortOrder) { return getFirstNonBlankCharIdxFromEnd(b, offset, length, sortOrder) - offset + 1; } public static byte[] padChar(byte[] value, Integer byteSize) { byte[] newValue = Arrays.copyOf(value, byteSize); if (newValue.length > value.length) { Arrays.fill(newValue, value.length, newValue.length, SPACE_UTF8); } return newValue; } /** * Lame - StringBuilder.equals is retarded. * @param b1 * @param b2 * @return whether or not the two builders consist the same sequence of characters */ public static boolean equals(StringBuilder b1, StringBuilder b2) { if (b1.length() != b2.length()) { return false; } for (int i = 0; i < b1.length(); i++) { if (b1.charAt(i) != b2.charAt(i)) { return false; } } return true; } /** * LPAD implementation * * @param str * array containing string to be left padded * @param strOffset * byte offset of string * @param strLength * byte length of string * @param fill * array containing fill values * @param fillOffset * byte offset of fill * @param fillLength * byte length of fill * @param invertFill * if true inverts the bits in fill before filling the array * @param strWithPaddingLen * length of the string that is returned with fill values left padded * @return byte[] containing left padded string */ public static byte[] lpad(byte[] str, int strOffset, int strLength, byte[] fill, int fillOffset, int fillLength, boolean invertFill, int strWithPaddingLen) { byte[] paddedStr = new byte[strWithPaddingLen]; int fillStopIdx = strWithPaddingLen - strLength; // copy fill into the start of paddedStr fill(paddedStr, 0, fillStopIdx, fill, fillOffset, fillOffset + fillLength, invertFill); // fill remaining characters with original string System.arraycopy(str, strOffset, paddedStr, fillStopIdx, strLength); return paddedStr; } /** * Assigns the specified byte values to elements of the specified range of the specified array of bytes. The range * to be filled extends from index fromIndex, inclusive, to index toIndex, exclusive. (If * fromIndex==toIndex, the range to be filled is empty.) * * @param str * the array to be filled * @param strFromIdx * the index of the first element (inclusive) to be filled with the fill values * @param strToIdx * the index of the last element (exclusive) to be filled with the fill values * @param fillArray * the values to be stored in all elements of the array * @param fillFromIdx * the index of the first element (inclusive) to be used as fill values * @param filToIdx * the index of the last element (exclusive) to be used as fill value * @param invertFill * if true inverts the bits in fill before filling the array */ public static void fill(byte[] str, int strFromIdx, int strToIdx, byte[] fillArray, int fillFromIdx, int fillToIdx, boolean invertFill) { rangeCheck(str.length, strFromIdx, strToIdx); rangeCheck(fillArray.length, fillFromIdx, fillToIdx); int strIdx = strFromIdx; byte[] fill = fillArray; int fillLen = fillToIdx - fillFromIdx; if (invertFill) fill = SortOrder.invert(fillArray, fillFromIdx, fillLen); while (strIdx < strToIdx) { int fillIdx = fillFromIdx; while (fillIdx < fillToIdx && strIdx < strToIdx) { if (strIdx + fillLen < fillToIdx) { System.arraycopy(fill, fillFromIdx, str, strIdx, fillLen); } else { str[strIdx++] = fill[fillIdx++]; } } } } /** * Checks that fromIndex and toIndex are in the range and throws an appropriate exception, if they * are not */ private static void rangeCheck(int length, int fromIndex, int toIndex) { if (fromIndex > toIndex) { throw new IllegalArgumentException("fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")"); } if (fromIndex < 0) { throw new ArrayIndexOutOfBoundsException(fromIndex); } if (toIndex > length) { throw new ArrayIndexOutOfBoundsException(toIndex); } } public static String escapeStringConstant(String pattern) { return StringEscapeUtils.escapeSql(pattern); // Need to escape double quotes } public static String escapeBackslash(String input) { // see http://stackoverflow.com/questions/4653831/regex-how-to-escape-backslashes-and-special-characters return input.replaceAll("\\\\", "\\\\\\\\"); } }