Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.phoenix.expression.function; import java.io.DataInput; import java.io.IOException; import java.sql.SQLException; import java.text.Collator; import java.util.List; import java.util.Locale; import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.phoenix.expression.Expression; import org.apache.phoenix.parse.FunctionParseNode; import org.apache.phoenix.schema.tuple.Tuple; import org.apache.phoenix.schema.types.PBoolean; import org.apache.phoenix.schema.types.PDataType; import org.apache.phoenix.schema.types.PInteger; import org.apache.phoenix.schema.types.PVarbinary; import org.apache.phoenix.schema.types.PVarchar; import org.apache.phoenix.util.VarBinaryFormatter; import com.force.db.i18n.LinguisticSort; import com.force.i18n.LocaleUtils; /** * A Phoenix Function that calculates a collation key for an input string based * on a caller-provided locale and collator strength and decomposition settings. 
* * The locale should be specified as xx_yy_variant where xx is the ISO 639-1 * 2-letter language code, yy is the ISO 3166 2-letter country code. Both * countryCode and variant are optional. For example, zh_TW_STROKE, zh_TW and zh * are all valid locale representations. Note the language code, country code * and variant are used as arguments to the constructor of java.util.Locale. * * This function uses the open-source i18n-util package to obtain the collators * it needs from the provided locale. * * The LinguisticSort implementation in i18n-util encapsulates sort-related * functionality for a substantive list of locales. For each locale, it provides * a collator and an Oracle-specific database function that can be used to sort * strings according to the natural language rules of that locale. * * This function uses the collator returned by LinguisticSort.getCollator to * produce a collation key for its input string. A user can expect that the * sorting semantics of this function for a given locale is equivalent to the * sorting behaviour of an Oracle query that is constructed using the Oracle * functions returned by LinguisticSort for that locale. * * The optional third argument to the function is a boolean that specifies * whether to use the upper-case collator (case-insensitive) returned by * LinguisticSort.getUpperCaseCollator. * * The optional fourth and fifth arguments are used to set respectively the * strength and decomposition of the collator returned by LinguisticSort using the * setStrength and setDecomposition methods of java.text.Collator. 
* */ @FunctionParseNode.BuiltInFunction(name = CollationKeyFunction.NAME, args = { // input string @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }), // ISO Code for Locale @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }, isConstant = true), // whether to use special upper case collator @FunctionParseNode.Argument(allowedTypes = { PBoolean.class }, defaultValue = "false", isConstant = true), // collator strength @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true), // collator decomposition @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true) }) public class CollationKeyFunction extends ScalarFunction { private static final Log LOG = LogFactory.getLog(CollationKeyFunction.class); public static final String NAME = "COLLATION_KEY"; private Collator collator; public CollationKeyFunction() { } public CollationKeyFunction(List<Expression> children) throws SQLException { super(children); initialize(); } @Override public void readFields(DataInput input) throws IOException { super.readFields(input); initialize(); } @Override public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) { Expression expression = getChildren().get(0); if (!expression.evaluate(tuple, ptr)) { return false; } String inputString = (String) PVarchar.INSTANCE.toObject(ptr, expression.getSortOrder()); if (LOG.isTraceEnabled()) { LOG.trace("CollationKey inputString: " + inputString); } if (inputString == null) { return true; } byte[] collationKeyByteArray = collator.getCollationKey(inputString).toByteArray(); if (LOG.isTraceEnabled()) { LOG.trace("CollationKey bytes: " + VarBinaryFormatter.INSTANCE.format(collationKeyByteArray)); } ptr.set(collationKeyByteArray); return true; } private void initialize() { String localeISOCode = getLiteralValue(1, String.class); Boolean useSpecialUpperCaseCollator = getLiteralValue(2, Boolean.class); Integer collatorStrength = 
getLiteralValue(3, Integer.class); Integer collatorDecomposition = getLiteralValue(4, Integer.class); if (LOG.isTraceEnabled()) { StringBuilder logInputsMessage = new StringBuilder(); logInputsMessage.append("Input (literal) arguments:").append("localeISOCode: " + localeISOCode) .append(", useSpecialUpperCaseCollator: " + useSpecialUpperCaseCollator) .append(", collatorStrength: " + collatorStrength) .append(", collatorDecomposition: " + collatorDecomposition); LOG.trace(logInputsMessage); } Locale locale = LocaleUtils.get().getLocaleByIsoCode(localeISOCode); if (LOG.isTraceEnabled()) { LOG.trace(String.format("Locale: " + locale.toLanguageTag())); } LinguisticSort linguisticSort = LinguisticSort.get(locale); collator = BooleanUtils.isTrue(useSpecialUpperCaseCollator) ? linguisticSort.getUpperCaseCollator(false) : linguisticSort.getCollator(); if (collatorStrength != null) { collator.setStrength(collatorStrength); } if (collatorDecomposition != null) { collator.setDecomposition(collatorDecomposition); } if (LOG.isTraceEnabled()) { LOG.trace(String.format("Collator: [strength: %d, decomposition: %d], Special-Upper-Case: %s", collator.getStrength(), collator.getDecomposition(), BooleanUtils.isTrue(useSpecialUpperCaseCollator))); } } @Override public PDataType getDataType() { return PVarbinary.INSTANCE; } @Override public String getName() { return NAME; } @Override public boolean isThreadSafe() { // ICU4J Collators are not thread-safe unless they are frozen. // TODO: Look into calling freeze() on them to be able return true here. return false; } @Override public boolean isNullable() { return getChildren().get(0).isNullable(); } }