org.apache.hadoop.hive.accumulo.columns.ColumnMappingFactory.java — source code

Java tutorial

Introduction

Below is the source code for org.apache.hadoop.hive.accumulo.columns.ColumnMappingFactory.java.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.accumulo.columns;

import java.util.Map.Entry;

import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

/**
 * Factory that parses Hive/Accumulo column mapping specifications into the appropriate
 * {@link ColumnMapping} implementation. A specification is a colon-separated column family and
 * qualifier (e.g. {@code "cf:cq"}), optionally suffixed with an encoding code, or the special
 * rowid token for the Accumulo row id. A qualifier ending in an unescaped asterisk denotes a
 * prefix mapping to a Hive map type.
 */
public class ColumnMappingFactory {
    private static final Logger log = LoggerFactory.getLogger(ColumnMappingFactory.class);

    // Utility class: all members are static, so prevent instantiation.
    private ColumnMappingFactory() {}

    /**
     * Generate the proper instance of a ColumnMapping
     *
     * @param columnSpec
     *          Specification for mapping this column to Accumulo
     * @param defaultEncoding
     *          The default encoding in which values should be encoded to Accumulo, used when the
     *          specification carries no explicit encoding suffix
     * @param columnName
     *          Name of the Hive column being mapped
     * @param columnType
     *          Hive type of the column being mapped
     * @return A {@link ColumnMapping} for the given specification
     */
    public static ColumnMapping get(String columnSpec, ColumnEncoding defaultEncoding, String columnName,
            TypeInfo columnType) {
        Preconditions.checkNotNull(columnSpec);
        Preconditions.checkNotNull(columnName);
        Preconditions.checkNotNull(columnType);

        // Check for column encoding specification
        if (ColumnEncoding.hasColumnEncoding(columnSpec)) {
            String columnEncodingStr = ColumnEncoding.getColumnEncoding(columnSpec);
            columnSpec = ColumnEncoding.stripCode(columnSpec);

            if (AccumuloHiveConstants.ROWID.equalsIgnoreCase(columnSpec)) {
                return new HiveAccumuloRowIdColumnMapping(columnSpec, ColumnEncoding.get(columnEncodingStr),
                        columnName, columnType.getTypeName());
            }

            Entry<String, String> pair = parseMapping(columnSpec);
            // Determine prefix-ness before unescaping: an escaped trailing \* is a literal
            // asterisk, not a prefix marker.
            boolean isPrefix = isPrefix(pair.getValue());

            // Replace any \* in the qualifier with a regular *, mirroring the handling in the
            // default-encoding path (previously this path skipped the unescape entirely).
            String cq = unescapeAsterisks(pair.getValue());

            if (isPrefix) {
                // Sanity check that, for a map, we got 2 encodings
                if (!ColumnEncoding.isMapEncoding(columnEncodingStr)) {
                    throw new IllegalArgumentException("Expected map encoding for a map specification, "
                            + columnSpec + " with encoding " + columnEncodingStr);
                }

                Entry<ColumnEncoding, ColumnEncoding> encodings = ColumnEncoding
                        .getMapEncoding(columnEncodingStr);

                // Strip the trailing asterisk from the qualifier prefix, consistent with the
                // default-encoding path (previously the raw value, asterisk included, was passed).
                return new HiveAccumuloMapColumnMapping(pair.getKey(), cq.substring(0, cq.length() - 1),
                        encodings.getKey(), encodings.getValue(), columnName, columnType.getTypeName());
            }

            return new HiveAccumuloColumnMapping(pair.getKey(), cq,
                    ColumnEncoding.getFromMapping(columnEncodingStr), columnName, columnType.getTypeName());
        }

        // No explicit encoding: everything below falls back to defaultEncoding
        if (AccumuloHiveConstants.ROWID.equalsIgnoreCase(columnSpec)) {
            return new HiveAccumuloRowIdColumnMapping(columnSpec, defaultEncoding, columnName,
                    columnType.getTypeName());
        }

        Entry<String, String> pair = parseMapping(columnSpec);
        boolean isPrefix = isPrefix(pair.getValue());

        // Replace any \* that appear in the prefix with a regular *
        String cq = unescapeAsterisks(pair.getValue());

        if (isPrefix) {
            return new HiveAccumuloMapColumnMapping(pair.getKey(), cq.substring(0, cq.length() - 1),
                    defaultEncoding, defaultEncoding, columnName, columnType.getTypeName());
        }

        return new HiveAccumuloColumnMapping(pair.getKey(), cq, defaultEncoding, columnName,
                columnType.getTypeName());
    }

    /**
     * Creates a {@link HiveAccumuloMapColumnMapping} with explicit key and value encodings.
     *
     * @param columnSpec
     *          Colon-separated family and qualifier-prefix specification
     * @param keyEncoding
     *          Encoding for the map keys (Accumulo column qualifier suffix)
     * @param valueEncoding
     *          Encoding for the map values (Accumulo value)
     * @param columnName
     *          Name of the Hive column being mapped
     * @param columnType
     *          Hive type of the column being mapped
     * @return A map column mapping for the given specification
     */
    public static ColumnMapping getMap(String columnSpec, ColumnEncoding keyEncoding, ColumnEncoding valueEncoding,
            String columnName, TypeInfo columnType) {
        Entry<String, String> pair = parseMapping(columnSpec);
        // Use getTypeName() for consistency with get(); TypeInfo.toString() is not guaranteed
        // to be the canonical type name.
        return new HiveAccumuloMapColumnMapping(pair.getKey(), pair.getValue(), keyEncoding, valueEncoding,
                columnName, columnType.getTypeName());
    }

    /**
     * Returns true if the given column qualifier ends with an unescaped asterisk, marking it as
     * a qualifier prefix (i.e. a Hive map mapping).
     *
     * @param maybePrefix
     *          Column qualifier component to inspect; must not be null or empty
     */
    public static boolean isPrefix(String maybePrefix) {
        Preconditions.checkNotNull(maybePrefix);

        if (AccumuloHiveConstants.ASTERISK == maybePrefix.charAt(maybePrefix.length() - 1)) {
            if (maybePrefix.length() > 1) {
                // A trailing \* is a literal asterisk, not a prefix marker
                return AccumuloHiveConstants.ESCAPE != maybePrefix.charAt(maybePrefix.length() - 2);
            } else {
                return true;
            }
        }

        // If we couldn't find an asterisk, it's not a prefix
        return false;
    }

    /**
     * Consumes the column mapping specification and breaks it into column family and column
     * qualifier. Escaped colons ({@code \:}) do not act as the separator and are unescaped in
     * the returned components.
     *
     * @param columnSpec
     *          Colon-separated family and qualifier specification
     * @return An immutable (family, qualifier) pair
     * @throws InvalidColumnMappingException
     *           If no unescaped separator colon can be found
     */
    public static Entry<String, String> parseMapping(String columnSpec) throws InvalidColumnMappingException {
        int index = 0;
        while (true) {
            if (index >= columnSpec.length()) {
                log.error("Cannot parse '" + columnSpec + "' as colon-separated column configuration");
                throw new InvalidColumnMappingException(
                        "Columns must be provided as colon-separated family and qualifier pairs");
            }

            index = columnSpec.indexOf(AccumuloHiveConstants.COLON, index);

            if (-1 == index) {
                log.error("Cannot parse '" + columnSpec + "' as colon-separated column configuration");
                throw new InvalidColumnMappingException(
                        "Columns must be provided as colon-separated family and qualifier pairs");
            }

            // Check for an escape character before the colon. Note this must test index > 0
            // (not index - 1 > 0) so an escape at position zero, e.g. "\:cq", is honored.
            if (index > 0 && AccumuloHiveConstants.ESCAPE == columnSpec.charAt(index - 1)) {
                // this colon is escaped, search again after it
                index++;
                continue;
            }

            // Not escaped: this colon is the family/qualifier separator
            break;
        }

        String cf = unescapeColons(columnSpec.substring(0, index));
        String cq = unescapeColons(columnSpec.substring(index + 1));

        return Maps.immutableEntry(cf, cq);
    }

    /**
     * Replaces escaped colons ({@code \:}) in the given component with literal colons.
     */
    private static String unescapeColons(String component) {
        // Check for the escaped colon to remove before doing the expensive regex replace
        if (-1 != component.indexOf(AccumuloHiveConstants.ESCAPED_COLON)) {
            return component.replaceAll(AccumuloHiveConstants.ESCAPED_COLON_REGEX,
                    Character.toString(AccumuloHiveConstants.COLON));
        }
        return component;
    }

    /**
     * Replaces escaped asterisks ({@code \*}) in the given qualifier with literal asterisks.
     */
    private static String unescapeAsterisks(String cq) {
        // Check for the escaped asterisk to remove before doing the expensive regex replace
        if (-1 != cq.indexOf(AccumuloHiveConstants.ESCAPED_ASTERISK)) {
            return cq.replaceAll(AccumuloHiveConstants.ESCAPED_ASERTISK_REGEX,
                    Character.toString(AccumuloHiveConstants.ASTERISK));
        }
        return cq;
    }
}