hivemall.utils.hadoop.JsonSerdeUtils.java Source code

Introduction

Here is the source code for hivemall.utils.hadoop.JsonSerdeUtils.java, a utility class that serializes Hive objects into JSON text and deserializes JSON text into Hive-compatible Java objects.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
// This file contains code borrowed from
//   - org.apache.hive.hcatalog.data.JsonSerDe
package hivemall.utils.hadoop;

import hivemall.utils.io.FastByteArrayInputStream;
import hivemall.utils.lang.Preconditions;

import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;

public final class JsonSerdeUtils {

    /**
     * Serialize Hive objects as Text.
     */
    @Nonnull
    public static Text serialize(@Nullable final Object obj, @Nonnull final ObjectInspector oi)
            throws SerDeException {
        return serialize(obj, oi, null);
    }
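
    // A minimal usage sketch (an illustrative addition, not part of the
    // original file): serializing a plain Java string with one of Hive's
    // standard ObjectInspectors (PrimitiveObjectInspectorFactory is Hive's
    // stock factory).
    //
    //   ObjectInspector oi =
    //       PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    //   Text json = JsonSerdeUtils.serialize("hello", oi);
    //   // json.toString() -> "hello" (a quoted JSON string)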

    /**
     * Serialize Hive objects as Text. For struct types, an optional list of
     * column names either renames the fields (when its size matches the number
     * of struct fields) or selects which fields to emit.
     */
    @Nonnull
    public static Text serialize(@Nullable final Object obj, @Nonnull final ObjectInspector oi,
            @Nullable final List<String> columnNames) throws SerDeException {
        final StringBuilder sb = new StringBuilder();
        switch (oi.getCategory()) {
        case STRUCT:
            StructObjectInspector soi = (StructObjectInspector) oi;
            serializeStruct(sb, obj, soi, columnNames);
            break;
        case LIST:
            ListObjectInspector loi = (ListObjectInspector) oi;
            serializeList(sb, obj, loi);
            break;
        case MAP:
            MapObjectInspector moi = (MapObjectInspector) oi;
            serializeMap(sb, obj, moi);
            break;
        case PRIMITIVE:
            PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
            serializePrimitive(sb, obj, poi);
            break;
        default:
            throw new SerDeException("Unknown type in ObjectInspector: " + oi.getCategory());
        }

        return new Text(sb.toString());
    }

    /**
     * Serialize a Hive struct as a JSON object.
     */
    private static void serializeStruct(@Nonnull final StringBuilder sb, @Nullable final Object obj,
            @Nonnull final StructObjectInspector soi, @Nullable final List<String> columnNames)
            throws SerDeException {
        if (obj == null) {
            sb.append("null");
        } else {
            final List<? extends StructField> structFields = soi.getAllStructFieldRefs();
            sb.append(SerDeUtils.LBRACE);
            if (columnNames == null) {
                for (int i = 0, len = structFields.size(); i < len; i++) {
                    String colName = structFields.get(i).getFieldName();
                    if (i > 0) {
                        sb.append(SerDeUtils.COMMA);
                    }
                    appendWithQuotes(sb, colName);
                    sb.append(SerDeUtils.COLON);
                    buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)),
                            structFields.get(i).getFieldObjectInspector());
                }
            } else if (columnNames.size() == structFields.size()) {
                for (int i = 0, len = structFields.size(); i < len; i++) {
                    if (i > 0) {
                        sb.append(SerDeUtils.COMMA);
                    }
                    String colName = columnNames.get(i);
                    appendWithQuotes(sb, colName);
                    sb.append(SerDeUtils.COLON);
                    buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)),
                            structFields.get(i).getFieldObjectInspector());
                }
            } else {
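                // Note: this branch emits only the requested subset of columns;
                // sorting columnNames enables the binary search below, and any
                // requested column missing from the struct raises the
                // SerDeException at the end of this block.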
                Collections.sort(columnNames);
                final List<String> found = new ArrayList<>(columnNames.size());
                for (int i = 0, len = structFields.size(); i < len; i++) {
                    String colName = structFields.get(i).getFieldName();
                    if (Collections.binarySearch(columnNames, colName) < 0) {
                        continue;
                    }
                    if (!found.isEmpty()) {
                        sb.append(SerDeUtils.COMMA);
                    }
                    appendWithQuotes(sb, colName);
                    sb.append(SerDeUtils.COLON);
                    buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)),
                            structFields.get(i).getFieldObjectInspector());
                    found.add(colName);
                }
                if (found.size() != columnNames.size()) {
                    ArrayList<String> expected = new ArrayList<>(columnNames);
                    expected.removeAll(found);
                    throw new SerDeException("Could not find some fields: " + expected);
                }
            }
            sb.append(SerDeUtils.RBRACE);
        }
    }

    private static void serializeList(@Nonnull final StringBuilder sb, @Nullable final Object obj,
            @Nonnull final ListObjectInspector loi) throws SerDeException {
        ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
        List<?> olist = loi.getList(obj);

        if (olist == null) {
            sb.append("null");
        } else {
            sb.append(SerDeUtils.LBRACKET);
            for (int i = 0; i < olist.size(); i++) {
                if (i > 0) {
                    sb.append(SerDeUtils.COMMA);
                }
                buildJSONString(sb, olist.get(i), listElementObjectInspector);
            }
            sb.append(SerDeUtils.RBRACKET);
        }
    }

    private static void serializeMap(@Nonnull final StringBuilder sb, @Nullable final Object obj,
            @Nonnull final MapObjectInspector moi) throws SerDeException {
        ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
        ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
        Map<?, ?> omap = moi.getMap(obj);
        if (omap == null) {
            sb.append("null");
        } else {
            sb.append(SerDeUtils.LBRACE);
            boolean first = true;
            for (Object entry : omap.entrySet()) {
                if (first) {
                    first = false;
                } else {
                    sb.append(SerDeUtils.COMMA);
                }
                Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
                StringBuilder keyBuilder = new StringBuilder();
                buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector);
                String keyString = keyBuilder.toString().trim();
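                // JSON object keys must be strings: quote any key that was not
                // already serialized as a quoted string (e.g., numeric keys).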
                if ((!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE)) {
                    appendWithQuotes(sb, keyString);
                } else {
                    sb.append(keyString);
                }
                sb.append(SerDeUtils.COLON);
                buildJSONString(sb, e.getValue(), mapValueObjectInspector);
            }
            sb.append(SerDeUtils.RBRACE);
        }
    }

    private static void serializePrimitive(@Nonnull final StringBuilder sb, @Nullable final Object obj,
            @Nonnull final PrimitiveObjectInspector poi) throws SerDeException {
        if (obj == null) {
            sb.append("null");
        } else {
            switch (poi.getPrimitiveCategory()) {
            case BOOLEAN: {
                boolean b = ((BooleanObjectInspector) poi).get(obj);
                sb.append(b ? "true" : "false");
                break;
            }
            case BYTE: {
                sb.append(((ByteObjectInspector) poi).get(obj));
                break;
            }
            case SHORT: {
                sb.append(((ShortObjectInspector) poi).get(obj));
                break;
            }
            case INT: {
                sb.append(((IntObjectInspector) poi).get(obj));
                break;
            }
            case LONG: {
                sb.append(((LongObjectInspector) poi).get(obj));
                break;
            }
            case FLOAT: {
                sb.append(((FloatObjectInspector) poi).get(obj));
                break;
            }
            case DOUBLE: {
                sb.append(((DoubleObjectInspector) poi).get(obj));
                break;
            }
            case STRING: {
                String s = SerDeUtils.escapeString(((StringObjectInspector) poi).getPrimitiveJavaObject(obj));
                appendWithQuotes(sb, s);
                break;
            }
            case BINARY:
                byte[] b = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(obj);
                Text txt = new Text();
                txt.set(b, 0, b.length);
                appendWithQuotes(sb, SerDeUtils.escapeString(txt.toString()));
                break;
            case DATE:
                Date d = ((DateObjectInspector) poi).getPrimitiveJavaObject(obj);
                appendWithQuotes(sb, d.toString());
                break;
            case TIMESTAMP: {
                Timestamp t = ((TimestampObjectInspector) poi).getPrimitiveJavaObject(obj);
                appendWithQuotes(sb, t.toString());
                break;
            }
            case DECIMAL:
                sb.append(((HiveDecimalObjectInspector) poi).getPrimitiveJavaObject(obj));
                break;
            case VARCHAR: {
                String s = SerDeUtils
                        .escapeString(((HiveVarcharObjectInspector) poi).getPrimitiveJavaObject(obj).toString());
                appendWithQuotes(sb, s);
                break;
            }
            case CHAR: {
                //this should use HiveChar.getPaddedValue() but it's protected; currently (v0.13)
                // HiveChar.toString() returns getPaddedValue()
                String s = SerDeUtils
                        .escapeString(((HiveCharObjectInspector) poi).getPrimitiveJavaObject(obj).toString());
                appendWithQuotes(sb, s);
                break;
            }
            default:
                throw new SerDeException("Unknown primitive type: " + poi.getPrimitiveCategory());
            }
        }
    }

    private static void buildJSONString(@Nonnull final StringBuilder sb, @Nullable final Object obj,
            @Nonnull final ObjectInspector oi) throws SerDeException {
        switch (oi.getCategory()) {
        case PRIMITIVE: {
            PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
            serializePrimitive(sb, obj, poi);
            break;
        }
        case LIST: {
            ListObjectInspector loi = (ListObjectInspector) oi;
            serializeList(sb, obj, loi);
            break;
        }
        case MAP: {
            MapObjectInspector moi = (MapObjectInspector) oi;
            serializeMap(sb, obj, moi);
            break;
        }
        case STRUCT: {
            StructObjectInspector soi = (StructObjectInspector) oi;
            serializeStruct(sb, obj, soi, null);
            break;
        }
        case UNION: {
            UnionObjectInspector uoi = (UnionObjectInspector) oi;
            if (obj == null) {
                sb.append("null");
            } else {
                sb.append(SerDeUtils.LBRACE);
                sb.append(uoi.getTag(obj));
                sb.append(SerDeUtils.COLON);
                buildJSONString(sb, uoi.getField(obj), uoi.getObjectInspectors().get(uoi.getTag(obj)));
                sb.append(SerDeUtils.RBRACE);
            }
            break;
        }
        default:
            throw new SerDeException("Unknown type in ObjectInspector: " + oi.getCategory());
        }
    }

    /**
     * Deserialize a JSON primitive. JSON objects and arrays require column
     * type information; use the other overloads for those.
     */
    @Nonnull
    public static <T> T deserialize(@Nonnull final Text t) throws SerDeException {
        return deserialize(t, null, null);
    }

    /**
     * Deserialize a JSON array or a JSON primitive into the given column type.
     */
    @Nonnull
    public static <T> T deserialize(@Nonnull final Text t, @Nonnull final TypeInfo columnType)
            throws SerDeException {
        return deserialize(t, null, Arrays.asList(columnType));
    }
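
    // A minimal usage sketch (an illustrative addition, not part of the
    // original file): deserializing a JSON array of doubles using Hive's
    // TypeInfoFactory.
    //
    //   TypeInfo listType =
    //       TypeInfoFactory.getListTypeInfo(TypeInfoFactory.doubleTypeInfo);
    //   List<Object> values =
    //       JsonSerdeUtils.deserialize(new Text("[1.0,2.0,3.0]"), listType);
    //   // values -> [1.0, 2.0, 3.0]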

    @SuppressWarnings("unchecked")
    @Nonnull
    public static <T> T deserialize(@Nonnull final Text t, @Nullable final List<String> columnNames,
            @Nullable final List<TypeInfo> columnTypes) throws SerDeException {
        final Object result;
        try {
            JsonParser p = new JsonFactory().createJsonParser(new FastByteArrayInputStream(t.getBytes()));
            final JsonToken token = p.nextToken();
            if (token == JsonToken.START_OBJECT) {
                result = parseObject(p, columnNames, columnTypes);
            } else if (token == JsonToken.START_ARRAY) {
                result = parseArray(p, columnTypes);
            } else {
                result = parseValue(p);
            }
        } catch (JsonParseException e) {
            throw new SerDeException(e);
        } catch (IOException e) {
            throw new SerDeException(e);
        }
        return (T) result;
    }

    @Nonnull
    private static Object parseObject(@Nonnull final JsonParser p, @CheckForNull final List<String> columnNames,
            @CheckForNull final List<TypeInfo> columnTypes) throws JsonParseException, IOException, SerDeException {
        Preconditions.checkNotNull(columnNames, "columnNames MUST NOT be null in parseObject",
                SerDeException.class);
        Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null in parseObject",
                SerDeException.class);
        if (columnNames.size() != columnTypes.size()) {
            throw new SerDeException("Size of columnNames and columnTypes does not match. #columnNames="
                    + columnNames.size() + ", #columnTypes=" + columnTypes.size());
        }

        TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        final HCatSchema schema;
        try {
            schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
        } catch (HCatException e) {
            throw new SerDeException(e);
        }

        final List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null));
        JsonToken token;
        while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) {
            // iterate through each token, and create appropriate object here.
            populateRecord(r, token, p, schema);
        }

        if (columnTypes.size() == 1) {
            return r.get(0);
        }
        return r;
    }

    @Nonnull
    private static List<Object> parseArray(@Nonnull final JsonParser p,
            @CheckForNull final List<TypeInfo> columnTypes) throws HCatException, IOException, SerDeException {
        Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null", SerDeException.class);
        if (columnTypes.size() != 1) {
            throw new IOException("Expected a single array but go " + columnTypes);
        }

        TypeInfo elemType = columnTypes.get(0);
        HCatSchema schema = HCatSchemaUtils.getHCatSchema(elemType);

        HCatFieldSchema listSchema = schema.get(0);
        HCatFieldSchema elemSchema = listSchema.getArrayElementSchema().get(0);

        final List<Object> arr = new ArrayList<Object>();
        while (p.nextToken() != JsonToken.END_ARRAY) {
            arr.add(extractCurrentField(p, elemSchema, true));
        }
        return arr;
    }

    @Nullable
    private static Object parseValue(@Nonnull final JsonParser p) throws JsonParseException, IOException {
        final JsonToken t = p.getCurrentToken();
        switch (t) {
        case VALUE_FALSE:
            return Boolean.FALSE;
        case VALUE_TRUE:
            return Boolean.TRUE;
        case VALUE_NULL:
            return null;
        case VALUE_STRING:
            return p.getText();
        case VALUE_NUMBER_FLOAT:
            return p.getDoubleValue();
        case VALUE_NUMBER_INT:
            return p.getIntValue();
        default:
            throw new IOException("Unexpected token: " + t);
        }
    }

    private static void populateRecord(@Nonnull final List<Object> r, @Nonnull final JsonToken token,
            @Nonnull final JsonParser p, @Nonnull final HCatSchema s) throws IOException {
        if (token != JsonToken.FIELD_NAME) {
            throw new IOException("Field name expected");
        }
        String fieldName = p.getText();
        Integer fpos = s.getPosition(fieldName);
        if (fpos == null) {
            fpos = getPositionFromHiveInternalColumnName(fieldName);
            if (fpos == -1) {
                skipValue(p);
                return; // unknown field, we return. We'll continue from the next field onwards.
            }
            // If we get past this, then the column name did match the hive pattern for an internal
            // column name, such as _col0, etc, so it *MUST* match the schema for the appropriate column.
            // This means people can't use arbitrary column names such as _col0, and expect us to ignore it
            // if we find it.
            if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) {
                throw new IOException("Hive internal column name (" + fieldName + ") and position encoding (" + fpos
                        + ") for the column name are at odds");
            }
            // If we reached here, then we were successful at finding an alternate internal
            // column mapping, and we're about to proceed.
        }
        HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
        Object currField = extractCurrentField(p, hcatFieldSchema, false);
        r.set(fpos, currField);
    }

    @SuppressWarnings("deprecation")
    @Nullable
    private static Object extractCurrentField(@Nonnull final JsonParser p,
            @Nonnull final HCatFieldSchema hcatFieldSchema, final boolean isTokenCurrent) throws IOException {
        JsonToken valueToken;
        if (isTokenCurrent) {
            valueToken = p.getCurrentToken();
        } else {
            valueToken = p.nextToken();
        }

        final Object val;
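        // Map the current JSON token to a Java value according to the declared
        // Hive type; a VALUE_NULL token yields null for every case below.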
        switch (hcatFieldSchema.getType()) {
        case INT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
            break;
        case TINYINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue();
            break;
        case SMALLINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue();
            break;
        case BIGINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue();
            break;
        case BOOLEAN:
            String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            if (bval != null) {
                val = Boolean.valueOf(bval);
            } else {
                val = null;
            }
            break;
        case FLOAT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue();
            break;
        case DOUBLE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue();
            break;
        case STRING:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            break;
        case BINARY:
            String b = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            if (b != null) {
                try {
                    String t = Text.decode(b.getBytes(), 0, b.getBytes().length);
                    return t.getBytes();
                } catch (CharacterCodingException e) {
                    throw new IOException("Error generating json binary type from object.", e);
                }
            } else {
                val = null;
            }
            break;
        case DATE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText());
            break;
        case TIMESTAMP:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : Timestamp.valueOf(p.getText());
            break;
        case DECIMAL:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : HiveDecimal.create(p.getText());
            break;
        case VARCHAR:
            int vLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveVarchar(p.getText(), vLen);
            break;
        case CHAR:
            int cLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveChar(p.getText(), cLen);
            break;
        case ARRAY:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_ARRAY) {
                throw new IOException("Start of Array expected");
            }
            final List<Object> arr = new ArrayList<>();
            final HCatFieldSchema elemSchema = hcatFieldSchema.getArrayElementSchema().get(0);
            while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
                arr.add(extractCurrentField(p, elemSchema, true));
            }
            val = arr;
            break;
        case MAP:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            final Map<Object, Object> map = new LinkedHashMap<>();
            final HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(),
                        hcatFieldSchema.getMapKeyTypeInfo());
                Object v = extractCurrentField(p, valueSchema, false);
                map.put(k, v);
            }
            val = map;
            break;
        case STRUCT:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
            int sz = subSchema.getFieldNames().size();

            List<Object> struct = new ArrayList<>(Collections.nCopies(sz, null));
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                populateRecord(struct, valueToken, p, subSchema);
            }
            val = struct;
            break;
        default:
            throw new IOException("Unknown type found: " + hcatFieldSchema.getType());
        }
        return val;
    }

    @Nonnull
    private static Object getObjectOfCorrespondingPrimitiveType(String s, PrimitiveTypeInfo mapKeyType)
            throws IOException {
        switch (Type.getPrimitiveHType(mapKeyType)) {
        case INT:
            return Integer.valueOf(s);
        case TINYINT:
            return Byte.valueOf(s);
        case SMALLINT:
            return Short.valueOf(s);
        case BIGINT:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                String t = Text.decode(s.getBytes(), 0, s.getBytes().length);
                return t.getBytes();
            } catch (CharacterCodingException e) {
                throw new IOException("Error generating json binary type from object.", e);
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        default:
            throw new IOException("Could not convert from string to map type " + mapKeyType.getTypeName());
        }
    }

    /**
     * Maps a Hive internal column name such as "_col0" to its position (here, 0),
     * returning -1 if the name does not match the internal-name pattern.
     */
    private static int getPositionFromHiveInternalColumnName(String internalName) {
        Pattern internalPattern = Pattern.compile("_col([0-9]+)");
        Matcher m = internalPattern.matcher(internalName);
        if (!m.matches()) {
            return -1;
        } else {
            return Integer.parseInt(m.group(1));
        }
    }

    private static void skipValue(@Nonnull final JsonParser p) throws JsonParseException, IOException {
        JsonToken valueToken = p.nextToken();
        if ((valueToken == JsonToken.START_ARRAY) || (valueToken == JsonToken.START_OBJECT)) {
            // if the currently read token is a beginning of an array or object, move stream forward
            // skipping any child tokens till we're at the corresponding END_ARRAY or END_OBJECT token
            p.skipChildren();
        }
    }

    @Nonnull
    private static String getHiveInternalColumnName(int fpos) {
        return HiveConf.getColumnInternalName(fpos);
    }

    @Nonnull
    private static StringBuilder appendWithQuotes(@Nonnull final StringBuilder sb, @Nonnull final String value) {
        return sb.append(SerDeUtils.QUOTE).append(value).append(SerDeUtils.QUOTE);
    }

}
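
Usage example

The following self-contained sketch is an illustrative addition (not part of the original file) that round-trips a two-column struct through JSON text. The ObjectInspectorFactory, PrimitiveObjectInspectorFactory, and TypeInfoFactory calls are standard Hive APIs; the printed output is inferred from the serialization logic above.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;

import hivemall.utils.hadoop.JsonSerdeUtils;

public class JsonSerdeUtilsExample {

    public static void main(String[] args) throws Exception {
        // A struct ObjectInspector for {name: string, age: int}.
        ObjectInspector structOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("name", "age"),
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaIntObjectInspector));

        // Serialize one row (a List of field values) into JSON text.
        Text json = JsonSerdeUtils.serialize(Arrays.<Object>asList("alice", 30), structOI);
        System.out.println(json); // {"name":"alice","age":30}

        // Deserialize the JSON text back into a List of Java objects.
        List<TypeInfo> types = Arrays.<TypeInfo>asList(
                TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo);
        List<Object> row = JsonSerdeUtils.deserialize(json, Arrays.asList("name", "age"), types);
        System.out.println(row); // [alice, 30]
    }
}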