parquet.hive.serde.ArrayWritableObjectInspector.java Source code

Java tutorial

Introduction

Here is the source code for parquet.hive.serde.ArrayWritableObjectInspector.java

Source

/**
 * Copyright 2013 Criteo.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package parquet.hive.serde;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.ArrayWritable;
import parquet.hive.serde.primitive.ParquetPrimitiveInspectorFactory;

/**
 *
 * The ArrayWritableObjectInspector will inspect an ArrayWritable, considering it as a Hive struct.<br />
 * It can also inspect a List, which is the representation handed out by {@link #create()} and
 * {@link #setStructFieldData(Object, StructField, Object)}, so that Hive can inspect the result of those calls.
 *
 * @author Mickaël Lacour &lt;m.lacour@criteo.com&gt;
 * @author Rémy Pecqueur &lt;r.pecqueur@criteo.com&gt;
 *
 */
public class ArrayWritableObjectInspector extends SettableStructObjectInspector {

    /** Type description of the inspected struct; also drives {@link #equals(Object)} and {@link #hashCode()}. */
    private final TypeInfo typeInfo;
    /** Type of each struct field, in declaration order. */
    private final List<TypeInfo> fieldInfos;
    /** Name of each struct field, in declaration order. */
    private final List<String> fieldNames;
    /** Field references, in declaration order (position i holds the field whose index is i). */
    private final List<StructField> fields;
    /** Lookup of field references by field name. */
    private final HashMap<String, StructFieldImpl> fieldsByName;

    /**
     * Builds an inspector for the given struct type, creating one field reference (and the matching
     * child inspector) per struct field.
     *
     * @param rowTypeInfo the struct type to inspect
     * @throws RuntimeException if one of the field types is not supported (see {@link #getObjectInspector(TypeInfo)})
     */
    public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {

        typeInfo = rowTypeInfo;
        fieldNames = rowTypeInfo.getAllStructFieldNames();
        fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos();
        fields = new ArrayList<StructField>(fieldNames.size());
        fieldsByName = new HashMap<String, StructFieldImpl>();

        for (int i = 0; i < fieldNames.size(); ++i) {
            final String name = fieldNames.get(i);
            final TypeInfo fieldInfo = fieldInfos.get(i);

            final StructFieldImpl field = new StructFieldImpl(name, getObjectInspector(fieldInfo), i);
            fields.add(field);
            fieldsByName.put(name, field);
        }
    }

    /**
     * Selects the object inspector matching a field type. Nested structs, lists and maps are resolved
     * recursively.
     *
     * @param fieldTypeInfo the type to find an inspector for
     * @return the inspector to use for values of that type
     * @throws NotImplementedException if the type is timestamp
     * @throws RuntimeException if the type is none of the handled ones
     */
    private ObjectInspector getObjectInspector(final TypeInfo fieldTypeInfo) {
        if (fieldTypeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
            return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
        } else if (fieldTypeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
            return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
        } else if (fieldTypeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
            return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
        } else if (fieldTypeInfo.equals(TypeInfoFactory.intTypeInfo)) {
            return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
        } else if (fieldTypeInfo.equals(TypeInfoFactory.longTypeInfo)) {
            return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
        } else if (fieldTypeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
            return ParquetPrimitiveInspectorFactory.parquetStringInspector;
        } else if (fieldTypeInfo.getCategory().equals(Category.STRUCT)) {
            return new ArrayWritableObjectInspector((StructTypeInfo) fieldTypeInfo);
        } else if (fieldTypeInfo.getCategory().equals(Category.LIST)) {
            final TypeInfo subTypeInfo = ((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo();
            return new ParquetHiveArrayInspector(getObjectInspector(subTypeInfo));
        } else if (fieldTypeInfo.getCategory().equals(Category.MAP)) {
            final TypeInfo keyTypeInfo = ((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo();
            final TypeInfo valueTypeInfo = ((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo();
            // String, byte and short keys get the "deep" map inspector; every other key type gets the standard one.
            if (keyTypeInfo.equals(TypeInfoFactory.stringTypeInfo)
                    || keyTypeInfo.equals(TypeInfoFactory.byteTypeInfo)
                    || keyTypeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
                return new DeepParquetHiveMapInspector(getObjectInspector(keyTypeInfo),
                        getObjectInspector(valueTypeInfo));
            } else {
                return new StandardParquetHiveMapInspector(getObjectInspector(keyTypeInfo),
                        getObjectInspector(valueTypeInfo));
            }
        } else if (fieldTypeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
            throw new NotImplementedException("timestamp not implemented yet");
        } else if (fieldTypeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
            return ParquetPrimitiveInspectorFactory.parquetByteInspector;
        } else if (fieldTypeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
            return ParquetPrimitiveInspectorFactory.parquetShortInspector;
        } else {
            throw new RuntimeException("Unknown field info: " + fieldTypeInfo);
        }

    }

    /**
     * @return always {@code Category.STRUCT}
     */
    @Override
    public Category getCategory() {
        return Category.STRUCT;
    }

    /**
     * @return the type name reported by the struct type this inspector was built from
     */
    @Override
    public String getTypeName() {
        return typeInfo.getTypeName();
    }

    /**
     * @return the field references of the struct, in declaration order (the internal list, not a copy)
     */
    @Override
    public List<? extends StructField> getAllStructFieldRefs() {
        return fields;
    }

    /**
     * Reads one field out of a struct value.
     *
     * @param data the struct value: an {@link ArrayWritable}, a {@link List} as produced by
     *            {@link #create()}, or {@code null}
     * @param fieldRef a field reference obtained from this inspector
     * @return the field value; {@code null} if {@code data} is {@code null} or if the
     *         {@link ArrayWritable} holds fewer values than the field index
     * @throws UnsupportedOperationException if {@code data} is neither an ArrayWritable nor a List
     */
    @Override
    public Object getStructFieldData(final Object data, final StructField fieldRef) {
        if (data == null) {
            return null;
        }

        if (data instanceof ArrayWritable) {
            final Object[] values = ((ArrayWritable) data).get();
            final int index = ((StructFieldImpl) fieldRef).getIndex();
            // A row may carry fewer values than the schema declares fields; report the missing ones
            // as null instead of failing with ArrayIndexOutOfBoundsException.
            return index < values.length ? values[index] : null;
        }

        // create() and setStructFieldData() hand out Lists, so the getters must be able to read them back.
        if (data instanceof List) {
            return ((List<?>) data).get(((StructFieldImpl) fieldRef).getIndex());
        }

        throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
    }

    /**
     * @param name the field name to look up
     * @return the matching field reference, or {@code null} if no field has that name
     */
    @Override
    public StructField getStructFieldRef(final String name) {
        return fieldsByName.get(name);
    }

    /**
     * Returns all field values of a struct value as a new list.
     *
     * @param data the struct value: an {@link ArrayWritable}, a {@link List} as produced by
     *            {@link #create()}, or {@code null}
     * @return a fresh list holding the field values, or {@code null} if {@code data} is {@code null}
     * @throws UnsupportedOperationException if {@code data} is neither an ArrayWritable nor a List
     */
    @Override
    public List<Object> getStructFieldsDataAsList(final Object data) {
        if (data == null) {
            return null;
        }

        if (data instanceof ArrayWritable) {
            final Object[] values = ((ArrayWritable) data).get();
            return new ArrayList<Object>(Arrays.asList(values));
        }

        // Structs built through create()/setStructFieldData() are Lists; copy them like the array case does.
        if (data instanceof List) {
            return new ArrayList<Object>((List<?>) data);
        }

        throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
    }

    /**
     * Creates an empty struct value.
     *
     * @return a mutable list with one {@code null} slot per struct field
     */
    @Override
    public Object create() {
        final int fieldCount = fields.size();
        final ArrayList<Object> list = new ArrayList<Object>(fieldCount);
        for (int i = 0; i < fieldCount; ++i) {
            list.add(null);
        }
        return list;
    }

    /**
     * Stores a field value into a struct value created by {@link #create()}.
     *
     * @param struct the list-backed struct to modify
     * @param field a field reference obtained from this inspector
     * @param fieldValue the value to store
     * @return the same {@code struct} instance, after modification
     * @throws ClassCastException if {@code struct} is not a List
     */
    @Override
    public Object setStructFieldData(Object struct, StructField field, Object fieldValue) {
        // Only List operations are needed, so any List implementation is accepted, not just ArrayList.
        @SuppressWarnings("unchecked")
        final List<Object> list = (List<Object>) struct;
        list.set(((StructFieldImpl) field).getIndex(), fieldValue);
        return list;
    }

    /**
     * Two inspectors are equal when they are of the same class and were built from equal type descriptions.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final ArrayWritableObjectInspector other = (ArrayWritableObjectInspector) obj;
        if (this.typeInfo == null) {
            return other.typeInfo == null;
        }
        return this.typeInfo.equals(other.typeInfo);
    }

    /**
     * Hash code derived from the type description only, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        int hash = 5;
        hash = 29 * hash + (this.typeInfo != null ? this.typeInfo.hashCode() : 0);
        return hash;
    }

    /**
     * Field reference used by this inspector: a name, the inspector for the field's values and the
     * position of the field inside the struct.
     */
    class StructFieldImpl implements StructField {

        private final String name;
        private final ObjectInspector inspector;
        private final int index;

        /**
         * @param name the field name
         * @param inspector the inspector for the field's values
         * @param index the zero-based position of the field inside the struct
         */
        public StructFieldImpl(final String name, final ObjectInspector inspector, final int index) {
            this.name = name;
            this.inspector = inspector;
            this.index = index;
        }

        /**
         * @return always the empty string; no comment is tracked for fields
         */
        @Override
        public String getFieldComment() {
            return "";
        }

        /**
         * @return the field name
         */
        @Override
        public String getFieldName() {
            return name;
        }

        /**
         * @return the zero-based position of the field inside the struct
         */
        public int getIndex() {
            return index;
        }

        /**
         * @return the inspector for the field's values
         */
        @Override
        public ObjectInspector getFieldObjectInspector() {
            return inspector;
        }
    }
}