org.apache.hadoop.hive.serde2.TestSerDe.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.serde2.TestSerDe.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.serde2;

import java.nio.charset.CharacterCodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.MetadataListStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;

/**
 * TestSerDe.
 *
 * <p>A delimited-text SerDe. A row is a single string whose fields are joined
 * by a separator; a field equal to the null marker ({@code \N}) stands for
 * {@code null}. The separator defaults to {@link #DefaultSeparator} and can be
 * overridden through the {@link #DEFAULT_SERIALIZATION_FORMAT} table property.
 *
 * <p>Both {@link #deserialize(Writable)} and
 * {@link #serialize(Object, ObjectInspector)} return an instance-level cached
 * object that is overwritten by the next call on the same instance.
 */
@SerDeSpec(schemaProps = { serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, TestSerDe.COLUMNS,
        TestSerDe.COLUMNS_COMMENTS, TestSerDe.DEFAULT_SERIALIZATION_FORMAT })
public class TestSerDe extends AbstractSerDe {

    public static final Log LOG = LogFactory.getLog(TestSerDe.class.getName());

    /** Table property holding the comma-separated column names. */
    public static final String COLUMNS = "columns";
    /** Table property holding the column comments, separated by {@code '\0'}. */
    public static final String COLUMNS_COMMENTS = "columns.comments";
    /** Table property that overrides the field separator. */
    public static final String DEFAULT_SERIALIZATION_FORMAT = "testserde.default.serialization.format";

    /**
     * Instance-level accessor for {@link #shortName()}.
     *
     * @return the short name of this SerDe
     */
    public String getShortName() {
        return shortName();
    }

    /**
     * @return the short name of this SerDe, {@code "test_meta"}
     */
    public static String shortName() {
        return "test_meta";
    }

    /** Separator used when the table does not configure one (character code 2). */
    public static final String DefaultSeparator = "\002";

    private String separator;
    // constant for now, will make it configurable later.
    private final String nullString = "\\N";
    // Stays null when the table declares no columns.
    private List<String> columnNames;
    private ObjectInspector cachedObjectInspector;

    @Override
    public String toString() {
        return "TestSerDe[" + separator + "," + columnNames + "]";
    }

    /**
     * Creates a SerDe that uses {@link #DefaultSeparator} until
     * {@link #initialize(Configuration, Properties)} is called.
     *
     * @throws SerDeException declared for interface compatibility; this constructor does not throw it itself
     */
    public TestSerDe() throws SerDeException {
        separator = DefaultSeparator;
    }

    /**
     * Reads the separator and the column layout from the table properties.
     *
     * <p>If {@link #DEFAULT_SERIALIZATION_FORMAT} parses as a byte, that byte
     * is used as the separator; otherwise the property text itself is the
     * separator. If {@link #COLUMNS} is missing or empty, the object inspector
     * is obtained by reflection on {@link ColumnSet}.
     *
     * @param job the job configuration (not read here)
     * @param tbl the table properties
     * @throws SerDeException declared by the overridden method
     */
    @Override
    public void initialize(Configuration job, Properties tbl) throws SerDeException {
        separator = DefaultSeparator;
        String altSep = tbl.getProperty(DEFAULT_SERIALIZATION_FORMAT);
        if (altSep != null && altSep.length() > 0) {
            try {
                // A numeric value is interpreted as the byte code of the separator.
                byte[] b = new byte[1];
                b[0] = Byte.parseByte(altSep);
                separator = new String(b);
            } catch (NumberFormatException e) {
                // Not a number: the property text itself is the separator.
                separator = altSep;
            }
        }

        String columnProperty = tbl.getProperty(COLUMNS);
        if (columnProperty == null || columnProperty.length() == 0) {
            // Drop names left over from an earlier initialize() call so the
            // size check in deserialize() does not use stale metadata.
            columnNames = null;
            // Hack for tables with no columns
            // Treat it as a table with a single column called "col"
            cachedObjectInspector = ObjectInspectorFactory.getReflectionObjectInspector(ColumnSet.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        } else {
            columnNames = Arrays.asList(columnProperty.split(","));
            String commentProperty = tbl.getProperty(COLUMNS_COMMENTS);
            if (commentProperty == null) {
                // No comments configured: Splitter.split(null) would throw a
                // NullPointerException, so build the inspector from names only.
                cachedObjectInspector = MetadataListStructObjectInspector.getInstance(columnNames);
            } else {
                cachedObjectInspector = MetadataListStructObjectInspector.getInstance(columnNames,
                        Lists.newArrayList(Splitter.on('\0').split(commentProperty)));
            }
        }
        LOG.info(getClass().getName() + ": initialized with columnNames: " + columnNames);
    }

    /**
     * Splits {@code row} on {@code sep} and stores the fields in {@code c.col},
     * replacing whatever the list held before.
     *
     * @param c the column set to fill; its {@code col} list is created if it is null
     * @param row the row text; must not be null
     * @param sep the separator, matched literally (not as a regular expression)
     * @param nullString a field equal to this string is stored as {@code null}
     * @return {@code c}
     * @throws Exception declared for backward compatibility with existing callers
     */
    public static Object deserialize(ColumnSet c, String row, String sep, String nullString) throws Exception {
        if (c.col == null) {
            c.col = new ArrayList<String>();
        } else {
            c.col.clear();
        }
        // Quote the separator: String.split takes a regex, and separators such
        // as "|" or "." must be matched literally to mirror serialize().
        // The -1 limit keeps trailing empty fields.
        String[] parts = row.split(Pattern.quote(sep), -1);

        for (String part : parts) {
            if (part.equals(nullString)) {
                c.col.add(null);
            } else {
                c.col.add(part);
            }
        }
        return (c);
    }

    // Reused across deserialize(Writable) calls; its contents are overwritten each time.
    ColumnSet deserializeCache = new ColumnSet();

    /**
     * Decodes a row and splits it into columns.
     *
     * @param field the row, either a {@link Text} or a {@link BytesWritable}
     * @return the shared {@link ColumnSet} instance, refilled with this row's fields
     * @throws SerDeException if {@code field} is of any other type (including null),
     *         if the bytes cannot be decoded, or if splitting the row fails
     */
    @Override
    public Object deserialize(Writable field) throws SerDeException {
        String row;
        if (field instanceof BytesWritable) {
            BytesWritable b = (BytesWritable) field;
            try {
                // Only the first getLength() bytes of the backing array are valid.
                row = Text.decode(b.getBytes(), 0, b.getLength());
            } catch (CharacterCodingException e) {
                throw new SerDeException(e);
            }
        } else if (field instanceof Text) {
            row = field.toString();
        } else {
            // Fail with a clear message instead of a NullPointerException on a null row.
            throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable");
        }
        try {
            deserialize(deserializeCache, row, separator, nullString);
            if (columnNames != null) {
                assert (columnNames.size() == deserializeCache.col.size());
            }
            return deserializeCache;
        } catch (Exception e) {
            throw new SerDeException(e);
        }
    }

    @Override
    public ObjectInspector getObjectInspector() throws SerDeException {
        return cachedObjectInspector;
    }

    @Override
    public Class<? extends Writable> getSerializedClass() {
        return Text.class;
    }

    // Reused across serialize() calls; its contents are overwritten each time.
    Text serializeCache = new Text();

    /**
     * Joins the fields of a struct into one separator-delimited {@link Text}.
     *
     * <p>Primitive fields are written with {@code toString()}, or as the null
     * marker when null. All other fields are written as JSON.
     *
     * @param obj the struct to serialize
     * @param objInspector the inspector describing {@code obj}; must be of category STRUCT
     * @return the shared {@link Text} instance holding the serialized row
     * @throws SerDeException if {@code objInspector} is not a struct inspector
     */
    @Override
    public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {

        if (objInspector.getCategory() != Category.STRUCT) {
            throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: "
                    + objInspector.getTypeName());
        }
        StructObjectInspector soi = (StructObjectInspector) objInspector;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < fields.size(); i++) {
            if (i > 0) {
                sb.append(separator);
            }
            StructField fieldRef = fields.get(i);
            ObjectInspector fieldInspector = fieldRef.getFieldObjectInspector();
            Object column = soi.getStructFieldData(obj, fieldRef);
            if (fieldInspector.getCategory() == Category.PRIMITIVE) {
                // For primitive object, serialize to plain string
                sb.append(column == null ? nullString : column.toString());
            } else {
                // For complex object, serialize to JSON format
                sb.append(SerDeUtils.getJSONString(column, fieldInspector));
            }
        }
        serializeCache.set(sb.toString());
        return serializeCache;
    }

    @Override
    public SerDeStats getSerDeStats() {
        // no support for statistics
        return null;
    }

}