Java tutorial
/* * Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package gobblin.serde; import java.io.IOException; import org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat; import org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import com.google.common.base.Enums; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import gobblin.configuration.State; /** * A wrapper around {@link SerDe} that bundles input format, output format and file extension with a {@link SerDe}, * and provides additional functionalities. * * @author ziliu */ @SuppressWarnings("deprecation") public class HiveSerDeWrapper { private static final String SERDE_SERIALIZER_PREFIX = "serde.serializer."; private static final String SERDE_DESERIALIZER_PREFIX = "serde.deserializer."; public static final String SERDE_SERIALIZER_TYPE = SERDE_SERIALIZER_PREFIX + "type"; public static final String SERDE_SERIALIZER_INPUT_FORMAT_TYPE = SERDE_SERIALIZER_PREFIX + "input.format.type"; public static final String SERDE_SERIALIZER_OUTPUT_FORMAT_TYPE = SERDE_SERIALIZER_PREFIX + "output.format.type"; public static final String SERDE_DESERIALIZER_TYPE = SERDE_DESERIALIZER_PREFIX + "type"; public static final String SERDE_DESERIALIZER_INPUT_FORMAT_TYPE = SERDE_DESERIALIZER_PREFIX + "input.format.type"; public static final String SERDE_DESERIALIZER_OUTPUT_FORMAT_TYPE = SERDE_DESERIALIZER_PREFIX + "output.format.type"; public enum BuiltInHiveSerDe { AVRO(AvroSerDe.class.getName(), AvroContainerInputFormat.class.getName(), AvroContainerOutputFormat.class.getName()), ORC(OrcSerde.class.getName(), OrcInputFormat.class.getName(), OrcOutputFormat.class.getName()), PARQUET( ParquetHiveSerDe.class.getName(), MapredParquetInputFormat.class.getName(), MapredParquetOutputFormat.class.getName()); private final String serDeClassName; private final String inputFormatClassName; private final String outputFormatClassName; private BuiltInHiveSerDe(String serDeClassName, String inputFormatClassName, String outputFormatClassName) { this.serDeClassName = serDeClassName; this.inputFormatClassName = inputFormatClassName; this.outputFormatClassName = outputFormatClassName; } @Override public String toString() { return this.serDeClassName; } } private Optional<SerDe> serDe = Optional.absent(); private final String serDeClassName; private final String inputFormatClassName; private final String outputFormatClassName; private HiveSerDeWrapper(BuiltInHiveSerDe hiveSerDe) { this(hiveSerDe.serDeClassName, hiveSerDe.inputFormatClassName, hiveSerDe.outputFormatClassName); } private HiveSerDeWrapper(String serDeClassName, String inputFormatClassName, String outputFormatClassName) { this.serDeClassName = serDeClassName; this.inputFormatClassName = inputFormatClassName; this.outputFormatClassName = outputFormatClassName; } /** * Get the {@link SerDe} instance associated with this {@link HiveSerDeWrapper}. * This method performs lazy initialization. */ public SerDe getSerDe() throws IOException { if (!this.serDe.isPresent()) { try { this.serDe = Optional.of(SerDe.class.cast(Class.forName(this.serDeClassName).newInstance())); } catch (Throwable t) { throw new IOException("Failed to instantiate SerDe " + this.serDeClassName, t); } } return this.serDe.get(); } /** * Get the input format class name associated with this {@link HiveSerDeWrapper}. */ public String getInputFormatClassName() { return this.inputFormatClassName; } /** * Get the output format class name associated with this {@link HiveSerDeWrapper}. */ public String getOutputFormatClassName() { return this.outputFormatClassName; } /** * Get an instance of {@link HiveSerDeWrapper}. * * @param serDeType The SerDe type. This should be one of the available {@link HiveSerDeWrapper.BuiltInHiveSerDe}s. */ public static HiveSerDeWrapper get(String serDeType) { return get(serDeType, Optional.<String>absent(), Optional.<String>absent()); } /** * Get an instance of {@link HiveSerDeWrapper}. * * @param serDeType The SerDe type. If serDeType is one of the available {@link HiveSerDeWrapper.BuiltInHiveSerDe}, * the other three parameters are not used. Otherwise, serDeType should be the class name of a {@link SerDe}, * and the other three parameters must be present. */ public static HiveSerDeWrapper get(String serDeType, Optional<String> inputFormatClassName, Optional<String> outputFormatClassName) { Optional<BuiltInHiveSerDe> hiveSerDe = Enums.getIfPresent(BuiltInHiveSerDe.class, serDeType.toUpperCase()); if (hiveSerDe.isPresent()) { return new HiveSerDeWrapper(hiveSerDe.get()); } else { Preconditions.checkArgument(inputFormatClassName.isPresent(), "Missing input format class name for SerDe " + serDeType); Preconditions.checkArgument(outputFormatClassName.isPresent(), "Missing output format class name for SerDe " + serDeType); return new HiveSerDeWrapper(serDeType, inputFormatClassName.get(), outputFormatClassName.get()); } } /** * Get an instance of {@link HiveSerDeWrapper} from a {@link State}. * * @param state The state should contain property {@link #SERDE_SERIALIZER_TYPE}, and optionally contain properties * {@link #SERDE_SERIALIZER_INPUT_FORMAT_TYPE}, {@link #SERDE_SERIALIZER_OUTPUT_FORMAT_TYPE} and * {@link #SERDE_SERIALIZER_FILE_EXTENSION}. */ public static HiveSerDeWrapper getSerializer(State state) { Preconditions.checkArgument(state.contains(SERDE_SERIALIZER_TYPE), "Missing required property " + SERDE_SERIALIZER_TYPE); return get(state.getProp(SERDE_SERIALIZER_TYPE), Optional.fromNullable(state.getProp(SERDE_SERIALIZER_INPUT_FORMAT_TYPE)), Optional.fromNullable(state.getProp(SERDE_SERIALIZER_OUTPUT_FORMAT_TYPE))); } /** * Get an instance of {@link HiveSerDeWrapper} from a {@link State}. * * @param state The state should contain property {@link #SERDE_DESERIALIZER_TYPE}, and optionally contain properties * {@link #SERDE_DESERIALIZER_INPUT_FORMAT_TYPE}, {@link #SERDE_DESERIALIZER_OUTPUT_FORMAT_TYPE} and * {@link #SERDE_DESERIALIZER_FILE_EXTENSION}. */ public static HiveSerDeWrapper getDeserializer(State state) { Preconditions.checkArgument(state.contains(SERDE_DESERIALIZER_TYPE), "Missing required property " + SERDE_DESERIALIZER_TYPE); return get(state.getProp(SERDE_DESERIALIZER_TYPE), Optional.fromNullable(state.getProp(SERDE_DESERIALIZER_INPUT_FORMAT_TYPE)), Optional.fromNullable(state.getProp(SERDE_DESERIALIZER_OUTPUT_FORMAT_TYPE))); } }