com.conversantmedia.mapreduce.io.avro.MultiSchemaAvroSerialization.java Source code

Java tutorial

Introduction

Here is the source code for com.conversantmedia.mapreduce.io.avro.MultiSchemaAvroSerialization.java

Source

package com.conversantmedia.mapreduce.io.avro;

/*
 * #%L
 * Mara Core framework
 * ~~
 * Copyright (C) 2015 Conversant
 * ~~
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapreduce.Job;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 *
 *
 * @param <T> Avro record type
 */
public class MultiSchemaAvroSerialization<T> extends Configured implements Serialization<AvroMultiWrapper<T>> {

    public static final String CONF_KEY_MULTI_SCHEMAS = "com.dotomi.avro.mapreduce.schemas";

    @Override
    public boolean accept(Class<?> c) {
        return AvroMultiWrapper.class.isAssignableFrom(c);
    }

    @Override
    public Deserializer<AvroMultiWrapper<T>> getDeserializer(Class<AvroMultiWrapper<T>> c) {
        return new AvroMultiDeserializer<>(getConf());
    }

    @Override
    public Serializer<AvroMultiWrapper<T>> getSerializer(Class<AvroMultiWrapper<T>> c) {
        return new AvroMultiSerializer<>(getConf());
    }

    protected static Schema getSchemaAt(Configuration conf, int b) {
        String schemaName = conf.getStrings(CONF_KEY_MULTI_SCHEMAS)[b];
        if (schemaName == null) {
            throw new IllegalStateException("No avro schema registered for data.");
        }
        Schema schema = null;
        try {
            schema = (Schema) Class.forName(schemaName).getField("SCHEMA$").get(null);
        } catch (IllegalArgumentException | IllegalAccessException | NoSuchFieldException | SecurityException
                | ClassNotFoundException e) {
            logger().error(e.getMessage());
            throw new IllegalStateException(
                    "Configured class [" + schemaName + "] does not contain an accessible static SCHEMA$ member.");
        }
        return schema;
    }

    /**
     * 
     * @param conf   Hadoop configuration
     * @param c      the avro record type class
     * @return      the index of this schema assigned when it was registered
     * @see       #registerSchemas
     */
    protected static int getIndexForSchema(Configuration conf, Class<?> c) {
        int idx = 0;
        for (String name : conf.getStrings(CONF_KEY_MULTI_SCHEMAS)) {
            if (c.getName().equals(name)) {
                return idx;
            }
            idx++;
        }
        throw new IllegalStateException("Schema for class [" + c.getName() + "] was not registered.");
    }

    /**
     * Register the schemas this serializer will ser/deser to/from.
     * @param job      the job to be configured
     * @param schemas   list of schemas to register (Will assign internal indices
     *          based on the order they're provided.)
     */
    public static void registerSchemas(Job job, Schema... schemas) {
        String[] names = new String[schemas.length];
        int idx = 0;
        for (Schema schema : schemas) {
            names[idx++] = schema.getFullName();
        }
        job.getConfiguration().setStrings(CONF_KEY_MULTI_SCHEMAS, names);

        registerSerialization(job);
    }

    /**
     * Add this class to the list of serializers.
     * @param job   the job for registering serialization 
     */
    public static void registerSerialization(Job job) {
        String[] strings = job.getConfiguration().getStrings("io.serializations");
        String[] newStrings = new String[strings.length + 1];
        System.arraycopy(strings, 0, newStrings, 0, strings.length);
        newStrings[newStrings.length - 1] = MultiSchemaAvroSerialization.class.getName();
        job.getConfiguration().setStrings("io.serializations", newStrings);

    }

    protected static Logger logger() {
        return LoggerFactory.getLogger(MultiSchemaAvroSerialization.class);
    }

}