com.conversantmedia.mapreduce.io.CompositeSortKeySerialization.java Source code

Java tutorial

Introduction

Here is the source code for com.conversantmedia.mapreduce.io.CompositeSortKeySerialization.java

Source

package com.conversantmedia.mapreduce.io;

/*
 * #%L
 * Mara Core framework
 * ~~
 * Copyright (C) 2015 Conversant
 * ~~
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapreduce.Job;

/**
 * Encapsulates the serialization functionality for a job 
 * using the {@link CompositeSortKey} key.
 * 
 * @param <G> Grouping/Partitioning key type
 * @param <S> Sorting key type
 */
public class CompositeSortKeySerialization<G extends WritableComparable<G>, S extends WritableComparable<S>>
        extends Configured implements Serialization<CompositeSortKey<G, S>> {

    /**
     * Configuration property holding the class name of the grouping key.
     * Written by {@code configureMapOutputKey} and read back by {@code getDeserializer}.
     */
    public static final String CONF_KEY_GROUPKEY_CLASS = "com.conversantmedia.mapreduce.io.compositekey.groupclass";
    /**
     * Configuration property holding the class name of the sorting key.
     * Written by {@code configureMapOutputKey} and read back by {@code getDeserializer}.
     */
    public static final String CONF_KEY_SORTKEY_CLASS = "com.conversantmedia.mapreduce.io.compositekey.sortclass";

    // Created on the first getSerializer call and handed out again on every later call.
    private Serializer<CompositeSortKey<G, S>> serializer;
    // Created on the first getDeserializer call and handed out again on every later call.
    private Deserializer<CompositeSortKey<G, S>> deserializer;

    /**
     * Creates a serialization without handing a configuration to the
     * {@link Configured} superclass constructor.
     */
    public CompositeSortKeySerialization() {
        super();
    }

    /**
     * Creates a serialization that passes the given configuration on to the
     * {@link Configured} superclass.
     *
     * @param conf the configuration to hand to the superclass
     */
    public CompositeSortKeySerialization(Configuration conf) {
        super(conf);
    }

    /**
     * Reports whether this serialization handles the given class.
     *
     * @param c the candidate class
     * @return {@code true} when {@code c} is {@link CompositeSortKey} or a subtype of it
     */
    @Override
    public boolean accept(Class<?> c) {
        final Class<?> supported = CompositeSortKey.class;
        return supported.isAssignableFrom(c);
    }

    /**
     * Returns the deserializer for composite sort keys, building it on the first
     * call from the group and sort key classes found in the configuration and
     * returning that same instance on later calls.
     *
     * @param arg0 the key class being requested (not consulted)
     * @return the cached deserializer instance
     */
    @Override
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public Deserializer<CompositeSortKey<G, S>> getDeserializer(Class<CompositeSortKey<G, S>> arg0) {
        if (deserializer != null) {
            return deserializer;
        }

        // Either lookup yields null when the corresponding property is absent.
        final Class<?> groupKeyClass = getConf().getClass(CONF_KEY_GROUPKEY_CLASS, null);
        final Class<?> sortKeyClass = getConf().getClass(CONF_KEY_SORTKEY_CLASS, null);

        deserializer = new CompositeSortKeyDeserializer(groupKeyClass, sortKeyClass);
        return deserializer;
    }

    /**
     * Returns the serializer for composite sort keys, creating it on the first
     * call and returning that same instance on later calls.
     *
     * @param arg0 the key class being requested (not consulted)
     * @return the cached serializer instance
     */
    @Override
    public Serializer<CompositeSortKey<G, S>> getSerializer(Class<CompositeSortKey<G, S>> arg0) {
        if (serializer != null) {
            return serializer;
        }
        serializer = new CompositeSortKeySerializer<>();
        return serializer;
    }

    /**
     * Convenience method to configure the job for using the composite key.
     * <p>
     * Records the group and sort key class names in the job configuration, makes
     * {@link CompositeSortKey} the map output key, installs the composite key's
     * partitioner and comparators, and registers this serialization at the front
     * of the {@code io.serializations} list. Calling it more than once for the
     * same job does not register the serialization more than once.
     *
     * @param job            the job using this serializer
     * @param groupKeyClass  the key type used for grouping
     * @param sortKeyClass   the key type used for sorting
     */
    @SuppressWarnings("rawtypes")
    public static void configureMapOutputKey(Job job, Class<? extends WritableComparable> groupKeyClass,
            Class<? extends WritableComparable> sortKeyClass) {

        final Configuration conf = job.getConfiguration();

        // First, setup our classes...
        conf.set(CONF_KEY_GROUPKEY_CLASS, groupKeyClass.getName());
        conf.set(CONF_KEY_SORTKEY_CLASS, sortKeyClass.getName());

        // Set this class as our map output key
        job.setMapOutputKeyClass(CompositeSortKey.class);

        // Setup the partitioner and comparators.
        job.setPartitionerClass(CompositeSortKey.KeyPartitioner.class);
        job.setGroupingComparatorClass(CompositeSortKey.GroupingComparator.class);
        job.setSortComparatorClass(CompositeSortKey.NaturalSortComparator.class);

        // Now setup the serialization by registering with the framework.
        // This class goes first; any earlier registration of it is dropped so that
        // repeated calls do not pile up duplicate entries.
        final String serializationName = CompositeSortKeySerialization.class.getName();
        final Collection<String> serializations = new ArrayList<>();
        serializations.add(serializationName);
        for (String existing : conf.getStringCollection("io.serializations")) {
            if (!serializationName.equals(existing)) {
                serializations.add(existing);
            }
        }
        conf.setStrings("io.serializations", serializations.toArray(new String[0]));

    }

    /**
     * Serializer for composite sort keys: writes the group key followed by the
     * sort key to the stream supplied through {@link #open(OutputStream)}.
     *
     * @param <G> Grouping/Partitioning key type
     * @param <S> Sorting key type
     */
    public static final class CompositeSortKeySerializer<G extends WritableComparable<G>, S extends WritableComparable<S>>
            implements Serializer<CompositeSortKey<G, S>> {

        /** Destination for serialized keys; assigned by {@link #open(OutputStream)}. */
        private DataOutputStream dataOut;

        /**
         * Wraps the given stream and makes it the target of subsequent writes.
         *
         * @param out the stream keys will be written to
         */
        @Override
        public void open(OutputStream out) throws IOException {
            dataOut = new DataOutputStream(out);
        }

        /**
         * Writes the key's two parts to the open stream, group key first.
         *
         * @param key the composite key to write
         * @throws IOException if either part fails to write
         */
        @Override
        public void serialize(CompositeSortKey<G, S> key) throws IOException {
            key.getGroupKey().write(dataOut);
            key.getSortKey().write(dataOut);
        }

        /**
         * Closes the wrapped stream through {@link IOUtils#closeStream}.
         */
        @Override
        public void close() throws IOException {
            IOUtils.closeStream(dataOut);
        }
    }

    /**
     * Handles deserialization of the sort keys.
     * <p>
     * Reads the group key followed by the sort key from the stream supplied
     * through {@link #open(InputStream)}. Missing key parts on the target object
     * are instantiated from the classes given to the constructor.
     *
     * @param <G> Grouping/Partitioning key type
     * @param <S> Sorting key type
     */
    public static final class CompositeSortKeyDeserializer<G extends WritableComparable<G>, S extends WritableComparable<S>>
            implements Deserializer<CompositeSortKey<G, S>> {

        /** Source of serialized keys; assigned by {@link #open(InputStream)}. */
        private DataInputStream in;

        private final Class<G> groupKeyClass;
        private final Class<S> sortKeyClass;

        /**
         * @param groupKeyClass class used to instantiate a missing group key
         * @param sortKeyClass  class used to instantiate a missing sort key
         */
        public CompositeSortKeyDeserializer(Class<G> groupKeyClass, Class<S> sortKeyClass) {
            this.groupKeyClass = groupKeyClass;
            this.sortKeyClass = sortKeyClass;
        }

        /**
         * Reads one composite key from the open stream.
         *
         * @param reuse an existing key to populate, or {@code null} to have a new one created
         * @return the populated key ({@code reuse} itself when it was non-null)
         * @throws IOException if a key part cannot be instantiated or read
         */
        @Override
        public CompositeSortKey<G, S> deserialize(CompositeSortKey<G, S> reuse) throws IOException {
            if (reuse == null) {
                reuse = new CompositeSortKey<>();
            }

            if (reuse.getGroupKey() == null) {
                reuse.setGroupKey(newKeyInstance(groupKeyClass, "group"));
            }

            if (reuse.getSortKey() == null) {
                reuse.setSortKey(newKeyInstance(sortKeyClass, "sort"));
            }

            // Use the keys to deserialize...
            reuse.getGroupKey().readFields(this.in);
            reuse.getSortKey().readFields(this.in);

            return reuse;
        }

        /**
         * Instantiates a key part, reporting failures as {@link IOException} with the cause attached.
         *
         * @param keyClass the class to instantiate
         * @param role     label used in the error message when no class is available
         */
        private static <T> T newKeyInstance(Class<T> keyClass, String role) throws IOException {
            if (keyClass == null) {
                // Fail with a description instead of a bare NullPointerException.
                throw new IOException("No " + role + " key class was supplied to the deserializer");
            }
            try {
                return keyClass.newInstance();
            } catch (InstantiationException | IllegalAccessException e) {
                // Keep the reflective failure as the cause so the real reason is not lost.
                throw new IOException("Unable to instantiate '" + keyClass + "'", e);
            }
        }

        /**
         * Wraps the given stream and makes it the source of subsequent reads.
         *
         * @param in the stream keys will be read from
         */
        @Override
        public void open(InputStream in) throws IOException {
            this.in = new DataInputStream(in);
        }

        /**
         * Closes the wrapped stream through {@link IOUtils#closeStream}.
         */
        @Override
        public void close() throws IOException {
            IOUtils.closeStream(this.in);
        }
    }
}