com.amazonaws.hbase.connector.HBaseEmitter.java Source code

Java tutorial

Introduction

Here is the source code for com.amazonaws.hbase.connector.HBaseEmitter.java

Source

/*
 * Copyright 2013-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Amazon Software License (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 * http://aws.amazon.com/asl/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.hbase.connector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import com.amazonaws.hbase.kinesis.utils.HBaseUtils;
import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer;
import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;

/**
 * This implementation of IEmitter is used to store records from an Amazon Kinesis stream in Apache HBase. The use of
 * this class requires the configuration of an Amazon EMR cluster with Apache HBase installed. When the buffer is full,
 * this class's emit method adds the contents of the buffer to Apache HBase running on Amazon EMR.
 */
public class HBaseEmitter implements IEmitter<Map<String, String>> {
    private static final Log LOG = LogFactory.getLog(HBaseEmitter.class);

    /** Record field whose value is used as the HBase row key. */
    private static final String ROW_KEY_FIELD = "username";

    /**
     * Cells written for every record, as {column family, qualifier} pairs. The qualifier doubles as the name of
     * the record field the cell value is read from.
     */
    private static final String[][] COLUMNS = {
            { "user", "userid" },
            { "user", "firstname" },
            { "user", "lastname" },
            { "address", "city" },
            { "address", "state" },
            { "contact", "email" },
            { "contact", "phone" },
            { "likes", "likesports" },
            { "likes", "liketheatre" },
            { "likes", "likeconcerts" },
            { "likes", "likejazz" },
            { "likes", "likeclassical" },
            { "likes", "likeopera" },
            { "likes", "likerock" },
            { "likes", "likevegas" },
            { "likes", "likebroadway" },
            { "likes", "likemusicals" },
    };

    protected final String emrEndpoint;
    protected final String hbaseTableName;
    protected final int hbaseRestPort;
    protected final String emrPublicDns;
    protected final AmazonElasticMapReduce emrClient;

    /**
     * Creates an emitter from the connector configuration and builds the Amazon EMR client it owns.
     *
     * @param configuration supplies the EMR endpoint, HBase table name, HBase REST port, cluster public DNS name
     *        and the AWS credentials provider
     */
    public HBaseEmitter(EMRHBaseKinesisConnectorConfiguration configuration) {
        // EMR / HBase config
        this.emrEndpoint = configuration.EMR_ENDPOINT;
        this.hbaseTableName = configuration.HBASE_TABLE_NAME;
        this.hbaseRestPort = configuration.HBASE_REST_PORT;
        this.emrPublicDns = configuration.EMR_CLUSTER_PUBLIC_DNS;
        // Client
        this.emrClient = new AmazonElasticMapReduceClient(configuration.AWS_CREDENTIALS_PROVIDER);
        this.emrClient.setEndpoint(this.emrEndpoint);
        LOG.info("EMRHBaseEmitter.....");
    }

    /**
     * Converts every buffered record into HBase Puts and hands the whole batch to
     * {@link HBaseUtils#addRecords}.
     *
     * <p>Every record in the buffer is processed, including the last one, and an empty buffer is a no-op.
     *
     * @param buffer the records to write
     * @return the records that could not be converted because they are null or carry no
     *         {@value #ROW_KEY_FIELD} value; empty when every record was converted
     * @throws IOException declared by the emitter interface; presumably raised by the HBase write, whose
     *         implementation is not visible here
     */
    @Override
    public List<Map<String, String>> emit(final UnmodifiableBuffer<Map<String, String>> buffer) throws IOException {
        List<Map<String, String>> records = buffer.getRecords();
        List<Put> batch = new ArrayList<Put>();
        List<Map<String, String>> failed = new ArrayList<Map<String, String>>();
        for (Map<String, String> record : records) {
            if (!appendPuts(record, batch)) {
                failed.add(record);
            }
        }
        LOG.info("EMIT: " + "records ....." + batch.size());
        if (!batch.isEmpty()) {
            HBaseUtils.addRecords(hbaseTableName, emrPublicDns, hbaseRestPort, batch);
        }
        return failed;
    }

    /**
     * Appends one Put per available column of {@code record} to {@code batch}.
     *
     * <p>One Put per cell is kept (rather than a single multi-column Put per row) so that
     * {@code HBaseUtils.addRecords} receives the same shape of input as before.
     *
     * @return {@code false} when the record is null or has no row key, in which case nothing is appended
     */
    private static boolean appendPuts(Map<String, String> record, List<Put> batch) {
        String rowKey = (record == null) ? null : record.get(ROW_KEY_FIELD);
        if (rowKey == null) {
            LOG.warn("Skipping record without a '" + ROW_KEY_FIELD + "' row key: " + record);
            return false;
        }
        byte[] row = Bytes.toBytes(rowKey);
        for (String[] column : COLUMNS) {
            String family = column[0];
            String qualifier = column[1];
            String value = record.get(qualifier);
            if (value == null) {
                // A missing field must not abort the whole batch; the remaining cells are still written.
                LOG.warn("Record '" + rowKey + "' has no value for " + family + ":" + qualifier
                        + "; cell not written");
                continue;
            }
            //start with the row key followed by column family
            batch.add(new Put(row).add(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value)));
        }
        return true;
    }

    /**
     * Logs each record that could not be emitted.
     *
     * @param records the records that failed
     */
    @Override
    public void fail(List<Map<String, String>> records) {
        for (Map<String, String> record : records) {
            LOG.error("Record failed: " + record);
        }
    }

    /**
     * Releases the Amazon EMR client created by the constructor. Shutting down is a normal lifecycle event, so
     * it is logged at INFO rather than ERROR.
     */
    @Override
    public void shutdown() {
        LOG.info("HBaseEmitter shutting down.");
        emrClient.shutdown();
    }

}