// Source: com.github.jramos.snowplow.RedshiftSink

/**
   Portions of this project are copyright Australian Broadcasting Corporation, 2014.
   All other portions are copyright Justin Ramos, 2015.
    
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    
http://www.apache.org/licenses/LICENSE-2.0
    
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
 */
package com.github.jramos.snowplow;

import com.github.jramos.snowplow.SnowplowEventModel;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration;
import com.amazonaws.services.kinesis.connectors.KinesisConnectorExecutorBase;
import com.amazonaws.services.kinesis.connectors.KinesisConnectorRecordProcessorFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Entry point for RedshiftSink application.
 * 
 * @author Sam Mason (sam.mason@abc.net.au)
 */
public class RedshiftSink extends KinesisConnectorExecutorBase<SnowplowEventModel, byte[]> {

    private static final String POSTGRES_DRIVER = "org.postgresql.Driver";
    private static final String CMD_ARG_CONFIG_FILE = "-config";

    // Matches ${ENV_VAR_NAME} (group 1) or $ENV_VAR_NAME (group 2). Compiled once:
    // resolveEnvVars runs for every property value, so per-call compilation is waste.
    private static final Pattern ENV_VAR_PATTERN = Pattern.compile("\\$\\{(\\w+)\\}|\\$(\\w+)");

    private static final Log LOG = LogFactory.getLog(RedshiftSink.class);

    private final KinesisConnectorConfiguration config;
    private final Properties properties;

    /**
     * Loads the connector configuration from the given properties file, resolves
     * environment-variable placeholders in every property value, and initializes
     * the underlying Kinesis connector executor.
     *
     * @param pathToConfigFile path to the properties file to load
     * @throws IllegalStateException if the file is missing or cannot be read
     */
    public RedshiftSink(String pathToConfigFile) {
        properties = new Properties();
        // try-with-resources replaces the original hand-rolled finally block,
        // which silently swallowed close() failures. A missing file now surfaces
        // as a FileNotFoundException (an IOException) that carries the path,
        // instead of a message-less IOException from a separate exists() check.
        try (InputStream configStream = new FileInputStream(new File(pathToConfigFile))) {
            properties.load(configStream);

            properties.forEach((key, value) -> {
                String resolvedValue = resolveEnvVars(value.toString());
                properties.setProperty(key.toString(), resolvedValue);
            });
        } catch (IOException ioe) {
            String msg = "Could not load properties file " + pathToConfigFile;
            LOG.error(msg, ioe);
            throw new IllegalStateException(msg, ioe);
        }

        AWSCredentialsProvider credentialsProvider = new DefaultAWSCredentialsProviderChain();
        this.config = new RedshiftSinkConfiguration(properties, credentialsProvider);

        super.initialize(config);
    }

    /** {@inheritDoc} Supplies the pipeline that writes Snowplow events to Redshift. */
    @Override
    public KinesisConnectorRecordProcessorFactory<SnowplowEventModel, byte[]> getKinesisConnectorRecordProcessorFactory() {
        return new KinesisConnectorRecordProcessorFactory<>(new RedshiftSinkBasicPipeline(), config);
    }

    /**
     * Entry point. Verifies the PostgreSQL JDBC driver is on the classpath,
     * parses the {@code -config <path>} argument, and runs the executor.
     */
    public static void main(String[] args) {
        try {
            // make sure PostGres driver is on class path
            Class.forName(POSTGRES_DRIVER);
        } catch (ClassNotFoundException e) {
            // preserve the cause so the underlying classloading failure is visible
            throw new IllegalStateException("Could not load PostgreSQL driver from classpath", e);
        }

        Map<String, String> argMap = validateArgs(args);
        if (argMap != null) {
            String configFile = argMap.get(CMD_ARG_CONFIG_FILE);
            KinesisConnectorExecutorBase<SnowplowEventModel, byte[]> redshiftExecutor = new RedshiftSink(
                    configFile);
            redshiftExecutor.run();
        }
    }

    ////////////////////////////////////////////////////////////////////////////

    /**
     * Validates the command line: exactly {@code -config <path>}.
     *
     * @return a map holding the config-file path keyed by {@code -config}, or
     *         {@code null} if the arguments are invalid (usage is printed and,
     *         on a wrong argument count, the JVM exits via showUsage)
     */
    private static Map<String, String> validateArgs(String[] args) {
        Map<String, String> argMap = null;
        if (args.length != 2) {
            showUsage();
        } else {
            argMap = new HashMap<>();
            if (args[0].equals(CMD_ARG_CONFIG_FILE)) {
                argMap.put(CMD_ARG_CONFIG_FILE, args[1]);
            } else {
                System.err.println("Invalid switch " + args[0]);
            }
        }
        return argMap;
    }

    /**
     * Replaces {@code ${VAR}} and {@code $VAR} tokens in {@code input} with the
     * value of the corresponding environment variable, or the empty string when
     * the variable is unset.
     *
     * @param input raw property value, may be {@code null}
     * @return the value with placeholders substituted, or {@code null} if input was {@code null}
     */
    private String resolveEnvVars(String input) {
        if (null == input) {
            return null;
        }
        Matcher m = ENV_VAR_PATTERN.matcher(input);
        StringBuffer sb = new StringBuffer();
        while (m.find()) {
            String envVarName = null == m.group(1) ? m.group(2) : m.group(1);
            String envVarValue = System.getenv(envVarName);
            // quoteReplacement prevents '$' or '\' in the env value from being
            // misinterpreted as a group reference by appendReplacement.
            m.appendReplacement(sb, null == envVarValue ? "" : Matcher.quoteReplacement(envVarValue));
        }
        m.appendTail(sb);
        return sb.toString();
    }

    /** Prints usage instructions to stdout and exits with status 1. */
    private static void showUsage() {
        StringBuilder buff = new StringBuilder();
        buff.append("Usage : java -jar <path-to-jar>");
        buff.append(" ").append(CMD_ARG_CONFIG_FILE).append(" <path-to-config>\n");
        buff.append("\n");
        buff.append(
                "\tIf running locally you can provide AWS credentials as system properties to the JVM on start up:\n");
        buff.append("\t").append("-Daws.accessKeyId=<access key> -Daws.secretKey=<secret key>");
        System.out.println(buff.toString());
        System.exit(1);
    }
}