// Java tutorial
/**
 * Portions of this project are copyright Australian Broadcasting Corporation, 2014.
 * All other portions are copyright Justin Ramos, 2015.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.jramos.snowplow;

import com.amazonaws.AmazonServiceException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicSessionCredentials;
import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration;
import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer;
import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ObjectMetadata;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Implementation of the IEmitter interface that writes to temp files
 * and then streams these to s3 rather than using in memory structures.
 *
 * The bulk of this code is shamelessly copied/adapted from the connector
 * library source code - specifically:
 *
 * com.amazonaws.services.kinesis.connectors.redshift.RedshiftBasicEmitter
 *
 * Each call to {@link #emit(UnmodifiableBuffer)} writes the buffered records to
 * a local temp file, uploads that file to S3 and then issues a Redshift COPY
 * statement that loads the uploaded object into the configured table.
 *
 * @author Sam Mason (sam.mason@abc.net.au)
 */
public class RedshiftSinkEmitter implements IEmitter<byte[]> {

    private static final Log LOG = LogFactory.getLog(RedshiftSinkEmitter.class);

    protected final RedshiftSinkConfiguration configuration;
    protected final String s3Bucket;
    protected final String s3Endpoint;
    protected final AmazonS3Client s3client;

    private final String redshiftTable;
    private final String redshiftURL;
    private final char redshiftDelimiter;
    private final Properties loginProperties;
    private final File tempDir;

    /**
     * Creates the emitter from the connector configuration.
     *
     * @param kinesisConfiguration connector configuration; it is cast to
     *        {@link RedshiftSinkConfiguration}, so any other type fails with a
     *        {@link ClassCastException}
     */
    public RedshiftSinkEmitter(KinesisConnectorConfiguration kinesisConfiguration) {
        this.configuration = (RedshiftSinkConfiguration) kinesisConfiguration;

        s3Bucket = configuration.S3_BUCKET;
        s3Endpoint = configuration.S3_ENDPOINT;
        s3client = new AmazonS3Client(configuration.AWS_CREDENTIALS_PROVIDER);
        if (s3Endpoint != null) {
            s3client.setEndpoint(s3Endpoint);
        }

        redshiftTable = configuration.REDSHIFT_DATA_TABLE;
        redshiftDelimiter = configuration.REDSHIFT_DATA_DELIMITER;
        redshiftURL = configuration.REDSHIFT_URL;

        loginProperties = new Properties();
        loginProperties.setProperty("user", configuration.REDSHIFT_USERNAME);
        loginProperties.setProperty("password", configuration.REDSHIFT_PASSWORD);

        // Temp files live in <java.io.tmpdir>/<bucket name>.
        String localTempDir = System.getProperty("java.io.tmpdir") + File.separator + configuration.S3_BUCKET;
        tempDir = new File(localTempDir);
        if (!tempDir.exists() && !tempDir.mkdirs()) {
            // Not fatal here: the first emit will fail (and be retried) if the directory is unusable.
            LOG.warn("Unable to create local S3 temp directory " + tempDir.getAbsolutePath());
        }
        LOG.info("Local S3 temp directory " + tempDir.getAbsolutePath());
    }

    /**
     * Uploads the buffered records to S3 and copies the uploaded object into Redshift.
     *
     * @param buffer records to emit
     * @return an empty list on success, or all records of the buffer when the
     *         S3 upload or the COPY statement failed
     * @throws IOException if the local temp file cannot be opened for upload
     */
    @Override
    public List<byte[]> emit(final UnmodifiableBuffer<byte[]> buffer) throws IOException {
        List<byte[]> failed = emitToS3(buffer);
        if (!failed.isEmpty()) {
            return buffer.getRecords();
        }

        Connection conn = null;
        try {
            String s3File = getS3FileName(buffer.getFirstSequenceNumber(), buffer.getLastSequenceNumber());
            AWSCredentials credentials = configuration.AWS_CREDENTIALS_PROVIDER.getCredentials();
            String copyStatement = generateCopyStatement(s3File, credentials);

            conn = DriverManager.getConnection(redshiftURL, loginProperties);
            executeStatement(copyStatement, conn);
            logCopySuccess(conn, s3File);
            return Collections.emptyList();
        } catch (IOException | SQLException e) {
            LOG.error("Redshift COPY failed", e);
            return buffer.getRecords();
        } finally {
            // Runs on every path, including unchecked exceptions, so the connection never leaks.
            closeConnection(conn);
        }
    }

    /**
     * Logs every failed record. Records are decoded as UTF-8 text so that the
     * log shows their content rather than the array identity.
     *
     * @param records records that could not be emitted
     */
    @Override
    public void fail(List<byte[]> records) {
        for (byte[] record : records) {
            String text = (record == null) ? "null" : new String(record, StandardCharsets.UTF_8);
            LOG.error("Record failed: " + text);
        }
    }

    /** Shuts down the underlying S3 client. */
    @Override
    public void shutdown() {
        s3client.shutdown();
    }

    /**
     * Builds the Redshift COPY statement for an uploaded S3 object. Any extra
     * options provided by the configuration are appended space-separated.
     * Protected so that sub classes can override.
     *
     * @param s3File key of the object inside the configured bucket
     * @param credentials AWS credentials embedded in the statement; a session
     *        token is added when these are {@link BasicSessionCredentials}
     * @return the complete COPY statement, terminated by a semicolon
     */
    protected String generateCopyStatement(String s3File, AWSCredentials credentials) {
        StringBuilder exec = new StringBuilder();
        exec.append("COPY ").append(redshiftTable).append(" ");
        exec.append("FROM 's3://").append(s3Bucket).append("/").append(s3File).append("' ");
        exec.append("CREDENTIALS 'aws_access_key_id=").append(credentials.getAWSAccessKeyId());
        exec.append(";aws_secret_access_key=").append(credentials.getAWSSecretKey());
        if (credentials instanceof BasicSessionCredentials) {
            BasicSessionCredentials sessionCredentials = (BasicSessionCredentials) credentials;
            exec.append(";token=").append(sessionCredentials.getSessionToken());
        }
        exec.append("' DELIMITER '").append(redshiftDelimiter).append("'");

        if (configuration.hasRedshiftOptions()) {
            for (String option : configuration.getRedshiftOptions()) {
                exec.append(" ").append(option);
            }
        }

        exec.append(";");
        return exec.toString();
    }

    /**
     * Derives the S3 key (also used as the local temp file name) for a buffer.
     *
     * @param firstSeq first sequence number in the buffer
     * @param lastSeq last sequence number in the buffer
     * @return {@code firstSeq + "-" + lastSeq}
     */
    protected String getS3FileName(String firstSeq, String lastSeq) {
        return firstSeq + "-" + lastSeq;
    }

    ////////////////////////////////////////////////////////////////////////////

    /**
     * Writes all records to a local temp file and then streams that file to S3.
     * The temp file is removed afterwards whether or not the upload succeeded.
     *
     * @param buffer records to upload
     * @return an empty list on success, or all records when writing the temp
     *         file or the S3 request failed
     * @throws IOException if the temp file cannot be opened for reading
     */
    private List<byte[]> emitToS3(final UnmodifiableBuffer<byte[]> buffer) throws IOException {
        List<byte[]> records = buffer.getRecords();

        // Get the S3 filename (used for target bucket and local temp)
        String s3FileName = getS3FileName(buffer.getFirstSequenceNumber(), buffer.getLastSequenceNumber());

        File localS3File = new File(tempDir, s3FileName);
        if (localS3File.exists()) {
            localS3File.delete();
        }

        try {
            // write all records to local temp file; closing the stream flushes it
            try (OutputStream out = new BufferedOutputStream(new FileOutputStream(localS3File))) {
                for (byte[] record : records) {
                    out.write(record, 0, record.length);
                }
            } catch (IOException ioe) {
                LOG.error("Failed to write records to local temp file " + localS3File.getAbsolutePath(), ioe);
                return records;
            }

            // now stream to s3
            ObjectMetadata metaData = new ObjectMetadata();
            metaData.setContentLength(localS3File.length());

            InputStream in = new FileInputStream(localS3File);
            try {
                s3client.putObject(s3Bucket, s3FileName, in, metaData);
                LOG.info("Successfully emitted " + records.size() + " records to S3 in s3://" + s3Bucket + "/" + s3FileName);
                return Collections.emptyList();
            } catch (AmazonServiceException e) {
                LOG.error("Failed to emit records to S3 in s3://" + s3Bucket + "/" + s3FileName, e);
                return records;
            } finally {
                // A close failure must not turn a completed upload into a failed emit.
                closeQuietly(in);
            }
        } finally {
            // delete the local temp on every path so failed uploads do not pile up on disk
            if (localS3File.exists() && !localS3File.delete()) {
                LOG.warn("Unable to delete local temp file " + localS3File.getAbsolutePath());
            }
        }
    }

    /** Closes the stream, logging (rather than propagating) any failure. */
    private void closeQuietly(InputStream in) {
        try {
            in.close();
        } catch (IOException ioe) {
            LOG.warn("Failed to close local temp file stream", ioe);
        }
    }

    /** Closes the connection if one was opened; failures are logged, never thrown. */
    private void closeConnection(Connection conn) {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
        } catch (Exception e) {
            LOG.error("Failed to close Redshift connection", e);
        }
    }

    /**
     * Logs the outcome of a completed COPY. The record count is informational
     * only: if it cannot be read the COPY has still been executed, so the
     * failure is logged instead of being reported as a failed emit (which would
     * make the caller retry and load the same data twice).
     */
    private void logCopySuccess(Connection conn, String s3File) {
        String source = "s3://" + s3Bucket + "/" + s3File;
        try {
            LOG.info("Successfully copied " + getNumberOfCopiedRecords(conn)
                    + " records to Redshift from file " + source);
        } catch (IOException e) {
            LOG.warn("Copied file " + source + " to Redshift but could not determine the number of copied records", e);
        }
    }

    /**
     * Executes a single SQL statement, always releasing the JDBC statement.
     *
     * @throws IOException wrapping any {@link SQLException}; the caller logs it
     */
    private void executeStatement(String statement, Connection conn) throws IOException {
        try (Statement stmt = conn.createStatement()) {
            stmt.execute(statement);
        } catch (SQLException e) {
            throw new IOException(e);
        }
    }

    /**
     * Queries {@code pg_last_copy_count()} on the given connection.
     *
     * @return the value of the first column of the first result row
     * @throws IOException wrapping any {@link SQLException}, or if the query returns no row
     */
    private int getNumberOfCopiedRecords(Connection conn) throws IOException {
        String cmd = "select pg_last_copy_count();";
        try (Statement stmt = conn.createStatement();
             ResultSet resultSet = stmt.executeQuery(cmd)) {
            if (!resultSet.next()) {
                throw new IOException("No row returned by: " + cmd);
            }
            return resultSet.getInt(1);
        } catch (SQLException e) {
            throw new IOException(e);
        }
    }
}