Java tutorial: Apache Flume ElasticSearchSink
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.flume.sink.elasticsearch;

import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.BATCH_SIZE;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLUSTER_NAME;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_CLUSTER_NAME;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_NAME;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_TYPE;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_PORT;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_TTL;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.HOSTNAMES;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_TYPE;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER_PREFIX;
import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL;

import java.util.Arrays;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.StringUtils;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.CounterGroup;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.NodeBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
/**
 * A sink which reads events from a channel and writes them to ElasticSearch,
 * based on the work done by https://github.com/Aconex/elasticflume.git.
 * <p>
 * This sink supports batch reading of events from the channel and writing
 * them to ElasticSearch.
 * <p>
 * Indexes will be rolled daily using the format 'indexname-YYYY-MM-dd' to
 * allow easier management of the index.
 * <p>
 * This sink must be configured with the mandatory parameters detailed in
 * {@link ElasticSearchSinkConstants}.
 * <p>
 * As a secondary step, it is recommended that the ElasticSearch indexes are
 * optimized for the specified serializer. This is not handled by the sink
 * but is typically done by deploying a config template alongside the
 * ElasticSearch deploy.
 *
 * @see <a href=
 *      "http://www.elasticsearch.org/guide/reference/api/admin-indices-templates.html">
 *      Admin Indices Templates</a>
 */
public class ElasticSearchSink extends AbstractSink implements Configurable {

  private static final Logger logger =
      LoggerFactory.getLogger(ElasticSearchSink.class);

  // Used for testing
  private boolean isLocal = false;
  private final CounterGroup counterGroup = new CounterGroup();

  private static final int defaultBatchSize = 100;

  private int batchSize = defaultBatchSize;
  private long ttlMs = DEFAULT_TTL;
  private String clusterName = DEFAULT_CLUSTER_NAME;
  private String indexName = DEFAULT_INDEX_NAME;
  private String indexType = DEFAULT_INDEX_TYPE;

  private InetSocketTransportAddress[] serverAddresses;

  private Node node;
  private Client client;
  private ElasticSearchIndexRequestBuilderFactory indexRequestFactory;
  private SinkCounter sinkCounter;

  /**
   * Create an {@link ElasticSearchSink} configured using the supplied
   * configuration.
   */
  public ElasticSearchSink() {
    this(false);
  }

  /**
   * Create an {@link ElasticSearchSink}.
   *
   * @param isLocal
   *          If <tt>true</tt> the sink will be configured to only talk to an
   *          ElasticSearch instance hosted in the same JVM; should always be
   *          false in production.
   */
  @VisibleForTesting
  ElasticSearchSink(boolean isLocal) {
    this.isLocal = isLocal;
  }

  @VisibleForTesting
  InetSocketTransportAddress[] getServerAddresses() {
    return serverAddresses;
  }

  @VisibleForTesting
  String getClusterName() {
    return clusterName;
  }

  @VisibleForTesting
  String getIndexName() {
    return indexName;
  }

  @VisibleForTesting
  String getIndexType() {
    return indexType;
  }

  @VisibleForTesting
  long getTTLMs() {
    return ttlMs;
  }

  @Override
  public Status process() throws EventDeliveryException {
    logger.debug("processing...");
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction txn = channel.getTransaction();
    try {
      txn.begin();
      BulkRequestBuilder bulkRequest = client.prepareBulk();
      // Drain up to batchSize events from the channel into one bulk request
      for (int i = 0; i < batchSize; i++) {
        Event event = channel.take();

        if (event == null) {
          break;
        }

        IndexRequestBuilder indexRequest =
            indexRequestFactory.createIndexRequest(
                client, indexName, indexType, event);

        if (ttlMs > 0) {
          indexRequest.setTTL(ttlMs);
        }

        bulkRequest.add(indexRequest);
      }

      int size = bulkRequest.numberOfActions();
      if (size <= 0) {
        // Channel was empty; back off so the runner does not spin
        sinkCounter.incrementBatchEmptyCount();
        counterGroup.incrementAndGet("channel.underflow");
        status = Status.BACKOFF;
      } else {
        if (size < batchSize) {
          sinkCounter.incrementBatchUnderflowCount();
          status = Status.BACKOFF;
        } else {
          sinkCounter.incrementBatchCompleteCount();
        }

        sinkCounter.addToEventDrainAttemptCount(size);

        BulkResponse bulkResponse = bulkRequest.execute().actionGet();
        if (bulkResponse.hasFailures()) {
          throw new EventDeliveryException(bulkResponse.buildFailureMessage());
        }
      }
      txn.commit();
      sinkCounter.addToEventDrainSuccessCount(size);
      counterGroup.incrementAndGet("transaction.success");
    } catch (Throwable ex) {
      try {
        txn.rollback();
        counterGroup.incrementAndGet("transaction.rollback");
      } catch (Exception ex2) {
        logger.error(
            "Exception in rollback. Rollback might not have been successful.",
            ex2);
      }

      if (ex instanceof Error || ex instanceof RuntimeException) {
        logger.error("Failed to commit transaction. Transaction rolled back.",
            ex);
        Throwables.propagate(ex);
      } else {
        logger.error("Failed to commit transaction. Transaction rolled back.",
            ex);
        throw new EventDeliveryException(
            "Failed to commit transaction. Transaction rolled back.", ex);
      }
    } finally {
      txn.close();
    }
    return status;
  }
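
  /*
   * Delivery semantics: if any action in the bulk response reports a
   * failure, process() throws and the whole Flume transaction is rolled
   * back, so every event in the batch is redelivered, including any that
   * were already indexed successfully. The sink is therefore at-least-once,
   * and duplicates in ElasticSearch are possible after a partial bulk
   * failure.
   */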
  @Override
  public void configure(Context context) {
    if (!isLocal) {
      String[] hostNames = null;
      if (StringUtils.isNotBlank(context.getString(HOSTNAMES))) {
        hostNames = context.getString(HOSTNAMES).split(",");
      }
      Preconditions.checkState(hostNames != null && hostNames.length > 0,
          "Missing Param:" + HOSTNAMES);

      serverAddresses = new InetSocketTransportAddress[hostNames.length];
      for (int i = 0; i < hostNames.length; i++) {
        String[] hostPort = hostNames[i].split(":");
        String host = hostPort[0];
        // Fall back to the default transport port when none is given
        int port = hostPort.length == 2 ? Integer.parseInt(hostPort[1])
            : DEFAULT_PORT;
        serverAddresses[i] = new InetSocketTransportAddress(host, port);
      }

      Preconditions.checkState(serverAddresses != null
          && serverAddresses.length > 0, "Missing Param:" + HOSTNAMES);
    }

    if (StringUtils.isNotBlank(context.getString(INDEX_NAME))) {
      this.indexName = context.getString(INDEX_NAME);
    }

    if (StringUtils.isNotBlank(context.getString(INDEX_TYPE))) {
      this.indexType = context.getString(INDEX_TYPE);
    }

    if (StringUtils.isNotBlank(context.getString(CLUSTER_NAME))) {
      this.clusterName = context.getString(CLUSTER_NAME);
    }

    if (StringUtils.isNotBlank(context.getString(BATCH_SIZE))) {
      this.batchSize = Integer.parseInt(context.getString(BATCH_SIZE));
    }

    if (StringUtils.isNotBlank(context.getString(TTL))) {
      // TTL is configured in days but stored internally in milliseconds
      this.ttlMs = TimeUnit.DAYS.toMillis(
          Integer.parseInt(context.getString(TTL)));
      Preconditions.checkState(ttlMs > 0, TTL
          + " must be greater than 0 or not set.");
    }

    String serializerClazz =
        "org.apache.flume.sink.elasticsearch.ElasticSearchLogStashEventSerializer";
    if (StringUtils.isNotBlank(context.getString(SERIALIZER))) {
      serializerClazz = context.getString(SERIALIZER);
    }

    Context serializerContext = new Context();
    serializerContext.putAll(context.getSubProperties(SERIALIZER_PREFIX));

    try {
      @SuppressWarnings("unchecked")
      Class<? extends Configurable> clazz =
          (Class<? extends Configurable>) Class.forName(serializerClazz);
      Configurable serializer = clazz.newInstance();

      if (serializer instanceof ElasticSearchIndexRequestBuilderFactory) {
        indexRequestFactory =
            (ElasticSearchIndexRequestBuilderFactory) serializer;
      } else if (serializer instanceof ElasticSearchEventSerializer) {
        indexRequestFactory = new EventSerializerIndexRequestBuilderFactory(
            (ElasticSearchEventSerializer) serializer);
      } else {
        throw new IllegalArgumentException(serializerClazz
            + " is neither an ElasticSearchEventSerializer"
            + " nor an ElasticSearchIndexRequestBuilderFactory.");
      }
      indexRequestFactory.configure(serializerContext);
    } catch (Exception e) {
      logger.error("Could not instantiate event serializer.", e);
      Throwables.propagate(e);
    }

    if (sinkCounter == null) {
      sinkCounter = new SinkCounter(getName());
    }

    Preconditions.checkState(StringUtils.isNotBlank(indexName),
        "Missing Param:" + INDEX_NAME);
    Preconditions.checkState(StringUtils.isNotBlank(indexType),
        "Missing Param:" + INDEX_TYPE);
    Preconditions.checkState(StringUtils.isNotBlank(clusterName),
        "Missing Param:" + CLUSTER_NAME);
    Preconditions.checkState(batchSize >= 1, BATCH_SIZE
        + " must be greater than 0");
  }
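
  /*
   * For reference, a minimal sketch of an agent configuration that drives
   * this configure() method, in Flume properties format. The agent, sink,
   * channel, and host names (agent1, es1, ch1, es-host-1, ...) are
   * hypothetical placeholders, and the property keys assume the constants
   * in ElasticSearchSinkConstants carry their usual camelCase values
   * (hostNames, clusterName, indexName, indexType, batchSize, ttl,
   * serializer):
   *
   *   agent1.sinks.es1.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
   *   agent1.sinks.es1.channel = ch1
   *   agent1.sinks.es1.hostNames = es-host-1:9300,es-host-2:9300
   *   agent1.sinks.es1.clusterName = my-es-cluster
   *   agent1.sinks.es1.indexName = flume
   *   agent1.sinks.es1.indexType = logs
   *   agent1.sinks.es1.batchSize = 100
   *   agent1.sinks.es1.ttl = 5   (in days; converted to ms internally)
   */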
  @Override
  public void start() {
    logger.info("ElasticSearch sink {} started.", getName());
    sinkCounter.start();
    try {
      openConnection();
    } catch (Exception ex) {
      sinkCounter.incrementConnectionFailedCount();
      closeConnection();
    }

    super.start();
  }

  @Override
  public void stop() {
    logger.info("ElasticSearch sink {} stopping.", getName());
    closeConnection();

    sinkCounter.stop();
    super.stop();
  }

  private void openConnection() {
    if (isLocal) {
      logger.info("Using ElasticSearch AutoDiscovery mode");
      openLocalDiscoveryClient();
    } else {
      logger.info("Using ElasticSearch hostnames: {} ",
          Arrays.toString(serverAddresses));
      openClient();
    }
    sinkCounter.incrementConnectionCreatedCount();
  }

  /*
   * FOR TESTING ONLY...
   *
   * Opens a local discovery node for talking to an elasticsearch server
   * running in the same JVM.
   */
  private void openLocalDiscoveryClient() {
    node = NodeBuilder.nodeBuilder().client(true).local(true).node();
    client = node.client();
  }

  private void openClient() {
    Settings settings = ImmutableSettings.settingsBuilder()
        .put("cluster.name", clusterName).build();

    TransportClient transport = new TransportClient(settings);
    for (InetSocketTransportAddress host : serverAddresses) {
      transport.addTransportAddress(host);
    }
    client = transport;
  }

  private void closeConnection() {
    if (client != null) {
      client.close();
    }
    client = null;

    if (node != null) {
      node.close();
    }
    node = null;

    sinkCounter.incrementConnectionClosedCount();
  }
}
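
A custom serializer can be supplied through the serializer property. Below is
a minimal sketch of what an ElasticSearchEventSerializer implementation might
look like. It assumes the interface declares getContentBuilder(Event)
returning an XContentBuilder and extends Configurable and
ConfigurableComponent, consistent with the instanceof checks in configure()
above and with the bundled ElasticSearchLogStashEventSerializer; the class
name SimpleBodySerializer and the single "body" field are hypothetical
examples, not part of Flume.

package org.apache.flume.sink.elasticsearch;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.conf.ComponentConfiguration;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

// Hypothetical example serializer: indexes each event as a JSON document
// with a single "body" field holding the raw event body as UTF-8 text.
public class SimpleBodySerializer implements ElasticSearchEventSerializer {

  @Override
  public XContentBuilder getContentBuilder(Event event) throws IOException {
    // Builds { "body": "<event body>" }
    return XContentFactory.jsonBuilder()
        .startObject()
        .field("body", new String(event.getBody(), StandardCharsets.UTF_8))
        .endObject();
  }

  @Override
  public void configure(Context context) {
    // No serializer-specific settings; any "serializer." subproperties
    // set on the sink would arrive in this context.
  }

  @Override
  public void configure(ComponentConfiguration conf) {
    // No-op for this example.
  }
}

Configured with serializer = org.apache.flume.sink.elasticsearch.SimpleBodySerializer,
the sink would wrap this class in an EventSerializerIndexRequestBuilderFactory
and route each event through getContentBuilder(...) when building the bulk
index requests in process().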