org.msec.sink.es.ESSink.java Source code

Java tutorial

Introduction

Here is the source code for org.msec.sink.es.ESSink.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.msec.sink.es;

import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.lang.StringUtils;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.apache.flume.formatter.output.PathManager;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.map.ObjectMapper;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.apache.flume.formatter.output.PathManagerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;
import java.util.concurrent.*;
import java.lang.String;
import java.text.SimpleDateFormat;

/**
 * Flume sink that serializes events to JSON and hands them, in bulks, to
 * {@link ESClientThread} workers through a bounded queue.
 *
 * <p>Configuration keys read in {@link #configure(Context)}: {@code hosts}
 * (required, comma separated {@code host[:port]}), {@code indexName},
 * {@code indexType}, {@code clusterName}, {@code indexRollingTime}
 * (e.g. {@code 1day}, {@code 6hour}, {@code 10min}), {@code bulkNum},
 * {@code batchSize} and {@code maxContentLength}.
 *
 * <p>The target index of an event is {@code "msec_" + serviceName + timeSuffix},
 * where the service name comes from the {@code ServiceName} event header.
 */
public class ESSink extends AbstractSink implements Configurable {

    private static final Logger LOG = LoggerFactory.getLogger(ESSink.class);
    public static final int DEFAULT_PORT = 9300;

    /** Suffix appended to an event body that was cut to {@link #maxContentLength}. */
    private static final String TRUNCATION_MARKER = "<..truncated..>";
    /** Charset the JSON generator writes with; must match the decode of the buffer. */
    private static final String JSON_CHARSET = "UTF-8";
    /** Number of counted events after which a throughput line is logged. */
    private static final int STATS_REPORT_THRESHOLD = 20000;
    /** Shared mapper; only its JsonFactory is used, so one instance serves all events. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    //members for elasticsearch config
    private String clusterName = "testCluster";
    // NOTE(review): indexName is read from the configuration but the index prefix
    // actually used is the hard-coded "msec_" + service name.
    private String indexName = "flume";
    private String indexType = "logs";
    private int batchSize;
    private String indexRollingTime = "1day";

    /** Per service: the epoch minute at which the index name was last computed. */
    private Map<String, Integer> lastIndexRollingMinute = new HashMap<String, Integer>();
    /** Per service: the index name computed at that minute. */
    private Map<String, String> lastIndexRollingName = new HashMap<String, String>();

    private String[] serverAddressStrings = null;

    private InetSocketTransportAddress[] serverAddresses;

    private int bulkNum;
    private int totalCount = 0;
    private long timeStart = 0;
    private int maxContentLength;

    private ScheduledExecutorService rollService;
    private PathManager pathController;
    /** Set by the timer thread every 3 seconds; consumed by {@link #process()}. */
    private volatile boolean bulkTimeout;

    private static ExecutorService threadPool = Executors.newCachedThreadPool();
    // NOTE(review): this queue is static and re-created by every start(); with more
    // than one ESSink instance in a JVM they would all share the latest queue.
    private static BlockingQueue<ESClientThread.ESThreadRequest> workingQueue;
    ESClientThread.ESThreadRequest esThreadRequest = new ESClientThread.ESThreadRequest();

    public ESSink() {
        LOG.info("ESSink constructed...");
        bulkTimeout = false;
        bulkNum = 1;
        timeStart = System.currentTimeMillis();
    }

    /**
     * Reads the sink settings from the Flume context.
     *
     * @param context the sink configuration
     * @throws IllegalStateException    if {@code hosts} is missing or blank
     * @throws IllegalArgumentException if {@code batchSize} or
     *                                  {@code maxContentLength} is not positive
     */
    @Override
    public void configure(Context context) {
        pathController = PathManagerFactory.getInstance("DEFAULT", context);

        String hosts = context.getString("hosts");
        if (StringUtils.isNotBlank(hosts)) {
            serverAddressStrings = StringUtils.deleteWhitespace(hosts).split(",");
        }
        Preconditions.checkState(serverAddressStrings != null && serverAddressStrings.length > 0,
                "Missing Param:" + "hosts");

        if (StringUtils.isNotBlank(context.getString("indexName"))) {
            this.indexName = context.getString("indexName");
        }

        if (StringUtils.isNotBlank(context.getString("indexType"))) {
            this.indexType = context.getString("indexType");
        }

        if (StringUtils.isNotBlank(context.getString("clusterName"))) {
            this.clusterName = context.getString("clusterName");
        }

        if (StringUtils.isNotBlank(context.getString("indexRollingTime"))) {
            this.indexRollingTime = context.getString("indexRollingTime");
        }

        bulkNum = context.getInteger("bulkNum", 1);

        // checkNotNull on a boxed boolean can never fail; checkArgument actually
        // enforces the condition described by the message.
        batchSize = context.getInteger("batchSize", 100);
        Preconditions.checkArgument(batchSize > 0, "batchSize must be a positive number!!");

        maxContentLength = context.getInteger("maxContentLength", 1000);
        Preconditions.checkArgument(maxContentLength > 0, "maxContentLength must be a positive number!!");
    }

    /**
     * Starts the client threads and a timer that raises {@link #bulkTimeout}
     * every 3 seconds so that partially filled bulks get submitted.
     */
    @Override
    public void start() {
        super.start();

        initESThreadPool();

        rollService = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
                .setNameFormat("ESSink-Bulk-Timer" + Thread.currentThread().getId() + "-%d").build());

        rollService.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                bulkTimeout = true;
            }
        }, 3, 3, TimeUnit.SECONDS);

        LOG.info("ESSink {} started.", getName());
    }

    /**
     * Stops the bulk timer. Safe to call even if {@link #start()} never ran.
     */
    @Override
    public void stop() {
        super.stop();

        // NOTE(review): events still batched in esThreadRequest are not submitted
        // here, and the static client thread pool keeps running.
        if (rollService != null) {
            rollService.shutdown();
        }
    }

    /**
     * Returns the index name to use for the given service right now.
     *
     * <p>The name is recomputed at most once per minute and per service. It is
     * {@code "msec_" + serviceName} followed by a month/day[/hour[/minute]]
     * suffix chosen by the unit of {@code indexRollingTime}. A new name is taken
     * when the current day/hour/minute value is a multiple of the configured
     * count, or when the service has no name yet; otherwise the service keeps
     * its previous name.
     *
     * @param serviceName service the event belongs to (may be empty)
     * @return the index name for that service
     */
    String getCurrentIndexName(String serviceName) {
        int currentMinute = (int) (System.currentTimeMillis() / 60000);
        Integer cachedMinute = lastIndexRollingMinute.get(serviceName);
        if (cachedMinute != null && cachedMinute.intValue() == currentMinute) {
            // the index name is re-evaluated at most once per minute
            return lastIndexRollingName.get(serviceName);
        }

        // Split "<count><unit>" into its parts; both parts are optional.
        String rollingSpec = indexRollingTime == null ? "" : indexRollingTime.trim();
        int digitsEnd = 0;
        while (digitsEnd < rollingSpec.length() && Character.isDigit(rollingSpec.charAt(digitsEnd))) {
            ++digitsEnd;
        }

        int splitNum = 1;
        if (digitsEnd > 0) {
            try {
                splitNum = Integer.parseInt(rollingSpec.substring(0, digitsEnd));
            } catch (NumberFormatException e) {
                LOG.warn("Invalid count in indexRollingTime '{}', using 1", rollingSpec);
            }
        }
        if (splitNum == 0) {
            splitNum = 1;
        }

        // A unit given without a count (e.g. "hour") is honoured instead of
        // silently becoming "day".
        String splitUnit = rollingSpec.substring(digitsEnd).trim();
        if (splitUnit.isEmpty()) {
            splitUnit = "day";
        }

        Calendar cal = Calendar.getInstance();
        int month = cal.get(Calendar.MONTH) + 1;
        int dayOfMonth = cal.get(Calendar.DAY_OF_MONTH);
        int hourOfDay = cal.get(Calendar.HOUR_OF_DAY);
        int minute = cal.get(Calendar.MINUTE);

        boolean onBoundary;
        String suffix;
        if (splitUnit.equalsIgnoreCase("hour")) {
            onBoundary = hourOfDay % splitNum == 0;
            suffix = String.format("%02d%02d%02d", month, dayOfMonth, hourOfDay);
        } else if (splitUnit.equalsIgnoreCase("min")) {
            // roll on the minute, not on the hour
            onBoundary = minute % splitNum == 0;
            suffix = String.format("%02d%02d%02d%02d", month, dayOfMonth, hourOfDay, minute);
        } else {
            if (!splitUnit.equalsIgnoreCase("day")) {
                LOG.warn("Unknown indexRollingTime unit '{}', falling back to 'day'", splitUnit);
            }
            onBoundary = dayOfMonth % splitNum == 0;
            suffix = String.format("%02d%02d", month, dayOfMonth);
        }

        // The previous name is looked up per service, so one service can never
        // inherit the index name computed for another one.
        String previousName = lastIndexRollingName.get(serviceName);
        String resolvedName;
        if (onBoundary || previousName == null || previousName.isEmpty()) {
            resolvedName = "msec_" + serviceName + suffix;
        } else {
            resolvedName = previousName;
        }

        lastIndexRollingMinute.put(serviceName, currentMinute);
        lastIndexRollingName.put(serviceName, resolvedName);
        return resolvedName;
    }

    /**
     * Resolves the configured hosts, creates the request queue (twice as many
     * slots as hosts) and submits one {@link ESClientThread} per resolvable host.
     */
    private void initESThreadPool() {
        serverAddresses = new InetSocketTransportAddress[serverAddressStrings.length];
        workingQueue = new ArrayBlockingQueue<ESClientThread.ESThreadRequest>(serverAddressStrings.length * 2);

        int startedClients = 0;
        for (int i = 0; i < serverAddressStrings.length; i++) {
            String[] hostPort = serverAddressStrings[i].trim().split(":");
            String host = hostPort[0].trim();
            int port = hostPort.length == 2 ? Integer.parseInt(hostPort[1].trim()) : DEFAULT_PORT;
            try {
                serverAddresses[i] = new InetSocketTransportAddress(InetAddress.getByName(host), port);

                threadPool.submit(new ESClientThread(workingQueue, clusterName, serverAddresses[i]));
                startedClients++;
            } catch (UnknownHostException e) {
                LOG.error("Cannot resolve elasticsearch host '" + host + "', skipping it", e);
            }
        }

        if (startedClients == 0) {
            // Without any consumer, submitESRequest() will block once the queue is full.
            LOG.error("No elasticsearch client thread could be started; check the 'hosts' setting");
        }
    }

    /**
     * Puts a bulk request on the working queue, blocking while the queue is
     * full, and periodically logs throughput statistics.
     *
     * <p>If the calling thread is interrupted while waiting, the interrupt flag
     * is restored and the request is neither queued nor counted.
     *
     * @param request the bulk request to hand to the client threads
     */
    public void submitESRequest(ESClientThread.ESThreadRequest request) {
        try {
            workingQueue.put(request);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while queueing an ES bulk request; request dropped", e);
            return;
        }

        // NOTE(review): the "- 1" is kept from the original accounting; with
        // bulkNum == 1 nothing is ever counted - confirm whether this is intended.
        totalCount += request.sourceList.size() - 1;
        if (totalCount > STATS_REPORT_THRESHOLD) {
            long now = System.currentTimeMillis();
            long elapsedMillis = now - timeStart;
            // long arithmetic and a floor of 1 ms avoid overflow and division by zero
            long qps = totalCount * 1000L / Math.max(1L, elapsedMillis);
            LOG.info("totalCount: {} cost: {} qps: {}",
                    new Object[] {Integer.valueOf(totalCount), Long.valueOf(elapsedMillis), Long.valueOf(qps)});
            timeStart = now;
            totalCount = 0;
        }
    }

    /**
     * Converts one event to a JSON document and appends it to the current bulk
     * request, submitting the bulk once it holds {@code bulkNum} documents.
     *
     * <p>The document contains every non-empty header plus a {@code body} field.
     * Tabs and newlines in the body are replaced by spaces and the body is cut
     * to {@code maxContentLength} characters. An {@code InsTime} header is added
     * when missing. A serialization failure is logged and the event is dropped.
     *
     * @param event the event taken from the channel
     * @throws IOException declared for callers; serialization errors are handled here
     */
    private void doSerialize(Event event) throws IOException {
        Map<String, String> headers = event.getHeaders();
        String content = null;
        String serviceName = "";

        if (!headers.containsKey("InsTime")) {
            headers.put("InsTime", String.valueOf(System.currentTimeMillis()));
        }

        // Only the part before the first '.' of the service name selects the index.
        String serviceHeader = headers.get("ServiceName");
        if (serviceHeader != null) {
            int pos = serviceHeader.indexOf(".");
            serviceName = pos > 0 ? serviceHeader.substring(0, pos) : serviceHeader;
        }

        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            JsonGenerator jgen = OBJECT_MAPPER.getJsonFactory().createJsonGenerator(baos, JsonEncoding.UTF8);
            try {
                jgen.writeStartObject();
                for (Map.Entry<String, String> header : headers.entrySet()) {
                    String headerValue = header.getValue();
                    if (headerValue != null && !headerValue.isEmpty()) {
                        jgen.writeStringField(header.getKey(), headerValue);
                    }
                }

                // NOTE(review): the body is decoded with the platform charset, as
                // before; confirm the encoding producers actually use.
                byte[] body = event.getBody();
                content = body == null ? "" : new String(body);
                content = content.replace('\t', ' ').replace('\n', ' ');
                if (content.length() > maxContentLength) {
                    // never pass a negative end index when the limit is shorter than the marker
                    int keep = Math.max(0, maxContentLength - TRUNCATION_MARKER.length());
                    content = content.substring(0, keep) + TRUNCATION_MARKER;
                }
                jgen.writeStringField("body", content);
                jgen.writeEndObject();
            } finally {
                // close() also flushes the generator into the buffer
                jgen.close();
            }

            String curIndexName = getCurrentIndexName(serviceName.toLowerCase());

            // The generator wrote UTF-8 bytes, so they must be decoded as UTF-8.
            esThreadRequest.sourceList.add(baos.toString(JSON_CHARSET));
            esThreadRequest.indexNameList.add(curIndexName);
            esThreadRequest.indexTypeList.add(indexType);
            if (esThreadRequest.sourceList.size() >= bulkNum) {
                submitESRequest(esThreadRequest);
                esThreadRequest = new ESClientThread.ESThreadRequest();
            }
        } catch (IOException e) {
            LOG.error("Failed to serialize event for elasticsearch; event dropped", e);
        }

        LOG.info("ES sink process: {} {} {} {}",
                new Object[] {content, headers.get("IP"), headers.get("Level"), headers.get("RPCName")});
    }

    /**
     * Takes up to {@code batchSize} events from the channel inside one
     * transaction and serializes each of them. Before that, a pending bulk is
     * submitted if the 3-second bulk timer has fired.
     *
     * @return {@code BACKOFF} if the channel ran empty, {@code READY} otherwise
     * @throws EventDeliveryException if anything fails; the transaction is rolled back
     */
    @Override
    public Status process() throws EventDeliveryException {
        Status result = Status.READY;
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        Event event;

        if (bulkTimeout) {
            bulkTimeout = false;

            if (!esThreadRequest.sourceList.isEmpty()) {
                LOG.info("ES bulk timeout");
                submitESRequest(esThreadRequest);
                esThreadRequest = new ESClientThread.ESThreadRequest();
            }
        }

        try {
            transaction.begin();
            for (int i = 0; i < batchSize; i++) {
                event = channel.take();
                if (event != null) {
                    doSerialize(event);
                } else {
                    // No events found, request back-off semantics from runner
                    result = Status.BACKOFF;
                    break;
                }
            }

            transaction.commit();
        } catch (Exception ex) {
            transaction.rollback();
            throw new EventDeliveryException("Failed to process transaction", ex);
        } finally {
            transaction.close();
        }

        return result;
    }

}