com.thinkbiganalytics.nifi.v2.elasticsearch.IndexElasticSearch.java Source code


Introduction

Here is the source code for com.thinkbiganalytics.nifi.v2.elasticsearch.IndexElasticSearch.java, a NiFi processor that reads a JSON array from a FlowFile's content and bulk-indexes each object as a document in an Elasticsearch 2.x cluster.

Source

package com.thinkbiganalytics.nifi.v2.elasticsearch;

/*-
 * #%L
 * thinkbig-nifi-elasticsearch-processors
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.hashing.HashingUtil;
import com.thinkbiganalytics.nifi.processor.AbstractNiFiProcessor;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;

import java.net.InetAddress;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;

/**
 * This processor indexes JSON data in Elasticsearch.
 */
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({ "elasticsearch", "thinkbig" })
@CapabilityDescription("Write FlowFile from a JSON array to Elasticsearch (V2)")
public class IndexElasticSearch extends AbstractNiFiProcessor {

    /**
     * Success Relationship for JSON objects that are successfully indexed in elasticsearch
     */
    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").description(
            "Json objects that are successfully indexed in elasticsearch are transferred to this relationship")
            .build();

    /**
     * Failure Relationship for JSON objects that fail to index in elasticsearch
     */
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description(
            "Json objects that fail to be indexed in elasticsearch are transferred to this relationship")
            .build();

    /**
     * Property for the name of the index
     */
    public static final PropertyDescriptor INDEX_NAME = new PropertyDescriptor.Builder().name("IndexName")
            .description("The name of the index").required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true).build();

    /**
     * Property for the index type
     */
    public static final PropertyDescriptor TYPE = new PropertyDescriptor.Builder().name("Type")
            .description("Elasticsearch type").required(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(true).build();

    /**
     * Property for the Elasticsearch host name
     */
    public static final PropertyDescriptor HOST_NAME = new PropertyDescriptor.Builder().name("HostName")
            .description("Elasticsearch host").required(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(true).build();

    /**
     * Property for the Elasticsearch cluster name
     */
    public static final PropertyDescriptor CLUSTER_NAME = new PropertyDescriptor.Builder().name("ClusterName")
            .description("Elasticsearch cluster").required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true).build();

    /**
     * Property for the id to use for indexing into elasticsearch.
     */
    public static final PropertyDescriptor ID_FIELD = new PropertyDescriptor.Builder().name("IdField").description(
            "Id that you want to use for indexing into elasticsearch. If it is empty then a UUID will be generated")
            .required(false).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true)
            .build();

    /**
     * Property for Kylo category name
     */
    public static final PropertyDescriptor CATEGORY_NAME = new PropertyDescriptor.Builder().name("KyloCategory")
            .description("Kylo category system name for data to be indexed").required(true)
            .defaultValue("${category}").addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(true).build();

    /**
     * Property for Kylo feed name
     */
    public static final PropertyDescriptor FEED_NAME = new PropertyDescriptor.Builder().name("KyloFeed")
            .description("Kylo feed system name for data to be indexed").required(true).defaultValue("${feed}")
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true).build();

    private final Set<Relationship> relationships;
    private final List<PropertyDescriptor> propDescriptors;

    /**
     * default constructor constructs the relationship and property collections
     */
    public IndexElasticSearch() {
        final Set<Relationship> r = new HashSet<>();
        r.add(REL_SUCCESS);
        r.add(REL_FAILURE);
        relationships = Collections.unmodifiableSet(r);

        final List<PropertyDescriptor> pds = new ArrayList<>();
        pds.add(INDEX_NAME);
        pds.add(TYPE);
        pds.add(HOST_NAME);
        pds.add(CLUSTER_NAME);
        pds.add(ID_FIELD);
        pds.add(CATEGORY_NAME);
        pds.add(FEED_NAME);
        propDescriptors = Collections.unmodifiableList(pds);
    }

    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return propDescriptors;
    }

    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        final ComponentLog logger = getLog();
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }
        try {
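            // Evaluate every property against the incoming FlowFile so Expression
            // Language values such as ${category} and ${feed} resolve per flow file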
            String indexName = context.getProperty(INDEX_NAME).evaluateAttributeExpressions(flowFile).getValue();
            String type = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
            String hostName = context.getProperty(HOST_NAME).evaluateAttributeExpressions(flowFile).getValue();
            String clusterName = context.getProperty(CLUSTER_NAME).evaluateAttributeExpressions(flowFile)
                    .getValue();
            String idField = context.getProperty(ID_FIELD).evaluateAttributeExpressions(flowFile).getValue();
            String categoryName = context.getProperty(CATEGORY_NAME).evaluateAttributeExpressions(flowFile)
                    .getValue();
            String feedName = context.getProperty(FEED_NAME).evaluateAttributeExpressions(flowFile).getValue();

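            // Read the entire FlowFile content into memory; it is expected to be a JSON array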
            final StringBuffer sb = new StringBuffer();
            session.read(flowFile, in -> sb.append(IOUtils.toString(in, Charset.defaultCharset())));

            logger.debug("The json that was received is: " + sb.toString());

            boolean success = sendToElasticSearch(sb.toString(), hostName, indexName, type, clusterName, idField,
                    categoryName, feedName);

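            // Route the original FlowFile on the outcome of the bulk index request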
            if (!success) {
                logger.info("*** Completed with failed status");
                session.transfer(flowFile, REL_FAILURE);
            } else {
                logger.info("*** Completed with success status");
                session.transfer(flowFile, REL_SUCCESS);
            }
        } catch (final Exception e) {
            logger.error("Unable to execute Elasticsearch job", new Object[] { flowFile, e });
            session.transfer(flowFile, REL_FAILURE);
        }

    }

    private boolean sendToElasticSearch(String json, String hostName, String index, String type, String clusterName,
            String idField, String categoryName, String feedName) throws Exception {
        final ComponentLog logger = getLog();
        Settings settings = Settings.settingsBuilder().put("cluster.name", clusterName).build();
        Client client = TransportClient.builder().settings(settings).build()
                .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(hostName), 9300));

        try {
            JSONArray array = new JSONArray(json);
            BulkRequestBuilder bulkRequest = client.prepareBulk();

            for (int i = 0; i < array.length(); i++) {
                JSONObject jsonObj = array.getJSONObject(i);
                String id = null;

                // Document id precedence: the configured id field, then
                // category::feed::<MD5 of the document>, then a random UUID
                if (idField != null && idField.length() > 0) {
                    id = jsonObj.getString(idField);
                    logger.debug("Document index id using field " + idField + ": " + id);
                } else if (StringUtils.isNotEmpty(categoryName) && StringUtils.isNotEmpty(feedName)) {
                    String hash = HashingUtil.getHashMD5(jsonObj.toString());
                    if (StringUtils.isNotEmpty(hash)) {
                        id = categoryName + "::" + feedName + "::" + hash;
                        logger.debug("Document index id using hash: " + id);
                    }
                }

                if (StringUtils.isEmpty(id)) {
                    id = UUID.randomUUID().toString();
                    logger.debug("Document index id auto-generated: " + id);
                }

                jsonObj.put("post_date", String.valueOf(System.currentTimeMillis()));
                bulkRequest.add(client.prepareIndex(index, type, id).setSource(jsonObj.toString()));
            }
            BulkResponse bulkResponse = bulkRequest.get();
            if (bulkResponse.hasFailures()) {
                logger.error("Error occurred while batch updating: " + bulkResponse.buildFailureMessage());
                return false;
            }
            return true;
        } finally {
            // Close the transport client so its connections are released even on failure
            client.close();
        }
    }
}
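
Example usage

Below is a minimal sketch of how this processor could be exercised with NiFi's mock framework (the TestRunner from nifi-mock). The host, cluster, index, and type values are illustrative assumptions, not part of the source above; the processor opens a real transport connection, so without a reachable Elasticsearch 2.x node at the configured host the flow file would route to failure instead.

import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

import java.nio.charset.StandardCharsets;

public class IndexElasticSearchExample {

    public static void main(String[] args) {
        TestRunner runner = TestRunners.newTestRunner(IndexElasticSearch.class);

        // Connection and target settings (illustrative values)
        runner.setProperty(IndexElasticSearch.HOST_NAME, "localhost");
        runner.setProperty(IndexElasticSearch.CLUSTER_NAME, "demo-cluster");
        runner.setProperty(IndexElasticSearch.INDEX_NAME, "users");
        runner.setProperty(IndexElasticSearch.TYPE, "user");

        // With no IdField set, ids fall back to category::feed::<MD5 of the document>
        runner.setProperty(IndexElasticSearch.CATEGORY_NAME, "marketing");
        runner.setProperty(IndexElasticSearch.FEED_NAME, "users_feed");

        // The FlowFile content must be a JSON array; each element becomes one document
        String json = "[{\"name\":\"alice\"},{\"name\":\"bob\"}]";
        runner.enqueue(json.getBytes(StandardCharsets.UTF_8));

        runner.run();

        // The whole FlowFile routes to success only when no bulk item failed
        runner.assertAllFlowFilesTransferred(IndexElasticSearch.REL_SUCCESS, 1);
    }
}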