com.hortonworks.nifi.processors.BulkFetchDistributedMapCache.java Source code

Java tutorial

Introduction

Here is the source code for com.hortonworks.nifi.processors.BulkFetchDistributedMapCache.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hortonworks.nifi.processors;

import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.distributed.cache.client.Deserializer;
import org.apache.nifi.distributed.cache.client.DistributedMapCacheClient;
import org.apache.nifi.distributed.cache.client.Serializer;
import org.apache.nifi.distributed.cache.client.exception.DeserializationException;
import org.apache.nifi.distributed.cache.client.exception.SerializationException;
import org.apache.nifi.expression.AttributeExpression;
import org.apache.nifi.expression.AttributeExpression.ResultType;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.codehaus.jackson.map.ObjectMapper;

import scala.actors.threadpool.Arrays;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

@EventDriven
@SupportsBatching
@Tags({ "map", "cache", "fetch", "distributed" })
@InputRequirement(Requirement.INPUT_REQUIRED)
@CapabilityDescription("Computes a comma-separated list of cache keys from FlowFile attributes, for each incoming FlowFile, and fetches the value "
        + "associated with each of those keys from the Distributed Map Cache. Every cached value is expected to be a JSON object. The values that "
        + "are found are combined into a single JSON array of {key: value} objects which replaces the incoming FlowFile's content (or is put into "
        + "an attribute). If there is no value stored under any of the keys then the flow file will be routed to 'not-found'. Note that the "
        + "processor will always attempt to read the entire cached values into memory before placing them in their destination. This could be "
        + "potentially problematic if the cached values are very large.")
@WritesAttribute(attribute = "user-defined", description = "If the 'Put Cache Value In Attribute' property is set then whatever it is set to "
        + "will become the attribute key and the value would be whatever the response was from the Distributed Map Cache.")
@SeeAlso(classNames = { "org.apache.nifi.distributed.cache.client.DistributedMapCacheClientService",
        "org.apache.nifi.distributed.cache.server.map.DistributedMapCacheServer",
        "org.apache.nifi.processors.standard.PutDistributedMapCache" })
/**
 * Processor that looks up several keys of a {@link DistributedMapCacheClient} in one go.
 *
 * <p>The "Cache Entry Identifier" property is evaluated against the incoming FlowFile and split on
 * commas. Each resulting key is fetched from the cache, the cached bytes are decoded with the
 * configured character set and parsed as a JSON object. All values that were found are emitted as one
 * JSON array whose elements are single-entry objects of the form <code>{"key": value}</code>, either
 * as the FlowFile content or as a FlowFile attribute.</p>
 */
public class BulkFetchDistributedMapCache extends AbstractProcessor {

    /** Controller service used to talk to the distributed map cache. */
    public static final PropertyDescriptor PROP_DISTRIBUTED_CACHE_SERVICE = new PropertyDescriptor.Builder()
            .name("Distributed Cache Service")
            .description("The Controller Service that is used to get the cached values.").required(true)
            .identifiesControllerService(DistributedMapCacheClient.class).build();

    /** Expression that yields the comma-separated list of cache keys for a FlowFile. */
    public static final PropertyDescriptor PROP_CACHE_ENTRY_IDENTIFIER = new PropertyDescriptor.Builder()
            .name("Cache Entry Identifier")
            .description(
                    "A FlowFile attribute, or the results of an Attribute Expression Language statement, which will be evaluated "
                            + "against a FlowFile in order to determine the comma-separated list of cache keys whose values are fetched")
            .required(true)
            .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(ResultType.STRING, true))
            .defaultValue("${hash.value}").expressionLanguageSupported(true).build();

    /** Optional name of the attribute that receives the result instead of the FlowFile content. */
    public static final PropertyDescriptor PROP_PUT_CACHE_VALUE_IN_ATTRIBUTE = new PropertyDescriptor.Builder()
            .name("Put Cache Value In Attribute")
            .description(
                    "If set, the cache value received will be put into an attribute of the FlowFile instead of the content of the "
                            + "FlowFile. The attribute key to put to is determined by evaluating value of this property.")
            .addValidator(StandardValidators
                    .createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING))
            .expressionLanguageSupported(true).build();

    /** Upper bound, in characters, for the value written to the attribute. */
    public static final PropertyDescriptor PROP_PUT_ATTRIBUTE_MAX_LENGTH = new PropertyDescriptor.Builder()
            .name("Max Length To Put In Attribute")
            .description(
                    "If routing the cache value to an attribute of the FlowFile (by setting the \"Put Cache Value in attribute\" "
                            + "property), the number of characters put to the attribute value will be at most this amount. This is important because "
                            + "attributes are held in memory and large attributes will quickly cause out of memory issues. If the output goes "
                            + "longer than this value, it will be truncated to fit. Consider making this smaller if able.")
            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR).defaultValue("256").build();

    /** Character set used to decode the cached bytes before they are parsed as JSON. */
    public static final PropertyDescriptor PROP_CHARACTER_SET = new PropertyDescriptor.Builder()
            .name("Character Set")
            .description(
                    "The Character Set in which the cached value is encoded. It is used to decode each cached value before it is parsed as JSON.")
            .required(false).addValidator(StandardValidators.CHARACTER_SET_VALIDATOR).defaultValue("UTF-8").build();

    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success")
            .description("If the cache was successfully communicated with it will be routed to this relationship")
            .build();
    public static final Relationship REL_NOT_FOUND = new Relationship.Builder().name("not-found").description(
            "If a FlowFile's Cache Entry Identifier was not found in the cache, it will be routed to this relationship")
            .build();
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description(
            "If unable to communicate with the cache or if the cache entry is evaluated to be blank, the FlowFile will be penalized and routed to this relationship")
            .build();

    /** Shared JSON mapper; created once instead of once per key and per FlowFile. */
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    private final Set<Relationship> relationships;

    private final Serializer<String> keySerializer = new StringSerializer();
    private final Deserializer<byte[]> valueDeserializer = new CacheValueDeserializer();

    /** Builds the immutable set of relationships exposed by this processor. */
    public BulkFetchDistributedMapCache() {
        final Set<Relationship> rels = new HashSet<>();
        rels.add(REL_SUCCESS);
        rels.add(REL_NOT_FOUND);
        rels.add(REL_FAILURE);
        relationships = Collections.unmodifiableSet(rels);
    }

    /**
     * @return the properties supported by this processor, in display order
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        final List<PropertyDescriptor> descriptors = new ArrayList<>();
        descriptors.add(PROP_CACHE_ENTRY_IDENTIFIER);
        descriptors.add(PROP_DISTRIBUTED_CACHE_SERVICE);
        descriptors.add(PROP_PUT_CACHE_VALUE_IN_ATTRIBUTE);
        descriptors.add(PROP_PUT_ATTRIBUTE_MAX_LENGTH);
        descriptors.add(PROP_CHARACTER_SET);
        return descriptors;
    }

    /**
     * @return the unmodifiable set containing success, not-found and failure
     */
    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * Fetches every key named by the FlowFile's Cache Entry Identifier and routes the FlowFile.
     *
     * <ul>
     * <li>blank identifier, or an {@link IOException} while fetching or parsing: penalized and routed
     * to {@link #REL_FAILURE}</li>
     * <li>none of the keys has a cached value: routed to {@link #REL_NOT_FOUND}</li>
     * <li>otherwise: the JSON array of found values is written to the configured attribute or to the
     * FlowFile content and the FlowFile is routed to {@link #REL_SUCCESS}</li>
     * </ul>
     *
     * @param context provides the configured property values
     * @param session session used to obtain, modify and transfer the FlowFile
     * @throws ProcessException declared by the overridden method
     */
    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        final ComponentLog logger = getLogger();
        final String cacheKey = context.getProperty(PROP_CACHE_ENTRY_IDENTIFIER)
                .evaluateAttributeExpressions(flowFile).getValue();
        if (StringUtils.isBlank(cacheKey)) {
            logger.error("FlowFile {} has no attribute for given Cache Entry Identifier",
                    new Object[] { flowFile });
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        final DistributedMapCacheClient cache = context.getProperty(PROP_DISTRIBUTED_CACHE_SERVICE)
                .asControllerService(DistributedMapCacheClient.class);
        final String selectedEncoding = context.getProperty(PROP_CHARACTER_SET).getValue();

        try {
            final List<Map<String, Object>> results = fetchCachedValues(cache, cacheKey, selectedEncoding, flowFile,
                    logger);

            // Decide on the list itself: an empty list serializes to "[]", which is never an empty string.
            if (results.isEmpty()) {
                session.transfer(flowFile, REL_NOT_FOUND);
                logger.info("Could not find any of the entries in cache for {}; routing to not-found",
                        new Object[] { flowFile });
                return;
            }

            final String jsonResult = JSON_MAPPER.writeValueAsString(results);
            final boolean putInAttribute = context.getProperty(PROP_PUT_CACHE_VALUE_IN_ATTRIBUTE).isSet();
            if (putInAttribute) {
                final String attributeName = context.getProperty(PROP_PUT_CACHE_VALUE_IN_ATTRIBUTE)
                        .evaluateAttributeExpressions(flowFile).getValue();

                // Attributes live in memory, so the value is capped; a truncated value may no longer be valid JSON.
                final int maxLength = context.getProperty(PROP_PUT_ATTRIBUTE_MAX_LENGTH).asInteger();
                final String attributeValue = maxLength < jsonResult.length()
                        ? jsonResult.substring(0, maxLength)
                        : jsonResult;

                flowFile = session.putAttribute(flowFile, attributeName, attributeValue);
            } else {
                // Write the JSON text itself; JSON is encoded as UTF-8.
                final byte[] content = jsonResult.getBytes(StandardCharsets.UTF_8);
                flowFile = session.write(flowFile, new OutputStreamCallback() {
                    @Override
                    public void process(final OutputStream out) throws IOException {
                        out.write(content);
                    }
                });
            }

            session.transfer(flowFile, REL_SUCCESS);
            if (putInAttribute) {
                logger.info("Found a cache key of {} and added an attribute to {} with it's value.",
                        new Object[] { cacheKey, flowFile });
            } else {
                logger.info("Found a cache key of {} and replaced the contents of {} with it's value.",
                        new Object[] { cacheKey, flowFile });
            }
        } catch (final IOException e) {
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
            logger.error("Unable to communicate with cache or parse the cached value when processing {} due to {}",
                    new Object[] { flowFile, e });
        }
    }

    /**
     * Looks up every key of a comma-separated key list and collects the values that exist.
     *
     * @param cache client used to query the distributed map cache
     * @param cacheKey comma-separated list of keys; surrounding whitespace is trimmed and blank
     *        entries are skipped
     * @param encoding name of the character set used to decode the cached bytes
     * @param flowFile FlowFile being processed, used for log messages only
     * @param logger logger of this processor
     * @return one single-entry map (key to parsed JSON object) per key that has a cached value, in the
     *         order of the key list; empty when no key was found
     * @throws IOException if the cache cannot be reached, the encoding is unsupported or a cached
     *         value is not a JSON object
     */
    private List<Map<String, Object>> fetchCachedValues(final DistributedMapCacheClient cache, final String cacheKey,
            final String encoding, final FlowFile flowFile, final ComponentLog logger) throws IOException {
        final List<Map<String, Object>> results = new ArrayList<>();
        for (final String rawKey : cacheKey.split(",")) {
            final String key = rawKey.trim();
            if (key.isEmpty()) {
                continue;
            }

            // A single get() is enough: the value deserializer returns null when nothing is stored, which
            // avoids a second round trip and the race between containsKey() and get().
            final byte[] cachedBytes = cache.get(key, keySerializer, valueDeserializer);
            if (cachedBytes == null) {
                logger.info("Could not find an entry in cache for key {} while processing {}",
                        new Object[] { key, flowFile });
                continue;
            }

            final String cachedJson = new String(cachedBytes, encoding);
            logger.debug("Fetched cache entry for key {}", new Object[] { key });
            final Object parsedValue = JSON_MAPPER.readValue(cachedJson, HashMap.class);
            results.add(Collections.<String, Object>singletonMap(key, parsedValue));
        }
        return results;
    }

    /** Hands the cached bytes through unchanged, mapping a missing or empty value to {@code null}. */
    public static class CacheValueDeserializer implements Deserializer<byte[]> {

        /**
         * @param input raw bytes received from the cache
         * @return {@code input} itself, or {@code null} when it is {@code null} or empty
         */
        @Override
        public byte[] deserialize(final byte[] input) throws DeserializationException, IOException {
            if (input == null || input.length == 0) {
                return null;
            }
            return input;
        }
    }

    /** Serializes cache keys as UTF-8 encoded strings. */
    public static class StringSerializer implements Serializer<String> {

        /**
         * @param value key to serialize
         * @param out stream that receives the UTF-8 bytes of {@code value}
         */
        @Override
        public void serialize(final String value, final OutputStream out)
                throws SerializationException, IOException {
            out.write(value.getBytes(StandardCharsets.UTF_8));
        }
    }

}