org.apache.nifi.processors.standard.Wait.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nifi.processors.standard.Wait.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.standard;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.distributed.cache.client.AtomicDistributedMapCacheClient;
import org.apache.nifi.expression.AttributeExpression.ResultType;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.FlowFileFilter;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.standard.WaitNotifyProtocol.Signal;

import static org.apache.nifi.processor.FlowFileFilter.FlowFileFilterResult.ACCEPT_AND_CONTINUE;
import static org.apache.nifi.processor.FlowFileFilter.FlowFileFilterResult.ACCEPT_AND_TERMINATE;
import static org.apache.nifi.processor.FlowFileFilter.FlowFileFilterResult.REJECT_AND_CONTINUE;

@EventDriven
@SupportsBatching
@Tags({ "map", "cache", "wait", "hold", "distributed", "signal", "release" })
@InputRequirement(Requirement.INPUT_REQUIRED)
@CapabilityDescription("Routes incoming FlowFiles to the 'wait' relationship until a matching release signal "
        + "is stored in the distributed cache from a corresponding Notify processor. "
        + "When a matching release signal is identified, a waiting FlowFile is routed to the 'success' relationship, "
        + "with attributes copied from the FlowFile that produced the release signal from the Notify processor.  "
        + "The release signal entry is then removed from the cache. Waiting FlowFiles will be routed to 'expired' if they exceed the Expiration Duration. "

        + "If you need to wait for more than one signal, specify the desired number of signals via the 'Target Signal Count' property. "
        + "This is particularly useful with processors that split a source FlowFile into multiple fragments, such as SplitText. "
        + "In order to wait for all fragments to be processed, connect the 'original' relationship to a Wait processor, and the 'splits' relationship to "
        + "a corresponding Notify processor. Configure the Notify and Wait processors to use the '${fragment.identifier}' as the value "
        + "of 'Release Signal Identifier', and specify '${fragment.count}' as the value of 'Target Signal Count' in the Wait processor.")
@WritesAttributes({
        @WritesAttribute(attribute = "wait.start.timestamp", description = "All FlowFiles will have an attribute 'wait.start.timestamp', which sets the "
                + "initial epoch timestamp when the file first entered this processor.  This is used to determine the expiration time of the FlowFile."),
        @WritesAttribute(attribute = "wait.counter.<counterName>", description = "If a signal exists when the processor runs, "
                + "each count value in the signal is copied.") })
@SeeAlso(classNames = { "org.apache.nifi.distributed.cache.client.DistributedMapCacheClientService",
        "org.apache.nifi.distributed.cache.server.map.DistributedMapCacheServer",
        "org.apache.nifi.processors.standard.Notify" })
public class Wait extends AbstractProcessor {

    public static final String WAIT_START_TIMESTAMP = "wait.start.timestamp";

    // Identifies the distributed map cache client
    public static final PropertyDescriptor DISTRIBUTED_CACHE_SERVICE = new PropertyDescriptor.Builder()
            .name("distributed-cache-service").displayName("Distributed Cache Service")
            .description(
                    "The Controller Service that is used to check for release signals from a corresponding Notify processor")
            .required(true).identifiesControllerService(AtomicDistributedMapCacheClient.class).build();

    // Selects the FlowFile attribute or expression, whose value is used as cache key
    public static final PropertyDescriptor RELEASE_SIGNAL_IDENTIFIER = new PropertyDescriptor.Builder()
            .name("release-signal-id").displayName("Release Signal Identifier")
            .description("A value, or the results of an Attribute Expression Language statement, which will "
                    + "be evaluated against a FlowFile in order to determine the release signal cache key")
            .required(true)
            .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(ResultType.STRING, true))
            .expressionLanguageSupported(true).build();

    public static final PropertyDescriptor TARGET_SIGNAL_COUNT = new PropertyDescriptor.Builder()
            .name("target-signal-count").displayName("Target Signal Count")
            .description("A value, or the results of an Attribute Expression Language statement, which will "
                    + "be evaluated against a FlowFile in order to determine the target signal count. "
                    + "This processor checks whether the signal count has reached this number. "
                    + "If Signal Counter Name is specified, this processor checks a particular counter, "
                    + "otherwise checks against total count in a signal.")
            .required(true).addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
            .expressionLanguageSupported(true).defaultValue("1").build();

    public static final PropertyDescriptor SIGNAL_COUNTER_NAME = new PropertyDescriptor.Builder()
            .name("signal-counter-name").displayName("Signal Counter Name")
            .description("A value, or the results of an Attribute Expression Language statement, which will "
                    + "be evaluated against a FlowFile in order to determine the signal counter name. "
                    + "If not specified, this processor checks the total count in a signal.")
            .required(false)
            .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(ResultType.STRING, true))
            .expressionLanguageSupported(true).build();

    public static final PropertyDescriptor WAIT_BUFFER_COUNT = new PropertyDescriptor.Builder()
            .name("wait-buffer-count").displayName("Wait Buffer Count")
            .description(
                    "Specify the maximum number of incoming FlowFiles that can be buffered to check whether it can move forward. "
                            + "The more buffer can provide the better performance, as it reduces the number of interactions with cache service "
                            + "by grouping FlowFiles by signal identifier. "
                            + "Only a signal identifier can be processed at a processor execution.")
            .required(true).addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR).defaultValue("1").build();

    public static final PropertyDescriptor RELEASABLE_FLOWFILE_COUNT = new PropertyDescriptor.Builder()
            .name("releasable-flowfile-count").displayName("Releasable FlowFile Count")
            .description("A value, or the results of an Attribute Expression Language statement, which will "
                    + "be evaluated against a FlowFile in order to determine the releasable FlowFile count. "
                    + "This specifies how many FlowFiles can be released when a target count reaches target signal count. "
                    + "Zero (0) has a special meaning, any number of FlowFiles can be released as long as signal count matches target.")
            .required(true).addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
            .expressionLanguageSupported(true).defaultValue("1").build();

    // Selects the FlowFile attribute or expression, whose value is used as cache key
    public static final PropertyDescriptor EXPIRATION_DURATION = new PropertyDescriptor.Builder()
            .name("expiration-duration").displayName("Expiration Duration")
            .description(
                    "Indicates the duration after which waiting FlowFiles will be routed to the 'expired' relationship")
            .required(true).defaultValue("10 min").addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
            .expressionLanguageSupported(false).build();

    public static final AllowableValue ATTRIBUTE_COPY_REPLACE = new AllowableValue("replace", "Replace if present",
            "When cached attributes are copied onto released FlowFiles, they replace any matching attributes.");

    public static final AllowableValue ATTRIBUTE_COPY_KEEP_ORIGINAL = new AllowableValue("keeporiginal",
            "Keep original", "Attributes on released FlowFiles are not overwritten by copied cached attributes.");

    public static final PropertyDescriptor ATTRIBUTE_COPY_MODE = new PropertyDescriptor.Builder()
            .name("attribute-copy-mode").displayName("Attribute Copy Mode")
            .description("Specifies how to handle attributes copied from FlowFiles entering the Notify processor")
            .defaultValue(ATTRIBUTE_COPY_KEEP_ORIGINAL.getValue()).required(true)
            .allowableValues(ATTRIBUTE_COPY_REPLACE, ATTRIBUTE_COPY_KEEP_ORIGINAL)
            .expressionLanguageSupported(false).build();

    public static final AllowableValue WAIT_MODE_TRANSFER_TO_WAIT = new AllowableValue("wait",
            "Transfer to wait relationship",
            "Transfer a FlowFile to the 'wait' relationship when whose release signal has not been notified yet."
                    + " This mode allows other incoming FlowFiles to be enqueued by moving FlowFiles into the wait relationship.");

    public static final AllowableValue WAIT_MODE_KEEP_IN_UPSTREAM = new AllowableValue("keep",
            "Keep in the upstream connection",
            "Transfer a FlowFile to the upstream connection where it comes from when whose release signal has not been notified yet."
                    + " This mode helps keeping upstream connection being full so that the upstream source processor"
                    + " will not be scheduled while back-pressure is active and limit incoming FlowFiles. ");

    public static final PropertyDescriptor WAIT_MODE = new PropertyDescriptor.Builder().name("wait-mode")
            .displayName("Wait Mode").description("Specifies how to handle a FlowFile waiting for a notify signal")
            .defaultValue(WAIT_MODE_TRANSFER_TO_WAIT.getValue()).required(true)
            .allowableValues(WAIT_MODE_TRANSFER_TO_WAIT, WAIT_MODE_KEEP_IN_UPSTREAM)
            .expressionLanguageSupported(false).build();

    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success")
            .description(
                    "A FlowFile with a matching release signal in the cache will be routed to this relationship")
            .build();

    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description(
            "When the cache cannot be reached, or if the Release Signal Identifier evaluates to null or empty, FlowFiles will be routed to this relationship")
            .build();

    public static final Relationship REL_WAIT = new Relationship.Builder().name("wait")
            .description(
                    "A FlowFile with no matching release signal in the cache will be routed to this relationship")
            .build();

    public static final Relationship REL_EXPIRED = new Relationship.Builder().name("expired").description(
            "A FlowFile that has exceeded the configured Expiration Duration will be routed to this relationship")
            .build();

    private final Set<Relationship> relationships;

    public Wait() {
        final Set<Relationship> rels = new HashSet<>();
        rels.add(REL_SUCCESS);
        rels.add(REL_WAIT);
        rels.add(REL_EXPIRED);
        rels.add(REL_FAILURE);
        relationships = Collections.unmodifiableSet(rels);
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        final List<PropertyDescriptor> descriptors = new ArrayList<>();
        descriptors.add(RELEASE_SIGNAL_IDENTIFIER);
        descriptors.add(TARGET_SIGNAL_COUNT);
        descriptors.add(SIGNAL_COUNTER_NAME);
        descriptors.add(WAIT_BUFFER_COUNT);
        descriptors.add(RELEASABLE_FLOWFILE_COUNT);
        descriptors.add(EXPIRATION_DURATION);
        descriptors.add(DISTRIBUTED_CACHE_SERVICE);
        descriptors.add(ATTRIBUTE_COPY_MODE);
        descriptors.add(WAIT_MODE);
        return descriptors;
    }

    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {

        final ComponentLog logger = getLogger();

        // Signal id is computed from attribute 'RELEASE_SIGNAL_IDENTIFIER' with expression language support
        final PropertyValue signalIdProperty = context.getProperty(RELEASE_SIGNAL_IDENTIFIER);
        final Integer bufferCount = context.getProperty(WAIT_BUFFER_COUNT).asInteger();

        final Map<Relationship, List<FlowFile>> processedFlowFiles = new HashMap<>();
        final Function<Relationship, List<FlowFile>> getFlowFilesFor = r -> processedFlowFiles.computeIfAbsent(r,
                k -> new ArrayList<>());

        final AtomicReference<String> targetSignalId = new AtomicReference<>();
        final AtomicInteger bufferedCount = new AtomicInteger(0);
        final List<FlowFile> failedFilteringFlowFiles = new ArrayList<>();
        final Supplier<FlowFileFilter.FlowFileFilterResult> acceptResultSupplier = () -> bufferedCount
                .incrementAndGet() == bufferCount ? ACCEPT_AND_TERMINATE : ACCEPT_AND_CONTINUE;
        final List<FlowFile> flowFiles = session.get(f -> {

            final String fSignalId = signalIdProperty.evaluateAttributeExpressions(f).getValue();

            // if the computed value is null, or empty, we transfer the FlowFile to failure relationship
            if (StringUtils.isBlank(fSignalId)) {
                // We can't penalize f before getting it from session, so keep it in a temporal list.
                logger.error("FlowFile {} has no attribute for given Release Signal Identifier",
                        new Object[] { f });
                failedFilteringFlowFiles.add(f);
                return ACCEPT_AND_CONTINUE;
            }

            final String targetSignalIdStr = targetSignalId.get();
            if (targetSignalIdStr == null) {
                // This is the first one.
                targetSignalId.set(fSignalId);
                return acceptResultSupplier.get();
            }

            if (targetSignalIdStr.equals(fSignalId)) {
                return acceptResultSupplier.get();
            }

            return REJECT_AND_CONTINUE;

        });

        final String attributeCopyMode = context.getProperty(ATTRIBUTE_COPY_MODE).getValue();
        final boolean replaceOriginalAttributes = ATTRIBUTE_COPY_REPLACE.getValue().equals(attributeCopyMode);
        final AtomicReference<Signal> signalRef = new AtomicReference<>();

        final Consumer<FlowFile> transferToFailure = flowFile -> {
            flowFile = session.penalize(flowFile);
            getFlowFilesFor.apply(REL_FAILURE).add(flowFile);
        };

        final Consumer<Entry<Relationship, List<FlowFile>>> transferFlowFiles = routedFlowFiles -> {
            Relationship relationship = routedFlowFiles.getKey();

            if (REL_WAIT.equals(relationship)) {
                final String waitMode = context.getProperty(WAIT_MODE).getValue();

                if (WAIT_MODE_KEEP_IN_UPSTREAM.getValue().equals(waitMode)) {
                    // Transfer to self.
                    relationship = Relationship.SELF;
                }
            }

            final List<FlowFile> flowFilesWithSignalAttributes = routedFlowFiles.getValue().stream()
                    .map(f -> copySignalAttributes(session, f, signalRef.get(), replaceOriginalAttributes))
                    .collect(Collectors.toList());
            session.transfer(flowFilesWithSignalAttributes, relationship);
        };

        failedFilteringFlowFiles.forEach(f -> {
            flowFiles.remove(f);
            transferToFailure.accept(f);
        });

        if (flowFiles.isEmpty()) {
            // If there was nothing but failed FlowFiles while filtering, transfer those and end immediately.
            processedFlowFiles.entrySet().forEach(transferFlowFiles);
            return;
        }

        // the cache client used to interact with the distributed cache
        final AtomicDistributedMapCacheClient cache = context.getProperty(DISTRIBUTED_CACHE_SERVICE)
                .asControllerService(AtomicDistributedMapCacheClient.class);
        final WaitNotifyProtocol protocol = new WaitNotifyProtocol(cache);

        final String signalId = targetSignalId.get();
        final Signal signal;

        // get notifying signal
        try {
            signal = protocol.getSignal(signalId);
            signalRef.set(signal);
        } catch (final IOException e) {
            throw new ProcessException(String.format("Failed to get signal for %s due to %s", signalId, e), e);
        }

        String targetCounterName = null;
        long targetCount = 1;
        int releasableFlowFileCount = 1;

        final List<FlowFile> candidates = new ArrayList<>();

        for (FlowFile flowFile : flowFiles) {
            // Set wait start timestamp if it's not set yet
            String waitStartTimestamp = flowFile.getAttribute(WAIT_START_TIMESTAMP);
            if (waitStartTimestamp == null) {
                waitStartTimestamp = String.valueOf(System.currentTimeMillis());
                flowFile = session.putAttribute(flowFile, WAIT_START_TIMESTAMP, waitStartTimestamp);
            }

            long lWaitStartTimestamp;
            try {
                lWaitStartTimestamp = Long.parseLong(waitStartTimestamp);
            } catch (NumberFormatException nfe) {
                logger.error("{} has an invalid value '{}' on FlowFile {}",
                        new Object[] { WAIT_START_TIMESTAMP, waitStartTimestamp, flowFile });
                transferToFailure.accept(flowFile);
                continue;
            }

            // check for expiration
            long expirationDuration = context.getProperty(EXPIRATION_DURATION).asTimePeriod(TimeUnit.MILLISECONDS);
            long now = System.currentTimeMillis();
            if (now > (lWaitStartTimestamp + expirationDuration)) {
                logger.info("FlowFile {} expired after {}ms",
                        new Object[] { flowFile, (now - lWaitStartTimestamp) });
                getFlowFilesFor.apply(REL_EXPIRED).add(flowFile);
                continue;
            }

            // If there's no signal yet, then we don't have to evaluate target counts. Return immediately.
            if (signal == null) {
                if (logger.isDebugEnabled()) {
                    logger.debug("No release signal found for {} on FlowFile {} yet",
                            new Object[] { signalId, flowFile });
                }
                getFlowFilesFor.apply(REL_WAIT).add(flowFile);
                continue;
            }

            // Fix target counter name and count from current FlowFile, if those are not set yet.
            if (candidates.isEmpty()) {
                targetCounterName = context.getProperty(SIGNAL_COUNTER_NAME).evaluateAttributeExpressions(flowFile)
                        .getValue();
                try {
                    targetCount = Long.valueOf(context.getProperty(TARGET_SIGNAL_COUNT)
                            .evaluateAttributeExpressions(flowFile).getValue());
                } catch (final NumberFormatException e) {
                    transferToFailure.accept(flowFile);
                    logger.error("Failed to parse targetCount when processing {} due to {}",
                            new Object[] { flowFile, e }, e);
                    continue;
                }
                try {
                    releasableFlowFileCount = Integer.valueOf(context.getProperty(RELEASABLE_FLOWFILE_COUNT)
                            .evaluateAttributeExpressions(flowFile).getValue());
                } catch (final NumberFormatException e) {
                    transferToFailure.accept(flowFile);
                    logger.error("Failed to parse releasableFlowFileCount when processing {} due to {}",
                            new Object[] { flowFile, e }, e);
                    continue;
                }
            }

            // FlowFile is now validated and added to candidates.
            candidates.add(flowFile);
        }

        boolean waitCompleted = false;
        boolean waitProgressed = false;
        if (signal != null && !candidates.isEmpty()) {

            if (releasableFlowFileCount > 1) {
                signal.releaseCandidatese(targetCounterName, targetCount, releasableFlowFileCount, candidates,
                        released -> getFlowFilesFor.apply(REL_SUCCESS).addAll(released),
                        waiting -> getFlowFilesFor.apply(REL_WAIT).addAll(waiting));
                waitProgressed = !getFlowFilesFor.apply(REL_SUCCESS).isEmpty();

            } else {
                // releasableFlowFileCount = 0 or 1
                boolean reachedTargetCount = StringUtils.isBlank(targetCounterName)
                        ? signal.isTotalCountReached(targetCount)
                        : signal.isCountReached(targetCounterName, targetCount);

                if (reachedTargetCount) {
                    if (releasableFlowFileCount == 0) {
                        getFlowFilesFor.apply(REL_SUCCESS).addAll(candidates);
                    } else {
                        // releasableFlowFileCount = 1
                        getFlowFilesFor.apply(REL_SUCCESS).add(candidates.remove(0));
                        getFlowFilesFor.apply(REL_WAIT).addAll(candidates);
                        // If releasableFlowFileCount == 0, leave signal as it is,
                        // so that any number of FlowFile can be released as long as target count condition matches.
                        waitCompleted = true;
                    }
                } else {
                    getFlowFilesFor.apply(REL_WAIT).addAll(candidates);
                }
            }
        }

        // Transfer FlowFiles.
        processedFlowFiles.entrySet().forEach(transferFlowFiles);

        // Update signal if needed.
        try {
            if (waitCompleted) {
                protocol.complete(signalId);
            } else if (waitProgressed) {
                protocol.replace(signal);
            }

        } catch (final IOException e) {
            session.rollback();
            throw new ProcessException(
                    String.format("Unable to communicate with cache while updating %s due to %s", signalId, e), e);
        }

    }

    private FlowFile copySignalAttributes(final ProcessSession session, final FlowFile flowFile,
            final Signal signal, final boolean replaceOriginal) {
        if (signal == null) {
            return flowFile;
        }

        // copy over attributes from release signal FlowFile, if provided
        final Map<String, String> attributesToCopy;
        if (replaceOriginal) {
            attributesToCopy = new HashMap<>(signal.getAttributes());
            attributesToCopy.remove("uuid");
        } else {
            // if the current FlowFile does *not* have the cached attribute, copy it
            attributesToCopy = signal.getAttributes().entrySet().stream()
                    .filter(e -> flowFile.getAttribute(e.getKey()) == null)
                    .collect(Collectors.toMap(Entry::getKey, Entry::getValue));
        }

        // Copy counter attributes
        final Map<String, Long> counts = signal.getCounts();
        final long totalCount = counts.entrySet().stream().mapToLong(e -> {
            final Long count = e.getValue();
            attributesToCopy.put("wait.counter." + e.getKey(), String.valueOf(count));
            return count;
        }).sum();
        attributesToCopy.put("wait.counter.total", String.valueOf(totalCount));

        return session.putAllAttributes(flowFile, attributesToCopy);
    }

}