com.thinkbiganalytics.nifi.v2.init.InitializeFeed.java Source code

Introduction

Here is the source code for com.thinkbiganalytics.nifi.v2.init.InitializeFeed.java, an Apache NiFi processor from the thinkbig-nifi-core-processors module. It controls the setup of a feed by routing incoming flow files to an initialization or re-initialization flow according to the feed's current initialization state.

Source

/**
 *
 */
package com.thinkbiganalytics.nifi.v2.init;

/*-
 * #%L
 * thinkbig-nifi-core-processors
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.metadata.rest.model.feed.InitializationStatus;
import com.thinkbiganalytics.metadata.rest.model.feed.InitializationStatus.State;
import com.thinkbiganalytics.nifi.v2.common.CommonProperties;
import com.thinkbiganalytics.nifi.v2.common.FeedProcessor;

import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.AttributeExpression;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.DataUnit;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.FlowFileFilters;
import org.apache.nifi.processor.util.StandardValidators;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * Routes incoming flow files to a feed initialization or re-initialization flow
 * based on the feed's current initialization state.
 */
@EventDriven
@InputRequirement(InputRequirement.Requirement.INPUT_ALLOWED)
@Tags({ "feed", "initialize", "initialization", "thinkbig" })
@CapabilityDescription("Controls setup of a feed by routing to an initialization or re-initialization flow.")
public class InitializeFeed extends FeedProcessor {

    public static final String REINITIALIZING_FLAG = "reinitializing";

    protected static final AllowableValue[] FAIL_STRATEGY_VALUES = new AllowableValue[] {
            new AllowableValue("FAIL", "Fail", "Immediately fail the flow file"),
            new AllowableValue("RETRY", "Retry",
                    "Retry initialization (if the appropriate time delay has expired) and penalize the flow file.") };

    protected static final PropertyDescriptor FAILURE_STRATEGY = new PropertyDescriptor.Builder()
            .name("Initialization Failure Strategy")
            .description(
                    "Indicates how this processor should behave when a flow file arrives after feed initialization has failed.")
            .allowableValues(FAIL_STRATEGY_VALUES).defaultValue("RETRY").required(true).build();

    protected static final PropertyDescriptor RETRY_DELAY = new PropertyDescriptor.Builder()
            .name("Initialization Retry Delay (seconds)")
            .description(
                    "The minimum amount of seconds to delay before an arriving flow file should trigger another attempt to "
                            + "initialize a feed that has previously failed initialization.  Any flow file arriving before this "
                            + "delay has expired will be immediately failed.")
            .required(false).defaultValue("60").addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
            .expressionLanguageSupported(true).build();

    protected static final PropertyDescriptor MAX_INIT_ATTEMPTS = new PropertyDescriptor.Builder()
            .name("Max Initialization Attempts")
            .description(
                    "The maximum number of times initialization will be retried where there are failures.  There is no limit if unset.")
            .required(true).defaultValue("5").addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
            .expressionLanguageSupported(true).build();

    protected static final PropertyDescriptor CLONE_INIT_FLOWFILE = new PropertyDescriptor.Builder()
            .name("Clone initialization flowfile")
            .description(
                    "Indicates whether the feed initialization flow will use a flowfile that is a clone of the input flowfile, i.e. includes all content.")
            .required(false).allowableValues(CommonProperties.BOOLEANS).defaultValue("true")
            .expressionLanguageSupported(true).build();

    protected static final PropertyDescriptor MAX_FLOW_FILES_COUNT = new PropertyDescriptor.Builder()
            .name("Max Flow File Count").description("The maximum number of flow files to process at one time")
            .required(false).defaultValue("500").addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
            .expressionLanguageSupported(true).build();

    protected static final PropertyDescriptor MAX_FLOW_FILES_SIZE = new PropertyDescriptor.Builder()
            .name("Max Flow Files size")
            .description("The maximum accumulated flow file sizes (in kilobytes) to process at one time")
            .required(false).defaultValue("1000").addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
            .expressionLanguageSupported(true).build();

    public static final Relationship REL_INITIALIZE = new Relationship.Builder().name("Initialize")
            .description("Begin initialization").build();

    public static final Relationship REL_REINITIALIZE = new Relationship.Builder().name("Re-Initialize")
            .description("Begin re-initialization").autoTerminateDefault(true).build();

    private Map<String, AtomicInteger> retryCounts = Collections.synchronizedMap(new HashMap<>());

    @OnScheduled
    @Override
    public void scheduled(ProcessContext context) {
        super.scheduled(context);
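        // Reset the per-feed retry counts each time the processor is scheduled.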
        this.retryCounts.clear();
    }

    @Override
    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) {
        return new PropertyDescriptor.Builder().name(propertyDescriptorName).required(false)
                .addValidator(StandardValidators
                        .createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING, true))
                .addValidator(StandardValidators.ATTRIBUTE_KEY_PROPERTY_NAME_VALIDATOR)
                .expressionLanguageSupported(true).dynamic(true).build();
    }

    /* (non-Javadoc)
     * @see org.apache.nifi.processor.AbstractProcessor#onTrigger(org.apache.nifi.processor.ProcessContext, org.apache.nifi.processor.ProcessSession)
     */
    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
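        // Bound the number and accumulated size of flow files pulled in a single trigger.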
        int maxFlowFilesCount = context.getProperty(MAX_FLOW_FILES_COUNT).evaluateAttributeExpressions()
                .asInteger();
        int maxFlowFilesSize = context.getProperty(MAX_FLOW_FILES_SIZE).evaluateAttributeExpressions().asInteger();
        Map<String, List<FlowFile>> batches = getFlowFileBatches(context, session, maxFlowFilesCount,
                maxFlowFilesSize);

        for (Entry<String, List<FlowFile>> entry : batches.entrySet()) {
            processFlowFiles(context, session, entry.getKey(), entry.getValue());
        }
    }

    private Map<String, List<FlowFile>> getFlowFileBatches(ProcessContext context, ProcessSession session,
            int maxFlowFilesCount, int maxFlowFilesSize) {
        Map<String, List<FlowFile>> map = new HashMap<>();
        List<FlowFile> allFiles = session
                .get(FlowFileFilters.newSizeBasedFilter(maxFlowFilesSize, DataUnit.KB, maxFlowFilesCount));

        // Group the pulled flow files by feed id so that each feed's batch can be
        // routed according to that feed's initialization state.
        allFiles.forEach(ff -> {
            FlowFile inputFF = ensureFeedId(context, session, ff);
            List<FlowFile> batch = map.computeIfAbsent(getFeedId(context, inputFF), k -> new ArrayList<>());
            batch.add(inputFF);
        });

        return map;
    }

    private void processFlowFiles(ProcessContext context, ProcessSession session, String feedId,
            List<FlowFile> batch) {
        if (batch != null) {
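            // Look up the feed's recorded initialization state, defaulting to PENDING when none exists.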
            InitializationStatus status = getMetadataRecorder().getInitializationStatus(feedId)
                    .orElse(new InitializationStatus(State.PENDING));

            switch (status.getState()) {
            case PENDING:
                pending(context, session, feedId, batch);
                break;
            case IN_PROGRESS:
                inProgress(context, session, feedId, batch);
                break;
            case FAILED:
                failed(context, session, feedId, batch, status.getTimestamp(), false);
                break;
            case REINITIALIZE:
                reinitialize(context, session, feedId, batch);
                break;
            case REINITIALIZE_FAILED:
                reinitializeFailed(context, session, feedId, batch, status.getTimestamp());
                break;
            case SUCCESS:
                success(context, session, feedId, batch);
                break;
            }
        }
    }

    @Override
    protected void addProperties(List<PropertyDescriptor> list) {
        super.addProperties(list);
        list.add(FAILURE_STRATEGY);
        list.add(RETRY_DELAY);
        list.add(MAX_INIT_ATTEMPTS);
        list.add(CLONE_INIT_FLOWFILE);
        list.add(MAX_FLOW_FILES_COUNT);
        list.add(MAX_FLOW_FILES_SIZE);
    }

    @Override
    protected void addRelationships(Set<Relationship> set) {
        super.addRelationships(set);
        set.add(CommonProperties.REL_SUCCESS);
        set.add(CommonProperties.REL_FAILURE);
        set.add(REL_INITIALIZE);
        set.add(REL_REINITIALIZE);
    }

    private void pending(ProcessContext context, ProcessSession session, String feedId, List<FlowFile> batch) {
        beginInitialization(context, session, feedId, batch, false);
        requeueFlowFiles(session, batch);
    }

    private void inProgress(ProcessContext context, ProcessSession session, String feedId, List<FlowFile> batch) {
        requeueFlowFiles(session, batch);
    }

    private void failed(ProcessContext context, ProcessSession session, String feedId, List<FlowFile> batch,
            DateTime failTime, boolean reinitializing) {
        String strategy = context.getProperty(FAILURE_STRATEGY).getValue();
        FlowFile inputFF = batch.stream().findFirst().get(); // batch size will always be > 0

        if (strategy.equals("RETRY")) {
            int delay = context.getProperty(RETRY_DELAY).evaluateAttributeExpressions(inputFF).asInteger();
            int max = context.getProperty(MAX_INIT_ATTEMPTS).evaluateAttributeExpressions(inputFF).asInteger();
            AtomicInteger count = getRetryCount(context, inputFF);

            if (count.getAndIncrement() >= max) {
                count.set(max);
                // Max attempts reached: fail the whole batch so that no flow file pulled
                // from the session is left un-transferred when the session commits.
                session.transfer(batch, CommonProperties.REL_FAILURE);
            } else if (failTime.plusSeconds(delay).isBefore(DateTime.now(DateTimeZone.UTC))) {
                // The retry delay has expired: attempt initialization again and requeue the batch.
                beginInitialization(context, session, feedId, batch, reinitializing);
                requeueFlowFiles(session, batch);
            } else {
                // The retry delay has not yet expired: fail the batch immediately.
                session.transfer(batch, CommonProperties.REL_FAILURE);
            }
        } else {
            // FAIL strategy: fail the batch immediately.
            session.transfer(batch, CommonProperties.REL_FAILURE);
        }
    }

    private void reinitialize(ProcessContext context, ProcessSession session, String feedId, List<FlowFile> batch) {
        beginInitialization(context, session, feedId, batch, true);
        requeueFlowFiles(session, batch);
    }

    private void reinitializeFailed(ProcessContext context, ProcessSession session, String feedId,
            List<FlowFile> batch, DateTime failTime) {
        failed(context, session, feedId, batch, failTime, true);
    }

    private void success(ProcessContext context, ProcessSession session, String feedId, List<FlowFile> batch) {
        session.transfer(batch, CommonProperties.REL_SUCCESS);
    }

    private void beginInitialization(ProcessContext context, ProcessSession session, String feedId,
            List<FlowFile> batch, boolean reinitializing) {
        getMetadataRecorder().startFeedInitialization(feedId);
        FlowFile inputFF = batch.stream().findFirst().get(); // batch size will always be > 0
        FlowFile initFF;
        Relationship initRelationship;

        if (context.getProperty(CLONE_INIT_FLOWFILE).evaluateAttributeExpressions(inputFF).asBoolean()) {
            initFF = session.clone(inputFF);
        } else {
            initFF = session.create(inputFF);
        }

        if (reinitializing) {
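            // Use the re-initialization flow only when that relationship is actually connected.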
            boolean useReinit = context.hasConnection(REL_REINITIALIZE);
            initRelationship = useReinit ? REL_REINITIALIZE : REL_INITIALIZE;
        } else {
            initRelationship = REL_INITIALIZE;
        }

        initFF = session.putAttribute(initFF, REINITIALIZING_FLAG, Boolean.toString(reinitializing));
        session.transfer(initFF, initRelationship);
    }

    private void requeueFlowFiles(ProcessSession session, List<FlowFile> batch) {
        List<FlowFile> penalizedBatch = batch.stream().map(inputFF -> session.penalize(inputFF))
                .collect(Collectors.toList());
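        // transfer() without a relationship returns the penalized flow files to the
        // processor's input queue so they are retried on a later trigger.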
        session.transfer(penalizedBatch);
    }

    private AtomicInteger getRetryCount(ProcessContext context, FlowFile inputFF) {
        return this.retryCounts.computeIfAbsent(getFeedId(context, inputFF), k -> new AtomicInteger(0));
    }

}
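
Example

To make the retry gating in failed() concrete, here is a small standalone sketch of the same Joda-Time check. It is illustrative only and not part of the processor: the RetryGateDemo class name and the hard-coded delays are hypothetical, and only the plusSeconds/isBefore comparison mirrors the processor's logic.

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class RetryGateDemo {

    // Mirrors the condition in InitializeFeed.failed(): another initialization
    // attempt is allowed only once the configured delay has elapsed since the
    // recorded failure time (compared in UTC, as in the processor).
    static boolean retryDelayExpired(DateTime failTime, int delaySeconds) {
        return failTime.plusSeconds(delaySeconds).isBefore(DateTime.now(DateTimeZone.UTC));
    }

    public static void main(String[] args) {
        DateTime failedTwoMinutesAgo = DateTime.now(DateTimeZone.UTC).minusMinutes(2);

        System.out.println(retryDelayExpired(failedTwoMinutesAgo, 60));  // true: the 60-second delay has passed
        System.out.println(retryDelayExpired(failedTwoMinutesAgo, 300)); // false: still inside the 5-minute window
    }
}

Flow files that arrive while the delay window is still open are routed to failure; once it has passed, the next arrival triggers a new initialization attempt and the batch is penalized and requeued.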