com.clickha.nifi.processors.FetchFileTransferV2.java Source code

Java tutorial

Introduction

Here is the source code for com.clickha.nifi.processors.FetchFileTransferV2.java

Source

package com.clickha.nifi.processors;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.StopWatch;
import org.apache.nifi.util.Tuple;

import com.clickha.nifi.processors.util.FileTransferV2;
import com.clickha.nifi.processors.util.PermissionDeniedException;

/**
 * A base class for FetchSFTP, FetchFTP processors.
 *
 * <p>For each incoming FlowFile the processor evaluates the Hostname, Port and Remote File
 * properties, pulls the remote file's content into the FlowFile, commits the session and only
 * then applies the configured Completion Strategy (none / move / delete) to the remote file.
 * {@link FileTransferV2} connections are cached per host/port pair and closed once they have
 * been idle for {@link #IDLE_CONNECTION_MILLIS} milliseconds.</p>
 *
 * Note that implementations of this class should never use the @SupportsBatching annotation! Doing so
 * could result in data loss!
 */
public abstract class FetchFileTransferV2 extends AbstractProcessor {

    // Allowable values of the "Completion Strategy" property.
    static final AllowableValue COMPLETION_NONE = new AllowableValue("None", "None", "Leave the file as-is");
    static final AllowableValue COMPLETION_MOVE = new AllowableValue("Move File", "Move File",
            "Move the file to the directory specified by the <Move Destination Directory> property");
    static final AllowableValue COMPLETION_DELETE = new AllowableValue("Delete File", "Delete File",
            "Deletes the original file from the remote system");

    static final PropertyDescriptor HOSTNAME = new PropertyDescriptor.Builder().name("Hostname")
            .description("The fully-qualified hostname or IP address of the host to fetch the data from")
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true).required(true)
            .build();
    static final PropertyDescriptor UNDEFAULTED_PORT = new PropertyDescriptor.Builder().name("Port")
            .description("The port to connect to on the remote host to fetch the data from")
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true).required(true)
            .build();
    public static final PropertyDescriptor USERNAME = new PropertyDescriptor.Builder().name("Username")
            .description("Username").addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(true).required(true).build();
    public static final PropertyDescriptor REMOTE_FILENAME = new PropertyDescriptor.Builder().name("Remote File")
            .description("The fully qualified filename on the remote system").required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).expressionLanguageSupported(true).build();
    static final PropertyDescriptor COMPLETION_STRATEGY = new PropertyDescriptor.Builder()
            .name("Completion Strategy")
            .description(
                    "Specifies what to do with the original file on the server once it has been pulled into NiFi. If the Completion Strategy fails, a warning will be "
                            + "logged but the data will still be transferred.")
            .expressionLanguageSupported(false).allowableValues(COMPLETION_NONE, COMPLETION_MOVE, COMPLETION_DELETE)
            .defaultValue(COMPLETION_NONE.getValue()).required(true).build();
    static final PropertyDescriptor MOVE_DESTINATION_DIR = new PropertyDescriptor.Builder()
            .name("Move Destination Directory")
            .description(
                    "The directory on the remote server to move the original file to once it has been ingested into NiFi. "
                            + "This property is ignored unless the Completion Strategy is set to \"Move File\". The specified directory must already exist on "
                            + "the remote system, or the rename will fail.")
            .expressionLanguageSupported(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).required(false)
            .build();

    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success")
            .description("All FlowFiles that are received are routed to success").build();
    static final Relationship REL_COMMS_FAILURE = new Relationship.Builder().name("comms.failure").description(
            "Any FlowFile that could not be fetched from the remote server due to a communications failure will be transferred to this Relationship.")
            .build();
    static final Relationship REL_NOT_FOUND = new Relationship.Builder().name("not.found").description(
            "Any FlowFile for which we receive a 'Not Found' message from the remote server will be transferred to this Relationship.")
            .build();
    static final Relationship REL_PERMISSION_DENIED = new Relationship.Builder().name("permission.denied")
            .description(
                    "Any FlowFile that could not be fetched from the remote server due to insufficient permissions will be transferred to this Relationship.")
            .build();

    /** Amount of time to wait before closing an idle connection. */
    private static final long IDLE_CONNECTION_MILLIS = TimeUnit.SECONDS.toMillis(10L);

    /**
     * Cache of reusable connections, keyed by (host, port). Every access to the map itself
     * happens while holding the map's monitor.
     */
    private final Map<Tuple<String, Integer>, BlockingQueue<FileTransferIdleWrapper>> fileTransferMap = new HashMap<>();

    /** Wall-clock time (millis) at which idle connections were last swept. */
    private volatile long lastClearTime = System.currentTimeMillis();

    /**
     * @return the relationships this processor can route to: success, not.found,
     *         permission.denied and comms.failure
     */
    @Override
    public Set<Relationship> getRelationships() {
        final Set<Relationship> relationships = new HashSet<>();
        relationships.add(REL_SUCCESS);
        relationships.add(REL_NOT_FOUND);
        relationships.add(REL_PERMISSION_DENIED);
        relationships.add(REL_COMMS_FAILURE);
        return relationships;
    }

    /**
     * Close connections that are idle or optionally close all connections.
     * Connections are considered "idle" if they have not been used in 10 seconds.
     *
     * <p>The method takes the monitor of the connection cache itself so that it is safe to call
     * both from {@code onTrigger} (which already holds the monitor; Java monitors are reentrant)
     * and from the lifecycle callback {@link #cleanup()}.</p>
     *
     * @param closeNonIdleConnections if <code>true</code> will close all connection; if <code>false</code> will close only idle connections
     */
    private void closeConnections(final boolean closeNonIdleConnections) {
        synchronized (fileTransferMap) {
            for (final Map.Entry<Tuple<String, Integer>, BlockingQueue<FileTransferIdleWrapper>> entry : fileTransferMap
                    .entrySet()) {
                final BlockingQueue<FileTransferIdleWrapper> wrapperQueue = entry.getValue();

                // Drain the queue, closing what is stale and remembering what must be re-queued.
                final List<FileTransferIdleWrapper> putBack = new ArrayList<>();
                FileTransferIdleWrapper wrapper;
                while ((wrapper = wrapperQueue.poll()) != null) {
                    final long nanosSinceLastUse = System.nanoTime() - wrapper.getLastUsed();
                    final boolean stillFresh = TimeUnit.NANOSECONDS.toMillis(nanosSinceLastUse) < IDLE_CONNECTION_MILLIS;
                    if (!closeNonIdleConnections && stillFresh) {
                        putBack.add(wrapper);
                    } else {
                        try {
                            wrapper.getFileTransfer().close();
                        } catch (final IOException ioe) {
                            getLogger().warn("Failed to close Idle Connection due to {}", new Object[] { ioe }, ioe);
                        }
                    }
                }

                for (final FileTransferIdleWrapper toPutBack : putBack) {
                    wrapperQueue.offer(toPutBack);
                }
            }
        }
    }

    /**
     * Closes every cached connection when the processor is stopped.
     */
    @OnStopped
    public void cleanup() {
        // close all connections
        closeConnections(true);
    }

    /**
     * @return the properties supported by this base class: Hostname, Port, Remote File,
     *         Completion Strategy and Move Destination Directory
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        final List<PropertyDescriptor> properties = new ArrayList<>();
        properties.add(HOSTNAME);
        properties.add(UNDEFAULTED_PORT);
        properties.add(REMOTE_FILENAME);
        properties.add(COMPLETION_STRATEGY);
        properties.add(MOVE_DESTINATION_DIR);
        return properties;
    }

    /**
     * Fetches the remote file named by the incoming FlowFile's evaluated properties and routes
     * the FlowFile to success, not.found, permission.denied or comms.failure.
     *
     * <p>The connection used for the fetch is held exclusively by this invocation until the
     * Completion Strategy has finished; only then is it returned to the cache (or closed, if the
     * fetch failed for a reason other than "not found" / "permission denied").</p>
     *
     * @param context provides the configured property values
     * @param session the session used to obtain, write and transfer the FlowFile
     * @throws ProcessException propagated from the session operations outside the fetch itself
     */
    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        final StopWatch stopWatch = new StopWatch(true);
        final String host = context.getProperty(HOSTNAME).evaluateAttributeExpressions(flowFile).getValue();
        final int port = context.getProperty(UNDEFAULTED_PORT).evaluateAttributeExpressions(flowFile).asInteger();
        final String filename = context.getProperty(REMOTE_FILENAME).evaluateAttributeExpressions(flowFile)
                .getValue();

        // Try to get a FileTransfer object from our cache, or create a new one.
        final BlockingQueue<FileTransferIdleWrapper> transferQueue = getTransferQueue(host, port);
        final FileTransferIdleWrapper transferWrapper = transferQueue.poll();
        final FileTransferV2 transfer = transferWrapper == null ? createFileTransfer(context)
                : transferWrapper.getFileTransfer();

        // Assume the connection is unusable until a code path proves otherwise, so that an
        // unexpected exception never returns a connection in an unknown state to the cache.
        boolean closeConnection = true;
        try {
            // Pull data from remote system.
            try {
                final InputStream in = transfer.getInputStream(filename, flowFile);

                flowFile = session.write(flowFile, new OutputStreamCallback() {
                    @Override
                    public void process(final OutputStream out) throws IOException {
                        StreamUtils.copy(in, out);
                        transfer.flush();
                    }
                });
                closeConnection = false;
            } catch (final FileNotFoundException e) {
                // The fetch was refused for this one file; keep the connection for reuse.
                closeConnection = false;
                getLogger().error(
                        "Failed to fetch content for {} from filename {} on remote host {} because the file could not be found on the remote system; routing to {}",
                        new Object[] { flowFile, filename, host, REL_NOT_FOUND.getName() });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_NOT_FOUND);
                session.getProvenanceReporter().route(flowFile, REL_NOT_FOUND);
                return;
            } catch (final PermissionDeniedException e) {
                // The fetch was refused for this one file; keep the connection for reuse.
                closeConnection = false;
                getLogger().error(
                        "Failed to fetch content for {} from filename {} on remote host {} due to insufficient permissions; routing to {}",
                        new Object[] { flowFile, filename, host, REL_PERMISSION_DENIED.getName() });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_PERMISSION_DENIED);
                session.getProvenanceReporter().route(flowFile, REL_PERMISSION_DENIED);
                return;
            } catch (final ProcessException | IOException e) {
                // closeConnection stays true: the connection is closed in the finally block.
                getLogger().error(
                        "Failed to fetch content for {} from filename {} on remote host {}:{} due to {}; routing to comms.failure",
                        new Object[] { flowFile, filename, host, port, e.toString() }, e);
                session.transfer(session.penalize(flowFile), REL_COMMS_FAILURE);
                return;
            }

            // Add FlowFile attributes
            final String protocolName = transfer.getProtocolName();
            final Map<String, String> attributes = new HashMap<>();
            attributes.put(protocolName + ".remote.host", host);
            attributes.put(protocolName + ".remote.port", String.valueOf(port));
            attributes.put(protocolName + ".remote.filename", filename);

            if (filename.contains("/")) {
                final String path = StringUtils.substringBeforeLast(filename, "/");
                final String filenameOnly = StringUtils.substringAfterLast(filename, "/");
                attributes.put(CoreAttributes.PATH.key(), path);
                attributes.put(CoreAttributes.FILENAME.key(), filenameOnly);
            } else {
                attributes.put(CoreAttributes.FILENAME.key(), filename);
            }
            flowFile = session.putAllAttributes(flowFile, attributes);

            // emit provenance event and transfer FlowFile
            session.getProvenanceReporter().fetch(flowFile, protocolName + "://" + host + ":" + port + "/" + filename,
                    stopWatch.getElapsed(TimeUnit.MILLISECONDS));
            session.transfer(flowFile, REL_SUCCESS);

            // it is critical that we commit the session before moving/deleting the remote file. Otherwise, we could have a situation where
            // we ingest the data, delete/move the remote file, and then NiFi dies/is shut down before the session is committed. This would
            // result in data loss! If we commit the session first, we are safe.
            session.commit();

            performCompletionStrategy(context, flowFile, transfer, host, port, filename);
        } finally {
            releaseTransfer(transfer, transferQueue, closeConnection, host, port);
        }
    }

    /**
     * Looks up (creating if necessary) the connection queue for the given host/port and, at most
     * once per idle interval, sweeps idle connections out of the cache.
     *
     * @param host the evaluated remote hostname
     * @param port the evaluated remote port
     * @return the queue of cached connections for that host/port; never <code>null</code>
     */
    private BlockingQueue<FileTransferIdleWrapper> getTransferQueue(final String host, final int port) {
        synchronized (fileTransferMap) {
            final Tuple<String, Integer> key = new Tuple<>(host, port);

            BlockingQueue<FileTransferIdleWrapper> queue = fileTransferMap.get(key);
            if (queue == null) {
                queue = new LinkedBlockingQueue<>();
                fileTransferMap.put(key, queue);
            }

            // periodically close idle connections
            if (System.currentTimeMillis() - lastClearTime > IDLE_CONNECTION_MILLIS) {
                closeConnections(false);
                lastClearTime = System.currentTimeMillis();
            }
            return queue;
        }
    }

    /**
     * Applies the configured Completion Strategy to the remote file. Failures are logged as
     * warnings and never propagated, because the FlowFile has already been committed.
     *
     * @param context  provides the Completion Strategy and Move Destination Directory properties
     * @param flowFile the fetched FlowFile, used for expression evaluation and log messages
     * @param transfer the connection to perform the delete/rename on
     * @param host     remote host, for log messages
     * @param port     remote port, for log messages
     * @param filename the remote file that was fetched
     */
    private void performCompletionStrategy(final ProcessContext context, final FlowFile flowFile,
            final FileTransferV2 transfer, final String host, final int port, final String filename) {
        final String completionStrategy = context.getProperty(COMPLETION_STRATEGY).getValue();
        if (COMPLETION_DELETE.getValue().equalsIgnoreCase(completionStrategy)) {
            try {
                transfer.deleteFile(null, filename);
            } catch (final FileNotFoundException ignored) {
                // file doesn't exist -- effectively the same as removing it. Move on.
            } catch (final IOException ioe) {
                getLogger().warn(
                        "Successfully fetched the content for {} from {}:{}{} but failed to remove the remote file due to {}",
                        new Object[] { flowFile, host, port, filename, ioe }, ioe);
            }
        } else if (COMPLETION_MOVE.getValue().equalsIgnoreCase(completionStrategy)) {
            String targetDir = context.getProperty(MOVE_DESTINATION_DIR).evaluateAttributeExpressions(flowFile)
                    .getValue();
            // The property is optional, so it may be unset (or evaluate to nothing) even though
            // "Move File" was selected; warn instead of failing with a NullPointerException.
            if (targetDir == null || targetDir.isEmpty()) {
                getLogger().warn(
                        "Successfully fetched the content for {} from {}:{}{} but could not move the remote file because no Move Destination Directory is configured",
                        new Object[] { flowFile, host, port, filename });
                return;
            }
            if (!targetDir.endsWith("/")) {
                targetDir = targetDir + "/";
            }
            // substringAfterLast yields "" when the separator is absent, which would turn the
            // rename target into the directory itself; a slash-less name is already the simple name.
            final String simpleFilename = filename.contains("/") ? StringUtils.substringAfterLast(filename, "/")
                    : filename;
            final String target = targetDir + simpleFilename;

            try {
                transfer.rename(filename, target);
            } catch (final IOException ioe) {
                getLogger().warn(
                        "Successfully fetched the content for {} from {}:{}{} but failed to rename the remote file due to {}",
                        new Object[] { flowFile, host, port, filename, ioe }, ioe);
            }
        }
    }

    /**
     * Either closes the connection or hands it back to the cache for reuse.
     *
     * @param transfer        the connection this invocation was using
     * @param transferQueue   the cache queue the connection belongs to
     * @param closeConnection <code>true</code> to close the connection, <code>false</code> to re-queue it
     * @param host            remote host, for log messages
     * @param port            remote port, for log messages
     */
    private void releaseTransfer(final FileTransferV2 transfer,
            final BlockingQueue<FileTransferIdleWrapper> transferQueue, final boolean closeConnection,
            final String host, final int port) {
        if (closeConnection) {
            try {
                transfer.close();
            } catch (final IOException closeFailure) {
                getLogger().warn("Failed to close connection to {}:{} due to {}",
                        new Object[] { host, port, closeFailure.toString() }, closeFailure);
            }
        } else {
            transferQueue.offer(new FileTransferIdleWrapper(transfer, System.nanoTime()));
        }
    }

    /**
     * Creates a new instance of a FileTransfer that can be used to pull files from a remote system.
     *
     * @param context the ProcessContext to use in order to obtain configured properties
     * @return a FileTransfer that can be used to pull files from a remote system
     */
    protected abstract FileTransferV2 createFileTransfer(ProcessContext context);

    /**
     * Wrapper around a FileTransfer object that is used to know when the FileTransfer was last used, so that
     * we have the ability to close connections that are "idle," or unused for some period of time.
     */
    private static class FileTransferIdleWrapper {
        private final FileTransferV2 fileTransfer;
        /** {@link System#nanoTime()} reading taken when the connection was returned to the cache. */
        private final long lastUsed;

        public FileTransferIdleWrapper(final FileTransferV2 fileTransfer, final long lastUsed) {
            this.fileTransfer = fileTransfer;
            this.lastUsed = lastUsed;
        }

        public FileTransferV2 getFileTransfer() {
            return fileTransfer;
        }

        public long getLastUsed() {
            return this.lastUsed;
        }
    }
}