org.apache.nifi.processors.standard.ExecuteProcess.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nifi.processors.standard.ExecuteProcess.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.standard;

import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.Restricted;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.Validator;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.standard.util.ArgumentUtils;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

@InputRequirement(Requirement.INPUT_FORBIDDEN)
@Tags({ "command", "process", "source", "external", "invoke", "script", "restricted" })
@CapabilityDescription("Runs an operating system command specified by the user and writes the output of that command to a FlowFile. If the command is expected "
        + "to be long-running, the Processor can output the partial data on a specified interval. When this option is used, the output is expected to be in textual "
        + "format, as it typically does not make sense to split binary data on arbitrary time-based intervals.")
@DynamicProperty(name = "An environment variable name", value = "An environment variable value", description = "These environment variables are passed to the process spawned by this Processor")
@Restricted("Provides operator the ability to execute arbitrary code assuming all permissions that NiFi has.")
@WritesAttributes({ @WritesAttribute(attribute = "command", description = "Executed command"),
        @WritesAttribute(attribute = "command.arguments", description = "Arguments of the command") })
public class ExecuteProcess extends AbstractProcessor {

    final static String ATTRIBUTE_COMMAND = "command";
    final static String ATTRIBUTE_COMMAND_ARGS = "command.arguments";

    public static final PropertyDescriptor COMMAND = new PropertyDescriptor.Builder().name("Command").description(
            "Specifies the command to be executed; if just the name of an executable is provided, it must be in the user's environment PATH.")
            .required(true).expressionLanguageSupported(false).addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    public static final PropertyDescriptor COMMAND_ARGUMENTS = new PropertyDescriptor.Builder()
            .name("Command Arguments")
            .description(
                    "The arguments to supply to the executable delimited by white space. White space can be escaped by enclosing it in double-quotes.")
            .required(false).expressionLanguageSupported(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    public static final PropertyDescriptor WORKING_DIR = new PropertyDescriptor.Builder().name("Working Directory")
            .description("The directory to use as the current working directory when executing the command")
            .expressionLanguageSupported(false)
            .addValidator(StandardValidators.createDirectoryExistsValidator(false, true)).required(false).build();

    public static final PropertyDescriptor BATCH_DURATION = new PropertyDescriptor.Builder().name("Batch Duration")
            .description(
                    "If the process is expected to be long-running and produce textual output, a batch duration can be specified so "
                            + "that the output will be captured for this amount of time and a FlowFile will then be sent out with the results "
                            + "and a new FlowFile will be started, rather than waiting for the process to finish before sending out the results")
            .required(false).expressionLanguageSupported(false)
            .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR).build();

    public static final PropertyDescriptor REDIRECT_ERROR_STREAM = new PropertyDescriptor.Builder()
            .name("Redirect Error Stream")
            .description("If true will redirect any error stream output of the process to the output stream. "
                    + "This is particularly helpful for processes which write extensively to the error stream or for troubleshooting.")
            .required(false).allowableValues("true", "false").defaultValue("false")
            .expressionLanguageSupported(false).addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();

    private static final Validator characterValidator = new StandardValidators.StringLengthValidator(1, 1);

    static final PropertyDescriptor ARG_DELIMITER = new PropertyDescriptor.Builder().name("Argument Delimiter")
            .description(
                    "Delimiter to use to separate arguments for a command [default: space]. Must be a single character.")
            .addValidator(Validator.VALID).addValidator(characterValidator).required(true).defaultValue(" ")
            .build();

    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success")
            .description("All created FlowFiles are routed to this relationship").build();

    private volatile Process externalProcess;

    private volatile ExecutorService executor;
    private Future<?> longRunningProcess;
    private AtomicBoolean failure = new AtomicBoolean(false);
    private volatile ProxyOutputStream proxyOut;

    @Override
    public Set<Relationship> getRelationships() {
        return Collections.singleton(REL_SUCCESS);
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        final List<PropertyDescriptor> properties = new ArrayList<>();
        properties.add(COMMAND);
        properties.add(COMMAND_ARGUMENTS);
        properties.add(BATCH_DURATION);
        properties.add(REDIRECT_ERROR_STREAM);
        properties.add(ARG_DELIMITER);
        return properties;
    }

    @Override
    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
        return new PropertyDescriptor.Builder().name(propertyDescriptorName)
                .description("Sets the environment variable '" + propertyDescriptorName
                        + "' for the process' environment")
                .dynamic(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    }

    @OnScheduled
    public void setupExecutor(final ProcessContext context) {
        executor = Executors.newFixedThreadPool(context.getMaxConcurrentTasks() * 2, new ThreadFactory() {
            private final ThreadFactory defaultFactory = Executors.defaultThreadFactory();

            @Override
            public Thread newThread(final Runnable r) {
                final Thread t = defaultFactory.newThread(r);
                t.setName("ExecuteProcess " + getIdentifier() + " Task");
                return t;
            }
        });
    }

    @OnUnscheduled
    public void shutdownExecutor() {
        try {
            executor.shutdown();
        } finally {
            if (this.externalProcess.isAlive()) {
                this.getLogger().info("Process hasn't terminated, forcing the interrupt");
                this.externalProcess.destroyForcibly();
            }
        }
    }

    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        if (proxyOut == null) {
            proxyOut = new ProxyOutputStream(getLogger());
        }

        final Long batchNanos = context.getProperty(BATCH_DURATION).asTimePeriod(TimeUnit.NANOSECONDS);

        final String command = context.getProperty(COMMAND).getValue();
        final String arguments = context.getProperty(COMMAND_ARGUMENTS).isSet()
                ? context.getProperty(COMMAND_ARGUMENTS).evaluateAttributeExpressions().getValue()
                : null;

        final List<String> commandStrings = createCommandStrings(context, command, arguments);
        final String commandString = StringUtils.join(commandStrings, " ");

        if (longRunningProcess == null || longRunningProcess.isDone()) {
            try {
                longRunningProcess = launchProcess(context, commandStrings, batchNanos, proxyOut);
            } catch (final IOException ioe) {
                getLogger().error("Failed to create process due to {}", new Object[] { ioe });
                context.yield();
                return;
            }
        } else {
            getLogger().info("Read from long running process");
        }

        if (!isScheduled()) {
            getLogger().info("User stopped processor; will terminate process immediately");
            longRunningProcess.cancel(true);
            return;
        }

        // Create a FlowFile that we can write to and set the OutputStream for the FlowFile
        // as the delegate for the ProxyOuptutStream, then wait until the process finishes
        // or until the specified amount of time
        FlowFile flowFile = session.create();
        flowFile = session.write(flowFile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream flowFileOut) throws IOException {
                try (final OutputStream out = new BufferedOutputStream(flowFileOut)) {
                    proxyOut.setDelegate(out);

                    if (batchNanos == null) {
                        // we are not creating batches; wait until process terminates.
                        // NB!!! Maybe get(long timeout, TimeUnit unit) should
                        // be used to avoid waiting forever.
                        try {
                            longRunningProcess.get();
                        } catch (final InterruptedException ie) {
                        } catch (final ExecutionException ee) {
                            getLogger().error("Process execution failed due to {}", new Object[] { ee.getCause() });
                        }
                    } else {
                        // wait the allotted amount of time.
                        try {
                            TimeUnit.NANOSECONDS.sleep(batchNanos);
                        } catch (final InterruptedException ie) {
                        }
                    }

                    proxyOut.setDelegate(null); // prevent from writing to this
                    // stream
                }
            }
        });

        if (flowFile.getSize() == 0L) {
            // If no data was written to the file, remove it
            session.remove(flowFile);
        } else if (failure.get()) {
            // If there was a failure processing the output of the Process, remove the FlowFile
            session.remove(flowFile);
            getLogger().error("Failed to read data from Process, so will not generate FlowFile");
        } else {
            // add command and arguments as attribute
            flowFile = session.putAttribute(flowFile, ATTRIBUTE_COMMAND, command);
            if (arguments != null) {
                flowFile = session.putAttribute(flowFile, ATTRIBUTE_COMMAND_ARGS, arguments);
            }

            // All was good. Generate event and transfer FlowFile.
            session.getProvenanceReporter().create(flowFile, "Created from command: " + commandString);
            getLogger().info("Created {} and routed to success", new Object[] { flowFile });
            session.transfer(flowFile, REL_SUCCESS);
        }

        // Commit the session so that the FlowFile is transferred to the next processor
        session.commit();
    }

    protected List<String> createCommandStrings(final ProcessContext context, final String command,
            final String arguments) {
        final List<String> args = ArgumentUtils.splitArgs(arguments,
                context.getProperty(ARG_DELIMITER).getValue().charAt(0));
        final List<String> commandStrings = new ArrayList<>(args.size() + 1);
        commandStrings.add(command);
        commandStrings.addAll(args);
        return commandStrings;
    }

    protected Future<?> launchProcess(final ProcessContext context, final List<String> commandStrings,
            final Long batchNanos, final ProxyOutputStream proxyOut) throws IOException {

        final Boolean redirectErrorStream = context.getProperty(REDIRECT_ERROR_STREAM).asBoolean();

        final ProcessBuilder builder = new ProcessBuilder(commandStrings);
        final String workingDirName = context.getProperty(WORKING_DIR).getValue();
        if (workingDirName != null) {
            builder.directory(new File(workingDirName));
        }

        final Map<String, String> environment = new HashMap<>();
        for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
            if (entry.getKey().isDynamic()) {
                environment.put(entry.getKey().getName(), entry.getValue());
            }
        }

        if (!environment.isEmpty()) {
            builder.environment().putAll(environment);
        }

        getLogger().info("Start creating new Process > {} ", new Object[] { commandStrings });
        this.externalProcess = builder.redirectErrorStream(redirectErrorStream).start();

        // Submit task to read error stream from process
        if (!redirectErrorStream) {
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try (final BufferedReader reader = new BufferedReader(
                            new InputStreamReader(externalProcess.getErrorStream()))) {
                        reader.lines().filter(line -> line != null && line.length() > 0).forEach(getLogger()::warn);
                    } catch (final IOException ioe) {
                    }
                }
            });
        }

        // Submit task to read output of Process and write to FlowFile.
        failure = new AtomicBoolean(false);
        final Future<?> future = executor.submit(new Callable<Object>() {
            @Override
            public Object call() throws IOException {
                try {
                    if (batchNanos == null) {
                        // if we aren't batching, just copy the stream from the
                        // process to the flowfile.
                        try (final BufferedInputStream bufferedIn = new BufferedInputStream(
                                externalProcess.getInputStream())) {
                            final byte[] buffer = new byte[4096];
                            int len;
                            while ((len = bufferedIn.read(buffer)) > 0) {

                                // NB!!!! Maybe all data should be read from
                                // input stream in case of !isScheduled() to
                                // avoid subprocess deadlock?
                                // (we just don't write data to proxyOut)
                                // Or because we don't use this subprocess
                                // anymore anyway, we don't care?
                                if (!isScheduled()) {
                                    return null;
                                }

                                proxyOut.write(buffer, 0, len);
                            }
                        }
                    } else {
                        // we are batching, which means that the output of the
                        // process is text. It doesn't make sense to grab
                        // arbitrary batches of bytes from some process and send
                        // it along as a piece of data, so we assume that
                        // setting a batch during means text.
                        // Also, we don't want that text to get split up in the
                        // middle of a line, so we use BufferedReader
                        // to read lines of text and write them as lines of text.
                        try (final BufferedReader reader = new BufferedReader(
                                new InputStreamReader(externalProcess.getInputStream()))) {
                            String line;

                            while ((line = reader.readLine()) != null) {
                                if (!isScheduled()) {
                                    return null;
                                }

                                proxyOut.write((line + "\n").getBytes(StandardCharsets.UTF_8));
                            }
                        }
                    }
                } catch (final IOException ioe) {
                    failure.set(true);
                    throw ioe;
                } finally {
                    try {
                        // Since we are going to exit anyway, one sec gives it an extra chance to exit gracefully.
                        // In the future consider exposing it via configuration.
                        boolean terminated = externalProcess.waitFor(1000, TimeUnit.MILLISECONDS);
                        int exitCode = terminated ? externalProcess.exitValue() : -9999;
                        getLogger().info("Process finished with exit code {} ", new Object[] { exitCode });
                    } catch (InterruptedException e1) {
                        Thread.currentThread().interrupt();
                    }
                }

                return null;
            }
        });

        return future;
    }

    /**
     * Output stream that is used to wrap another output stream in a way that the underlying output stream can be swapped out for a different one when needed
     */
    private static class ProxyOutputStream extends OutputStream {

        private final ComponentLog logger;

        private final Lock lock = new ReentrantLock();
        private OutputStream delegate;

        public ProxyOutputStream(final ComponentLog logger) {
            this.logger = logger;
        }

        public void setDelegate(final OutputStream delegate) {
            lock.lock();
            try {
                logger.trace("Switching delegate from {} to {}", new Object[] { this.delegate, delegate });
                this.delegate = delegate;
            } finally {
                lock.unlock();
            }
        }

        private void sleep(final long millis) {
            try {
                Thread.sleep(millis);
            } catch (final InterruptedException ie) {
                Thread.currentThread().interrupt();
            }
        }

        @Override
        public void write(final int b) throws IOException {
            lock.lock();
            try {
                while (true) {
                    if (delegate != null) {
                        logger.trace("Writing to {}", new Object[] { delegate });

                        delegate.write(b);
                        return;
                    } else {
                        lock.unlock();
                        sleep(1L);
                        lock.lock();
                    }
                }
            } finally {
                lock.unlock();
            }
        }

        @Override
        public void write(final byte[] b, final int off, final int len) throws IOException {
            lock.lock();
            try {
                while (true) {
                    if (delegate != null) {
                        logger.trace("Writing to {}", new Object[] { delegate });
                        delegate.write(b, off, len);
                        return;
                    } else {
                        lock.unlock();
                        sleep(1L);
                        lock.lock();
                    }
                }
            } finally {
                lock.unlock();
            }
        }

        @Override
        public void write(final byte[] b) throws IOException {
            write(b, 0, b.length);
        }

        @Override
        public void close() throws IOException {
        }

        @Override
        public void flush() throws IOException {
            lock.lock();
            try {
                while (true) {
                    if (delegate != null) {
                        delegate.flush();
                        return;
                    } else {
                        lock.unlock();
                        sleep(1L);
                        lock.lock();
                    }
                }
            } finally {
                lock.unlock();
            }
        }
    }
}