org.apache.nifi.processors.hive.PutHiveQL.java Source code

Introduction

Here is the source code for org.apache.nifi.processors.hive.PutHiveQL.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.hive;

import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.ReadsAttribute;
import org.apache.nifi.annotation.behavior.ReadsAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.dbcp.hive.HiveDBCPService;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;

import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.SQLNonTransientException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

@SeeAlso(SelectHiveQL.class)
@InputRequirement(Requirement.INPUT_REQUIRED)
@Tags({ "sql", "hive", "put", "database", "update", "insert" })
@CapabilityDescription("Executes a HiveQL DDL/DML command (e.g., UPDATE or INSERT). The content of an incoming FlowFile is expected to be the HiveQL command "
        + "to execute. The HiveQL command may use the ? character as a placeholder for parameters. In this case, the parameters to use must exist as FlowFile attributes "
        + "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type attribute is expected to be "
        + "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.")
@ReadsAttributes({
        @ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parameterized HiveQL statements. The type of each parameter is specified as an integer "
                + "that represents the JDBC Type of the parameter."),
        @ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parameterized HiveQL statements. The values of the parameters are specified as "
                + "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. The type of the hiveql.args.1.value parameter is specified by the hiveql.args.1.type attribute.") })
public class PutHiveQL extends AbstractHiveQLProcessor {

    public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder().name("hive-batch-size")
            .displayName("Batch Size")
            .description("The preferred number of FlowFiles to put to the database in a single transaction")
            .required(true).addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR).defaultValue("100").build();

    public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder()
            .name("statement-delimiter").displayName("Statement Delimiter")
            .description("Statement Delimiter used to separate SQL statements in a multiple statement script")
            .required(true).defaultValue(";").addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(false).build();

    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success")
            .description("A FlowFile is routed to this relationship after the database is successfully updated")
            .build();
    public static final Relationship REL_RETRY = new Relationship.Builder().name("retry").description(
            "A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed")
            .build();
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description(
            "A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, "
                    + "such as an invalid query or an integrity constraint violation")
            .build();

    private final static List<PropertyDescriptor> propertyDescriptors;
    private final static Set<Relationship> relationships;

    /*
     * Build the list of property descriptors and the set of relationships
     * only once, when the class is first loaded.
     */
    static {
        List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
        _propertyDescriptors.add(HIVE_DBCP_SERVICE);
        _propertyDescriptors.add(BATCH_SIZE);
        _propertyDescriptors.add(CHARSET);
        _propertyDescriptors.add(STATEMENT_DELIMITER);
        propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);

        Set<Relationship> _relationships = new HashSet<>();
        _relationships.add(REL_SUCCESS);
        _relationships.add(REL_FAILURE);
        _relationships.add(REL_RETRY);
        relationships = Collections.unmodifiableSet(_relationships);
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return propertyDescriptors;
    }

    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
        final List<FlowFile> flowFiles = session.get(batchSize);

        if (flowFiles.isEmpty()) {
            return;
        }

        final long startNanos = System.nanoTime();
        final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
        final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE)
                .asControllerService(HiveDBCPService.class);
        final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue();

        try (final Connection conn = dbcpService.getConnection()) {
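            // A single JDBC connection is used for the entire batch of FlowFiles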

            for (FlowFile flowFile : flowFiles) {
                try {
                    final String script = getHiveQL(session, flowFile, charset);
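                    // Split the script on the statement delimiter, except where the
                    // delimiter is escaped with a preceding backslash (negative lookbehind)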
                    String regex = "(?<!\\\\)" + Pattern.quote(statementDelimiter);

                    String[] hiveQLs = script.split(regex);
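                    // 'loc' tracks the next hiveql.args.N parameter index; parameter
                    // indices run continuously across all statements in the script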

                    int loc = 1;
                    for (String hiveQL : hiveQLs) {
                        getLogger().debug("HiveQL: {}", new Object[] { hiveQL });

                        if (!StringUtils.isEmpty(hiveQL.trim())) {
                            // Prepare the statement in a try-with-resources block so it is always closed
                            try (final PreparedStatement stmt = conn.prepareStatement(hiveQL.trim())) {

                                // Get ParameterMetadata
                                // Hive JDBC doesn't support this yet:
                                // ParameterMetaData pmd = stmt.getParameterMetaData();
                                // int paramCount = pmd.getParameterCount();

                                int paramCount = StringUtils.countMatches(hiveQL, "?");

                                if (paramCount > 0) {
                                    loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes());
                                }

                                // Execute the statement
                                stmt.execute();
                            }
                        }
                    }
                    // Emit a Provenance SEND event
                    final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);

                    session.getProvenanceReporter().send(flowFile, dbcpService.getConnectionURL(),
                            transmissionMillis, true);
                    session.transfer(flowFile, REL_SUCCESS);

                } catch (final SQLException e) {

                    if (e instanceof SQLNonTransientException) {
                        getLogger().error("Failed to update Hive for {} due to {}; routing to failure",
                                new Object[] { flowFile, e });
                        session.transfer(flowFile, REL_FAILURE);
                    } else {
                        getLogger().error(
                                "Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry",
                                new Object[] { flowFile, e });
                        flowFile = session.penalize(flowFile);
                        session.transfer(flowFile, REL_RETRY);
                    }

                }
            }
        } catch (final SQLException sqle) {
            // There was a problem getting the connection; yield and route the FlowFiles to retry
            getLogger().error(
                    "Failed to get Hive connection due to {}; it is possible that retrying the operation will succeed, so routing to retry",
                    new Object[] { sqle });
            session.transfer(flowFiles, REL_RETRY);
            context.yield();
        }
    }
}
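
Example

The processor reads the HiveQL statement from the FlowFile content and binds any ? placeholders from the hiveql.args.N.type and hiveql.args.N.value attributes described above. The following standalone snippet is a hypothetical sketch (it is not part of the processor source): the table name and values are invented for illustration, and it only shows how the attribute map for such a FlowFile could be assembled.

import java.sql.Types;
import java.util.HashMap;
import java.util.Map;

public class PutHiveQLAttributesExample {
    public static void main(String[] args) {
        // FlowFile content: a parameterized HiveQL statement with two placeholders
        final String hiveQL = "INSERT INTO users (id, name) VALUES (?, ?)";

        // FlowFile attributes: one type/value pair per placeholder, numbered from 1
        final Map<String, String> attributes = new HashMap<>();
        attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); // JDBC type code 4
        attributes.put("hiveql.args.1.value", "42");
        attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); // JDBC type code 12
        attributes.put("hiveql.args.2.value", "alice");

        System.out.println("Content:   " + hiveQL);
        attributes.forEach((key, value) -> System.out.println("Attribute: " + key + " = " + value));
    }
}

Multiple statements may be placed in a single FlowFile, separated by the configured Statement Delimiter (a semicolon by default); a delimiter preceded by a backslash is not treated as a separator, and parameter indices continue across the statements in the script.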