org.apache.nifi.processors.mongodb.RunMongoAggregation.java Source code

Introduction

Here is the source code for org.apache.nifi.processors.mongodb.RunMongoAggregation.java, an Apache NiFi processor that runs a MongoDB aggregation query whenever a flowfile is received and writes the result set to outgoing flowfiles.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.nifi.processors.mongodb;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.BasicDBObject;
import com.mongodb.client.AggregateIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.JsonValidator;
import org.apache.nifi.processor.util.StandardValidators;
import org.bson.Document;
import org.bson.conversions.Bson;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

@Tags({ "mongo", "aggregation", "aggregate" })
@CapabilityDescription("A processor that runs an aggregation query whenever a flowfile is received.")
@InputRequirement(InputRequirement.Requirement.INPUT_ALLOWED)
@EventDriven
public class RunMongoAggregation extends AbstractMongoProcessor {

    private final static Set<Relationship> relationships;
    private final static List<PropertyDescriptor> propertyDescriptors;

    static final Relationship REL_ORIGINAL = new Relationship.Builder()
            .name("original")
            .description("The input flowfile gets sent to this relationship when the query succeeds.")
            .build();
    static final Relationship REL_FAILURE = new Relationship.Builder()
            .name("failure")
            .description("The input flowfile gets sent to this relationship when the query fails.")
            .build();
    static final Relationship REL_RESULTS = new Relationship.Builder()
            .name("results")
            .description("The result set of the aggregation will be sent to this relationship.")
            .build();

    static final List<Bson> buildAggregationQuery(String query) throws IOException {
        List<Bson> result = new ArrayList<>();

        ObjectMapper mapper = new ObjectMapper();
        List<Map> values = mapper.readValue(query, List.class);
        for (Map<?, ?> val : values) {
            result.add(new BasicDBObject(val));
        }

        return result;
    }

    static final PropertyDescriptor QUERY = new PropertyDescriptor.Builder()
            .name("mongo-agg-query")
            .displayName("Query")
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .description("The aggregation query to be executed.")
            .required(true)
            .addValidator(JsonValidator.INSTANCE)
            .build();

    static final PropertyDescriptor ALLOW_DISK_USE = new PropertyDescriptor.Builder()
            .name("allow-disk-use")
            .displayName("Allow Disk Use")
            .description("Set this to true to enable writing data to temporary files to prevent exceeding the "
                    + "maximum memory use limit during aggregation pipeline stages when handling large datasets.")
            .required(true)
            .allowableValues("true", "false")
            .defaultValue("false")
            .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
            .build();

    static {
        List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
        _propertyDescriptors.addAll(descriptors);
        _propertyDescriptors.add(CHARSET);
        _propertyDescriptors.add(QUERY);
        _propertyDescriptors.add(ALLOW_DISK_USE);
        _propertyDescriptors.add(JSON_TYPE);
        _propertyDescriptors.add(QUERY_ATTRIBUTE);
        _propertyDescriptors.add(BATCH_SIZE);
        _propertyDescriptors.add(RESULTS_PER_FLOWFILE);
        _propertyDescriptors.add(DATE_FORMAT);
        _propertyDescriptors.add(SSL_CONTEXT_SERVICE);
        _propertyDescriptors.add(CLIENT_AUTH);
        propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);

        final Set<Relationship> _relationships = new HashSet<>();
        _relationships.add(REL_RESULTS);
        _relationships.add(REL_ORIGINAL);
        _relationships.add(REL_FAILURE);
        relationships = Collections.unmodifiableSet(_relationships);
    }

    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @Override
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return propertyDescriptors;
    }

    private String buildBatch(List<Document> batch) {
        String retVal;
        try {
            retVal = objectMapper.writeValueAsString(batch.size() > 1 ? batch : batch.get(0));
        } catch (Exception e) {
            retVal = null;
        }

        return retVal;
    }

    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        FlowFile flowFile = null;
        if (context.hasIncomingConnection()) {
            flowFile = session.get();

            if (flowFile == null && context.hasNonLoopConnection()) {
                return;
            }
        }

        final String query = context.getProperty(QUERY).evaluateAttributeExpressions(flowFile).getValue();
        final Boolean allowDiskUse = context.getProperty(ALLOW_DISK_USE).asBoolean();
        final String queryAttr = context.getProperty(QUERY_ATTRIBUTE).evaluateAttributeExpressions(flowFile)
                .getValue();
        final Integer batchSize = context.getProperty(BATCH_SIZE).asInteger();
        final Integer resultsPerFlowfile = context.getProperty(RESULTS_PER_FLOWFILE).asInteger();
        final String jsonTypeSetting = context.getProperty(JSON_TYPE).getValue();
        final String dateFormat = context.getProperty(DATE_FORMAT).evaluateAttributeExpressions(flowFile)
                .getValue();

        configureMapper(jsonTypeSetting, dateFormat);

        Map<String, String> attrs = new HashMap<>();
        if (queryAttr != null && queryAttr.trim().length() > 0) {
            attrs.put(queryAttr, query);
        }

        MongoCursor<Document> iter = null;

        try {
            MongoCollection<Document> collection = getCollection(context, flowFile);
            List<Bson> aggQuery = buildAggregationQuery(query);
            AggregateIterable<Document> it = collection.aggregate(aggQuery).allowDiskUse(allowDiskUse);
            it.batchSize(batchSize != null ? batchSize : 1);

            iter = it.iterator();
            List<Document> batch = new ArrayList<>();

            while (iter.hasNext()) {
                batch.add(iter.next());
                if (batch.size() == resultsPerFlowfile) {
                    writeBatch(buildBatch(batch), flowFile, context, session, attrs, REL_RESULTS);
                    batch = new ArrayList<>();
                }
            }

            if (batch.size() > 0) {
                writeBatch(buildBatch(batch), flowFile, context, session, attrs, REL_RESULTS);
            }

            if (flowFile != null) {
                session.transfer(flowFile, REL_ORIGINAL);
            }
        } catch (Exception e) {
            getLogger().error("Error running MongoDB aggregation query.", e);
            if (flowFile != null) {
                session.transfer(flowFile, REL_FAILURE);
            }
        } finally {
            if (iter != null) {
                iter.close();
            }
        }
    }
}
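
The Query property is expected to hold a JSON array of aggregation pipeline stages, which buildAggregationQuery() parses into the List<Bson> that is passed to collection.aggregate(). The following is a minimal, illustrative sketch of that parsing step in isolation; it is not part of the processor's source, and the $match/$group stages and field names are made up. The example class is placed in the same package only so it can reach the package-private helper.

package org.apache.nifi.processors.mongodb;

import org.bson.conversions.Bson;

import java.io.IOException;
import java.util.List;

// Hypothetical driver class showing how a Query property value becomes pipeline stages.
public class RunMongoAggregationQueryExample {
    public static void main(String[] args) throws IOException {
        // A two-stage pipeline: keep "active" documents, then count them per "category".
        String query = "[{\"$match\": {\"status\": \"active\"}},"
                + " {\"$group\": {\"_id\": \"$category\", \"count\": {\"$sum\": 1}}}]";

        // Each element of the JSON array becomes one BasicDBObject pipeline stage.
        List<Bson> stages = RunMongoAggregation.buildAggregationQuery(query);

        // Prints the stages in pipeline order.
        stages.forEach(stage -> System.out.println(stage));
    }
}

In the processor itself the query string is first run through expression language evaluation against the incoming flowfile's attributes, so the JSON above is what the property resolves to, not necessarily what is typed into the property field.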