co.cask.cdap.internal.app.verification.FlowVerification.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.internal.app.verification.FlowVerification.java

Source

/*
 * Copyright  2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.verification;

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.flow.FlowSpecification;
import co.cask.cdap.api.flow.FlowletConnection;
import co.cask.cdap.api.flow.FlowletDefinition;
import co.cask.cdap.app.queue.QueueSpecification;
import co.cask.cdap.app.queue.QueueSpecificationGenerator;
import co.cask.cdap.app.verification.VerifyResult;
import co.cask.cdap.error.Err;
import co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator;
import co.cask.cdap.proto.Id;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;

import java.util.Collection;
import java.util.Map;
import java.util.Set;

/**
* This verifies a given {@link co.cask.cdap.api.flow.Flow}.
* <p/>
* <p>
* Following are the checks that are done for a {@link co.cask.cdap.api.flow.Flow}
* <ul>
* <li>Verify Flow Meta Information - Name is id</li>
* <li>There should be at least one flowlet</li>
* <li>Verify information for each Flowlet</li>
* <li>There should be at least one connection</li>
* <li>Verify schemas across connections on flowlets</li>
* </ul>
* <p/>
* </p>
*/
public class FlowVerification extends ProgramVerification<FlowSpecification> {

    /**
     * Verifies a single {@link FlowSpecification} and reports the first problem found.
     * <p>
     * The checks run in this order, stopping at the first failure:
     * <ol>
     * <li>the verification inherited from the superclass</li>
     * <li>the flow declares at least one flowlet</li>
     * <li>the flow declares at least one connection</li>
     * <li>every flowlet name and every dataset name used by a flowlet is an id, and every flowlet
     * that declares outputs is the source of at least one connection</li>
     * <li>every connection has an entry in the generated queue specification table</li>
     * <li>every output of a flowlet that acts as a connection source is matched by a queue specification</li>
     * </ol>
     *
     * @param appId id of the application the flow belongs to; handed to the superclass verification
     *              and to the queue specification generator
     * @param input the flow specification to be verified
     * @return the {@link VerifyResult} of the first failing check, or a success result if none fails
     */
    @Override
    public VerifyResult verify(Id.Application appId, final FlowSpecification input) {
        VerifyResult baseResult = super.verify(appId, input);
        if (!baseResult.isSuccess()) {
            return baseResult;
        }

        String flowName = input.getName();

        // A flow needs at least one flowlet ...
        if (input.getFlowlets().isEmpty()) {
            return VerifyResult.failure(Err.Flow.ATLEAST_ONE_FLOWLET, flowName);
        }

        // ... and at least one connection.
        if (input.getConnections().isEmpty()) {
            return VerifyResult.failure(Err.Flow.ATLEAST_ONE_CONNECTION, flowName);
        }

        // Names of all flowlets that appear as the source of some connection.
        Set<String> connectedSources = collectSourceFlowletNames(input);

        // Per-flowlet checks.
        for (FlowletDefinition definition : input.getFlowlets().values()) {
            String flowletName = definition.getFlowletSpec().getName();
            String qualifiedName = flowName + ":" + flowletName;

            // The flowlet name itself has to be an id.
            if (!isId(flowletName)) {
                return VerifyResult.failure(Err.NOT_AN_ID, qualifiedName);
            }

            // So does every dataset name the flowlet uses.
            for (String datasetName : definition.getDatasets()) {
                if (!isId(datasetName)) {
                    return VerifyResult.failure(Err.NOT_AN_ID, qualifiedName + ":" + datasetName);
                }
            }

            // A flowlet that declares outputs must be the source of at least one connection.
            boolean declaresOutputs = !definition.getOutputs().isEmpty();
            if (declaresOutputs && !connectedSources.contains(flowletName)) {
                return VerifyResult.failure(Err.Flow.OUTPUT_NOT_CONNECTED, flowName, flowletName);
            }
        }

        // NOTE: We should unify the logic here and the queue spec generation, as they are doing the same thing.
        SimpleQueueSpecificationGenerator generator = new SimpleQueueSpecificationGenerator(appId);
        Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> queueSpecs = generator.create(input);

        // Every connection must be present in the table, keyed by (source node, target name).
        for (FlowletConnection connection : input.getConnections()) {
            QueueSpecificationGenerator.Node sourceNode = new QueueSpecificationGenerator.Node(
                    connection.getSourceType(), connection.getSourceName());
            if (queueSpecs.contains(sourceNode, connection.getTargetName())) {
                continue;
            }
            return VerifyResult.failure(Err.Flow.NO_INPUT_FOR_OUTPUT, flowName, connection.getTargetName(),
                    connection.getSourceType(), connection.getSourceName());
        }

        // Every output of a source flowlet must be consumed; stream sources are not checked.
        for (QueueSpecificationGenerator.Node sourceNode : queueSpecs.rowKeySet()) {
            if (sourceNode.getType() != FlowletConnection.Type.STREAM) {
                Multimap<String, Schema> unconnected = findUnconnectedOutputs(input, queueSpecs, sourceNode);
                if (!unconnected.isEmpty()) {
                    return VerifyResult.failure(Err.Flow.MORE_OUTPUT_NOT_ALLOWED, flowName,
                            sourceNode.getType().toString().toLowerCase(), sourceNode.getName(), unconnected);
                }
            }
        }

        return VerifyResult.success();
    }

    /**
     * Returns the name of the given flow specification.
     *
     * @param input the flow specification
     * @return the value of {@link FlowSpecification#getName()}
     */
    @Override
    protected String getName(FlowSpecification input) {
        return input.getName();
    }

    /**
     * Gathers the source names of all connections whose source type is
     * {@link FlowletConnection.Type#FLOWLET}.
     *
     * @param flow the flow whose connections are inspected
     * @return a new mutable set of source flowlet names
     */
    private Set<String> collectSourceFlowletNames(FlowSpecification flow) {
        Set<String> names = Sets.newHashSet();
        for (FlowletConnection connection : flow.getConnections()) {
            if (connection.getSourceType() != FlowletConnection.Type.FLOWLET) {
                continue;
            }
            names.add(connection.getSourceName());
        }
        return names;
    }

    /**
     * Computes the outputs of the flowlet named by {@code node} that no queue specification in the
     * node's table row accounts for.
     * <p>
     * Starts from all declared (output name, schema) pairs of the flowlet and removes each pair
     * that matches a queue specification's simple queue name and output schema.
     *
     * @param flow       the flow that defines the flowlet
     * @param queueSpecs the generated queue specification table
     * @param node       the source node whose row is examined
     * @return the (output name, schema) pairs left over; empty when every output is connected
     */
    private Multimap<String, Schema> findUnconnectedOutputs(
            FlowSpecification flow,
            Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> queueSpecs,
            QueueSpecificationGenerator.Node node) {
        Multimap<String, Schema> remaining = toMultimap(flow.getFlowlets().get(node.getName()).getOutputs());
        for (Set<QueueSpecification> specsForTarget : queueSpecs.row(node).values()) {
            for (QueueSpecification spec : specsForTarget) {
                remaining.remove(spec.getQueueName().getSimpleName(), spec.getOutputSchema());
            }
        }
        return remaining;
    }

    /**
     * Flattens a map of collections into a new {@link HashMultimap} holding the same key/value pairs.
     *
     * @param map the map whose values are collections
     * @param <K> key type
     * @param <V> element type of the value collections
     * @return a new mutable multimap; the given map is not modified
     */
    private <K, V> Multimap<K, V> toMultimap(Map<K, ? extends Collection<V>> map) {
        Multimap<K, V> flattened = HashMultimap.create();
        for (Map.Entry<K, ? extends Collection<V>> mapping : map.entrySet()) {
            K key = mapping.getKey();
            Collection<V> values = mapping.getValue();
            flattened.putAll(key, values);
        }
        return flattened;
    }
}