org.pentaho.di.trans.dataservice.streaming.execution.StreamingGeneratedTransExecution.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.trans.dataservice.streaming.execution.StreamingGeneratedTransExecution.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2018 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.dataservice.streaming.execution;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Throwables;
import io.reactivex.BackpressureOverflowStrategy;
import io.reactivex.BackpressureStrategy;
import io.reactivex.Observer;
import io.reactivex.functions.Consumer;
import io.reactivex.subjects.BehaviorSubject;
import io.reactivex.subjects.PublishSubject;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.trans.RowProducer;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.dataservice.client.api.IDataServiceClientService;
import org.pentaho.di.trans.dataservice.utils.DataServiceConstants;
import org.pentaho.di.trans.step.RowAdapter;
import org.pentaho.di.trans.step.StepInterface;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Represents the streaming execution of a generated transformation (a SQL query over a data service).
 * <p>
 * Windows of rows produced by the service transformation are pushed into {@link #getGeneratedDataObservable()};
 * each window is run through the generated transformation and the rows written by its result step are published
 * to the registered row consumers.
 */
public class StreamingGeneratedTransExecution implements Runnable {

    private static final Log logger = LogFactory.getLog(StreamingGeneratedTransExecution.class);

    private final StreamingServiceTransExecutor serviceExecutor;
    private final Trans genTrans;
    private final String injectorStepName;
    private final String resultStepName;
    // Holds the latest generated transformation result; starts with an empty list as the default value.
    private final BehaviorSubject<List<RowMetaAndData>> genTransCachePublishSubject = BehaviorSubject
            .createDefault(new ArrayList<>());
    private final String query;
    // Guard that lets only one runGenTrans execution proceed at a time; concurrent calls are skipped.
    private final AtomicBoolean isRunning = new AtomicBoolean(false);
    private StreamExecutionListener stream;
    // Number of registered push (non polling) consumers that have not been disposed.
    private final AtomicInteger consumersCount = new AtomicInteger(0);

    // Receives the windows of rows produced by the service transformation.
    private final PublishSubject<List<RowMetaAndData>> generatedDataObservable;

    private IDataServiceClientService.StreamingMode windowMode;
    private long windowSize;
    private long windowEvery;
    private long windowLimit;
    private String streamingGeneratedTransCacheKey;

    /**
     * Constructor.
     *
     * @param serviceExecutor The {@link StreamingServiceTransExecutor} service transformation executor object.
     * @param genTrans The {@link org.pentaho.di.trans.Trans} generated transformation.
     * @param rowConsumer The consumer to be registered in the publish subject stream
     * @param pollingMode True if the given consumer is a polling consumer, False if it is a push one.
     * @param injectorStepName The name of the step in the generated transformation where rows are injected.
     * @param resultStepName The name of the step in the generated transformation where the results are retrieved.
     * @param query The query to be executed.
     * @param windowMode The streaming window mode.
     * @param windowSize The query window size. Number of rows for a ROW_BASED streamingType and milliseconds for a
     *                 TIME_BASED streamingType.
     * @param windowEvery The query window rate. Number of rows for a ROW_BASED streamingType and milliseconds for a
     *                 TIME_BASED streamingType.
     * @param windowLimit The query max window size. Number of rows for a TIME_BASED streamingType and milliseconds
     *                 for a ROW_BASED streamingType.
     * @param streamingGeneratedTransCacheKey The generated transformation cache key, so that it could be easily accessed.
     */
    public StreamingGeneratedTransExecution(final StreamingServiceTransExecutor serviceExecutor,
            final Trans genTrans, final Observer<List<RowMetaAndData>> rowConsumer, boolean pollingMode,
            final String injectorStepName, final String resultStepName, final String query,
            final IDataServiceClientService.StreamingMode windowMode, long windowSize, long windowEvery,
            long windowLimit, String streamingGeneratedTransCacheKey) {
        this.serviceExecutor = serviceExecutor;
        this.genTrans = genTrans;
        this.injectorStepName = injectorStepName;
        this.resultStepName = resultStepName;
        this.query = query;
        this.windowMode = windowMode;
        this.windowSize = windowSize;
        this.windowEvery = windowEvery;
        this.windowLimit = windowLimit;
        this.streamingGeneratedTransCacheKey = streamingGeneratedTransCacheKey;

        this.generatedDataObservable = PublishSubject.create();
        this.addNewRowConsumer(rowConsumer, pollingMode);
    }

    /**
     * Gets the window consumer, that should be used to consume the data produced in the service executor.
     * @return A {@link Consumer} that accepts a list of {@link RowMetaAndData} produced by the service executor.
     */
    @VisibleForTesting
    protected Consumer<List<RowMetaAndData>> getWindowConsumer() {
        return rowMetaAndDataList -> getGeneratedDataObservable().onNext(rowMetaAndDataList);
    }

    /**
     * Subscribes the generated transformation to the windows produced by the service transformation and
     * registers the window consumer in the service executor buffer.
     * <p>
     * When the service executor provides no buffer for the query, the generated transformation is run once
     * with an empty list of rows.
     */
    @Override
    public void run() {
        // Back pressure is relieved by only processing the latest data: older unprocessed windows are dropped.
        getGeneratedDataObservable().toFlowable(BackpressureStrategy.LATEST).onBackpressureBuffer(1, () -> {
        }, BackpressureOverflowStrategy.DROP_OLDEST)
                .doOnError(t -> logger.error("Error receiving data from the service transformation observable", t))
                .doOnNext(rowMetaAndDataList -> {
                    // Only keep the service listener alive while there are push consumers registered.
                    if (consumersCount.get() > 0) {
                        serviceExecutor.touchServiceListener(this.streamingGeneratedTransCacheKey);
                    }
                }).subscribe(rowMetaAndDataList -> this.runGenTrans(rowMetaAndDataList),
                        // Without a terminal error handler, a failure thrown by runGenTrans would be routed to the
                        // global RxJava error hook as an OnErrorNotImplementedException instead of being logged here.
                        t -> logger.error("Streaming generated transformation execution terminated due to an error",
                                t));

        // This is where we will inject the rows from the service transformation step
        if (this.stream == null) {
            this.stream = serviceExecutor.getBuffer(query, getWindowConsumer(), windowMode, windowSize, windowEvery,
                    windowLimit);
        }
        try {
            if (stream == null) {
                this.runGenTrans(Collections.emptyList());
            }
        } catch (KettleStepException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Runs the generated transformation with the rows given by param and publishes the rows written by the
     * result step to the registered consumers.
     * <p>
     * Only one execution runs at a time: a call made while another execution is in progress returns
     * immediately without processing the given rows.
     *
     * @param rowIterator The {@link List} input rows.
     * @throws KettleStepException if a {@link KettleException} is raised while preparing or running the
     *                             generated transformation.
     */
    @VisibleForTesting
    protected void runGenTrans(final List<RowMetaAndData> rowIterator) throws KettleStepException {
        if (!isRunning.compareAndSet(false, true)) {
            // Another execution is already in progress.
            return;
        }

        try {
            LogChannelInterface log = genTrans.getLogChannel();
            RowProducer rowProducer;
            StepInterface resultStep;

            genTrans.getTransListeners().clear();
            genTrans.cleanup();
            synchronized (serviceExecutor.getServiceTrans()) {
                if (Thread.currentThread().isInterrupted()) {
                    //avoids InterruptedException caused by the transformations being stopped in the meantime
                    return;
                }
                genTrans.prepareExecution(null);
                rowProducer = genTrans.addRowProducer(injectorStepName, 0);
                genTrans.startThreads();
            }

            resultStep = genTrans.findRunThread(resultStepName);
            resultStep.cleanup();

            // Collects every row written by the result step during this execution.
            List<RowMetaAndData> rowsList = new ArrayList<>();
            RowAdapter rowListener = getRowWrittenListener(rowsList);
            resultStep.addRowListener(rowListener);
            injectRows(rowIterator, log, rowProducer);

            rowProducer.finished();

            // The execution will not continue until the trans finishes
            waitForGeneratedTransToFinnish();

            genTrans.stopAll();
            resultStep.removeRowListener(rowListener);

            // Nothing is published once the consumers were cleared (subject completed).
            if (!this.genTransCachePublishSubject.hasComplete()) {
                this.genTransCachePublishSubject.onNext(Collections.unmodifiableList(rowsList));
            }

            log.logDetailed(DataServiceConstants.STREAMING_GENERATED_TRANSFORMATION_STOPPED);
        } catch (KettleException e) {
            throw new KettleStepException(e);
        } finally {
            try {
                genTrans.setRunning(false);
                genTrans.setStopped(true);
            } finally {
                // Release the guard last: releasing it before resetting the transformation flags would allow a
                // concurrent execution to start and then have its transformation flagged as stopped by this one.
                isRunning.set(false);
            }
        }
    }

    /**
     * Blocks until the generated transformation finishes.
     * <p>
     * A {@link RuntimeException} caused by an {@link InterruptedException} is treated as a normal termination
     * and only logged at debug level; any other {@link RuntimeException} is rethrown unchanged.
     */
    @VisibleForTesting
    protected void waitForGeneratedTransToFinnish() {
        try {
            genTrans.waitUntilFinished();
        } catch (RuntimeException e) {
            if (e.getCause() instanceof InterruptedException) {
                logger.debug(
                        "The generated transformation was stopped, which resulted in an interruption of the generated transformation wait until finished call");
                //this is normal if the transformation is terminated while we are waiting for it to finish
            } else {
                // Rethrow as is: wrapping a RuntimeException in another one only obscures the cause chain.
                throw e;
            }
        }
    }

    /**
     * Creates a row listener that appends every written row to the given list.
     *
     * @param tempList The list where the written rows are accumulated.
     * @return A {@link RowAdapter} that adds a {@link RowMetaAndData} to the list for each row written event.
     */
    @VisibleForTesting
    protected RowAdapter getRowWrittenListener(List<RowMetaAndData> tempList) {
        return new RowAdapter() {
            @Override
            public void rowWrittenEvent(RowMetaInterface rowMetaInterface, Object[] objects)
                    throws KettleStepException {
                tempList.add(new RowMetaAndData(rowMetaInterface, objects));
            }
        };
    }

    /**
     * Injects the given rows into the generated transformation through the row producer.
     * <p>
     * Each row is offered with a one second timeout and retried while the offer fails and the generated
     * transformation is still running.
     *
     * @param rowIterator The rows to inject.
     * @param log The log channel used to report that the row buffer is full.
     * @param rowProducer The producer that feeds the injector step.
     */
    @VisibleForTesting
    protected void injectRows(List<RowMetaAndData> rowIterator, LogChannelInterface log, RowProducer rowProducer) {
        for (RowMetaAndData injectRows : rowIterator) {
            while (!rowProducer.putRowWait(injectRows.getRowMeta(), injectRows.getData(), 1, TimeUnit.SECONDS)
                    && genTrans.isRunning()) {
                // Row queue was full, try again
                log.logRowlevel(DataServiceConstants.ROW_BUFFER_IS_FULL_TRYING_AGAIN);
            }
        }
    }

    /**
     * Adds a new consumer that will receive the result of the generated transformation
     * <p>
     * Push consumers are counted; the count is decremented when the consumer's subscription is disposed.
     *
     * @param consumer The consumer to register.
     * @param pollingMode True if the new consumer is a polling consumer, False if it is a push one.
     */
    public void addNewRowConsumer(final Observer<List<RowMetaAndData>> consumer, boolean pollingMode) {
        if (pollingMode) {
            this.genTransCachePublishSubject.safeSubscribe(consumer);
        } else {
            this.genTransCachePublishSubject.doOnDispose(() -> consumersCount.decrementAndGet())
                    .safeSubscribe(consumer);
            consumersCount.incrementAndGet();
        }
    }

    /**
     * Clears all the row consumers from the cache, and wrap-up resources used by them.
     * <p>
     * The result subject is completed and the push consumers count is reset to zero.
     */
    public void clearRowConsumers() {
        this.genTransCachePublishSubject.onComplete();
        consumersCount.set(0);
    }

    /**
     * Gets the subject that receives the windows of rows produced by the service transformation.
     *
     * @return The {@link PublishSubject} created for this execution.
     */
    @VisibleForTesting
    protected PublishSubject<List<RowMetaAndData>> getGeneratedDataObservable() {
        return generatedDataObservable;
    }
}