org.pentaho.di.trans.steps.blockingstep.BlockingStep.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.trans.steps.blockingstep.BlockingStep.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.blockingstep;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.SocketTimeoutException;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.FileSystemException;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * A step that blocks throughput until the input ends, then it will either output the last row or the complete input.
 *
 * <p>
 * When all rows are passed on, incoming rows are collected in an in-memory buffer. Each time that buffer reaches the
 * configured cache size it is written to a (optionally gzip-compressed) temporary file. Once the input is exhausted
 * the rows are emitted again: from the temporary files, one file after the other, when at least one file was written,
 * or straight from the in-memory buffer otherwise.
 * </p>
 */
public class BlockingStep extends BaseStep implements StepInterface {

    private static Class<?> PKG = BlockingStepMeta.class; // for i18n purposes, needed by Translator2!!

    /** Step settings; assigned in {@link #init(StepMetaInterface, StepDataInterface)}. */
    private BlockingStepMeta meta;

    /** Runtime state (row buffers, temp files and their open streams); assigned in init. */
    private BlockingStepData data;

    /** Most recently received input row; only used when just the last row is passed on. */
    private Object[] lastRow;

    public BlockingStep(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    /**
     * Adds a row to the in-memory buffer and spills the buffer to a temporary file when required.
     *
     * <p>
     * The buffer is written to disk when it holds exactly the configured cache size, or when the input has ended
     * ({@code r == null}), earlier chunks were already written to disk and some rows are still buffered.
     * </p>
     *
     * @param rowMeta
     *            metadata used to serialize the buffered rows
     * @param r
     *            the row to buffer, or {@code null} to signal the end of the input
     * @return {@code true} on success, {@code false} when the temporary file could not be written
     */
    private boolean addBuffer(RowMetaInterface rowMeta, Object[] r) {
        if (r != null) {
            data.buffer.add(r); // Save row
        }

        // Buffer in core is full: dump to disk
        boolean bufferFull = data.buffer.size() == meta.getCacheSize();
        // No more records, and earlier chunks are on disk already: the remainder has to join them
        boolean lastChunk = data.files.size() > 0 && r == null && data.buffer.size() > 0;
        if (!bufferFull && !lastChunk) {
            return true;
        }

        if (!writeBufferToTempFile(rowMeta)) {
            return false;
        }

        data.buffer.clear();
        return true;
    }

    /**
     * Writes the row count followed by every buffered row to a freshly created temporary file and registers that file
     * in {@code data.files}. The buffer itself is left untouched.
     *
     * @param rowMeta
     *            metadata used to serialize the rows
     * @return {@code true} when the file was written and closed, {@code false} when an error was logged instead
     */
    private boolean writeBufferToTempFile(RowMetaInterface rowMeta) {
        OutputStream outputStream = null;
        DataOutputStream dos = null;

        try {
            FileObject fileObject = KettleVFS.createTempFile(meta.getPrefix(), ".tmp",
                    environmentSubstitute(meta.getDirectory()), getTransMeta());

            // Remember the file right away, so that dispose() removes it even if writing fails half-way.
            data.files.add(fileObject);
            outputStream = KettleVFS.getOutputStream(fileObject, false);
            if (meta.getCompress()) {
                dos = new DataOutputStream(new GZIPOutputStream(new BufferedOutputStream(outputStream)));
            } else {
                dos = new DataOutputStream(outputStream);
            }

            // How many records do we have?
            dos.writeInt(data.buffer.size());

            // Just write the data, nothing else
            for (Object[] row : data.buffer) {
                rowMeta.writeData(dos, row);
            }

            // Closing the outermost stream flushes and closes the whole chain. A failure here still
            // ends up in the catch block, so an incompletely written file is reported as an error.
            dos.close();
            dos = null;
            outputStream = null;
            return true;
        } catch (Exception e) {
            logError("Error processing tmp-file: " + e.toString());
            logError(Const.getStackTracker(e));
            return false;
        } finally {
            // Both are null after a successful close; otherwise release whatever was opened.
            closeSilently(dos);
            closeSilently(outputStream);
        }
    }

    /**
     * Returns the next buffered row for output.
     *
     * <p>
     * When no temporary file exists the rows come from the in-memory buffer. Otherwise they are read from the
     * temporary files in the order the files were written; a file is closed and deleted as soon as reading past its
     * last row fails.
     * </p>
     *
     * @return the next row, or {@code null} when there is nothing (more) to return or an error occurred
     */
    private Object[] getBuffer() {
        // A temp file is pending but none is open: open the first one and pre-read its first row.
        if (data.files.size() > 0 && (data.dis.size() == 0 || data.fis.size() == 0)) {
            openNextTempFile();
        }

        if (data.files.size() == 0) {
            // Nothing on disk: serve straight from memory.
            if (data.buffer.size() == 0) {
                return null;
            }
            Object[] buffered = data.buffer.get(0);
            data.buffer.remove(0);
            return buffered;
        }

        if (data.rowbuffer.size() == 0) {
            return null;
        }

        Object[] retval = data.rowbuffer.get(0);
        data.rowbuffer.remove(0);

        // now get another
        FileObject file = data.files.get(0);
        DataInputStream di = data.dis.get(0);
        InputStream fi = data.fis.get(0);
        GZIPInputStream gzfi = (meta.getCompress()) ? data.gzis.get(0) : null;

        try {
            data.rowbuffer.add(0, data.outputRowMeta.readData(di));
        } catch (SocketTimeoutException e) {
            logError(BaseMessages.getString(PKG, "System.Log.UnexpectedError") + " : " + e.toString());
            logError(Const.getStackTracker(e));
            setErrors(1);
            stopAll();
        } catch (KettleFileException fe) {
            // empty file or EOF mostly: this file is done, release and remove it
            try {
                di.close();
                fi.close();
                if (gzfi != null) {
                    gzfi.close();
                }
                file.delete();
            } catch (IOException e) {
                logError(BaseMessages.getString(PKG, "BlockingStepMeta.UnableDeleteFile") + file.toString());
                setErrors(1);
                stopAll();
                return null;
            }

            data.files.remove(0);
            data.dis.remove(0);
            data.fis.remove(0);
            if (gzfi != null) {
                data.gzis.remove(0);
            }
        }

        return retval;
    }

    /**
     * Opens the first pending temporary file, registers its streams in {@code data.fis}, {@code data.gzis} (only when
     * compressing) and {@code data.dis}, and pre-reads the first row into {@code data.rowbuffer} when the file
     * announces at least one row.
     *
     * <p>
     * A failure is logged and flags the step as failed, because the rows in that file can no longer be delivered.
     * Streams that were registered before the failure are closed by
     * {@link #dispose(StepMetaInterface, StepDataInterface)}.
     * </p>
     */
    private void openNextTempFile() {
        if (log.isBasic()) {
            logBasic(BaseMessages.getString(PKG, "BlockingStep.Log.Openfiles"));
        }

        try {
            FileObject fileObject = data.files.get(0);
            String filename = KettleVFS.getFilename(fileObject);
            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "BlockingStep.Log.Openfilename1") + filename
                        + BaseMessages.getString(PKG, "BlockingStep.Log.Openfilename2"));
            }

            InputStream fi = KettleVFS.getInputStream(fileObject);
            data.fis.add(fi);

            DataInputStream di;
            if (meta.getCompress()) {
                GZIPInputStream gzfi = new GZIPInputStream(new BufferedInputStream(fi));
                di = new DataInputStream(gzfi);
                data.gzis.add(gzfi);
            } else {
                di = new DataInputStream(fi);
            }
            data.dis.add(di);

            // How long is the buffer?
            int buffersize = di.readInt();

            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "BlockingStep.Log.BufferSize1") + filename
                        + BaseMessages.getString(PKG, "BlockingStep.Log.BufferSize2") + buffersize + " "
                        + BaseMessages.getString(PKG, "BlockingStep.Log.BufferSize3"));
            }

            if (buffersize > 0) {
                // Read a row from temp-file
                data.rowbuffer.add(data.outputRowMeta.readData(di));
            }
        } catch (Exception e) {
            logError(BaseMessages.getString(PKG, "BlockingStepMeta.ErrorReadingFile") + e.toString());
            logError(Const.getStackTracker(e));
            // Without this the step would finish with truncated output and no error recorded.
            setErrors(1);
            stopAll();
        }
    }

    /**
     * Closes every stream that is still registered, deletes the temporary files that still exist and then delegates
     * to the superclass.
     */
    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        if (data != null) {
            // Outermost streams first. The raw file streams are closed as well, because one of them may
            // have been registered without ever getting a wrapper when opening a temp file failed.
            closeAll(data.dis);
            closeAll(data.gzis);
            closeAll(data.fis);

            // remove temp files
            if (data.files != null) {
                for (FileObject fileToDelete : data.files) {
                    try {
                        if (fileToDelete != null && fileToDelete.exists()) {
                            fileToDelete.delete();
                        }
                    } catch (FileSystemException e) {
                        logError(e.getLocalizedMessage(), e);
                    }
                }
            }
        }
        super.dispose(smi, sdi);
    }

    /**
     * Stores the step's metadata and data objects and runs the superclass initialisation.
     *
     * @return the result of the superclass initialisation
     */
    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (BlockingStepMeta) smi;
        data = (BlockingStepData) sdi;

        return super.init(smi, sdi);
    }

    /**
     * Consumes one input row per call.
     *
     * <p>
     * When only the last row is passed on, each row is merely remembered and the most recent one is emitted once the
     * input ends. When all rows are passed on, each row is buffered (spilling to disk as needed) and the complete
     * buffer is emitted once the input ends.
     * </p>
     *
     * @return {@code true} while more input is expected, {@code false} once the output has been marked as done
     * @throws KettleException
     *             when reading the input row or writing an output row fails
     */
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        Object[] r = getRow(); // Get row from input rowset & set row busy!

        // initialize: derive the output metadata once, from the first row that actually arrives
        if (first && r != null) {
            first = false;
            data.outputRowMeta = getInputRowMeta().clone();
        }

        if (!meta.isPassAllRows()) {
            if (r == null) {
                // no more input to be expected...
                if (lastRow != null) {
                    putRow(data.outputRowMeta, lastRow);
                }
                setOutputDone();
                return false;
            }

            lastRow = r;
            return true;
        }

        // The mode in which we pass all rows to the output.
        if (!addBuffer(getInputRowMeta(), r)) {
            // Rows could not be spilled to disk: record the failure instead of ending as if all went well.
            setErrors(1);
            stopAll();
            setOutputDone(); // signal receiver we're finished.
            return false;
        }

        if (r != null) {
            return true;
        }

        // no more input to be expected...
        // Now we can start the output!
        for (Object[] row = getBuffer(); row != null && !isStopped(); row = getBuffer()) {
            if (log.isRowLevel()) {
                logRowlevel("Read row: " + getInputRowMeta().getString(row));
            }

            putRow(data.outputRowMeta, row); // copy row to possible alternate rowset(s).
        }

        setOutputDone(); // signal receiver we're finished.
        return false;
    }

    /** Closes every element of the given collection, ignoring {@code null} collections and close failures. */
    private static void closeAll(Iterable<? extends Closeable> streams) {
        if (streams == null) {
            return;
        }
        for (Closeable stream : streams) {
            closeSilently(stream);
        }
    }

    /** Closes the given stream if it is not {@code null}; a failure to close is deliberately ignored. */
    private static void closeSilently(Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: there is nothing useful left to do with a stream that will not close.
        }
    }

}