Java tutorial
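The class below, LucidDBBulkLoader, is a Pentaho Data Integration (Kettle) step that bulk loads rows into a LucidDB table. It is based on Sven Boden's Oracle Bulk Loader step and works by creating a named pipe (FIFO) with mkfifo, registering a LucidDB "fifo server" (a foreign data wrapper over the FIFO's directory), writing each incoming row to the FIFO as CSV, and running an INSERT ... SELECT from the resulting foreign table on a background thread (the SqlRunner inner class). After the source there is a short, standalone JDBC sketch of the same FIFO-plus-background-INSERT pattern.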
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.luciddbbulkloader;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.SQLWarning;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.vfs.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.DBCache;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.StreamLogger;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Performs a bulk load to a LucidDB table.
 *
 * Based on Sven Boden's Oracle Bulk Loader step
 *
 * @author matt
 * @since 14-nov-2008
 */
public class LucidDBBulkLoader extends BaseStep implements StepInterface {
  private static Class<?> PKG = LucidDBBulkLoaderMeta.class; // for i18n purposes, needed by Translator2!!

  private LucidDBBulkLoaderMeta meta;
  private LucidDBBulkLoaderData data;
  // private SqlRunner sqlRunner;

  public LucidDBBulkLoader(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans) {
    super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
  }

  public boolean execute(LucidDBBulkLoaderMeta meta, boolean wait) throws KettleException {
    Runtime rt = Runtime.getRuntime();

    try {
      String tableName = environmentSubstitute(meta.getTableName());

      // 1) Set up the FIFO folder, create the directory and path to it...
      //
      String fifoVfsDirectory = environmentSubstitute(meta.getFifoDirectory());
      FileObject directory = KettleVFS.getFileObject(fifoVfsDirectory, getTransMeta());
      directory.createFolder();
      String fifoDirectory = KettleVFS.getFilename(directory);

      // 2) Create the FIFO file using the "mkfifo" command...
      // Make sure to log all the possible output, also from STDERR
      //
      data.fifoFilename = KettleVFS.getFilename(directory) + Const.FILE_SEPARATOR + tableName + ".csv";
      data.bcpFilename = KettleVFS.getFilename(directory) + Const.FILE_SEPARATOR + tableName + ".bcp";

      File fifoFile = new File(data.fifoFilename);
      if (!fifoFile.exists()) {
        String mkFifoCmd = "mkfifo " + data.fifoFilename;
        logBasic("Creating FIFO file using this command : " + mkFifoCmd);
        Process mkFifoProcess = rt.exec(mkFifoCmd);
        StreamLogger errorLogger = new StreamLogger(log, mkFifoProcess.getErrorStream(), "mkFifoError");
        StreamLogger outputLogger = new StreamLogger(log, mkFifoProcess.getInputStream(), "mkFifoOutput");
        new Thread(errorLogger).start();
        new Thread(outputLogger).start();
        int result = mkFifoProcess.waitFor();
        if (result != 0) {
          throw new Exception("Return code " + result + " received from statement : " + mkFifoCmd);
        }
      }

      // 3) Make a connection to LucidDB for sending SQL commands
      // (Also, we need a clear cache for getting up-to-date target metadata)
      if (meta.getDatabaseMeta() == null) {
        logError(BaseMessages.getString(PKG, "LuciDBBulkLoader.Init.ConnectionMissing", getStepname()));
        return false;
      }
      DBCache.getInstance().clear(meta.getDatabaseMeta().getName());

      data.db = new Database(this, meta.getDatabaseMeta());
      data.db.shareVariablesWith(this);

      // Connect to the database
      if (getTransMeta().isUsingUniqueConnections()) {
        synchronized (getTrans()) {
          data.db.connect(getTrans().getTransactionId(), getPartitionID());
        }
      } else {
        data.db.connect(getPartitionID());
      }

      logBasic("Connected to LucidDB");

      // 4) Now we are ready to create the LucidDB FIFO server that will handle the actual bulk loading.
      //
      String fifoServerStatement = "";
      fifoServerStatement += "create or replace server " + meta.getFifoServerName() + Const.CR;
      fifoServerStatement += "foreign data wrapper sys_file_wrapper" + Const.CR;
      fifoServerStatement += "options (" + Const.CR;
      fifoServerStatement += "directory '" + fifoDirectory + "'," + Const.CR;
      fifoServerStatement += "file_extension 'csv'," + Const.CR;
      fifoServerStatement += "with_header 'no'," + Const.CR;
      fifoServerStatement += "num_rows_scan '0'," + Const.CR;
      fifoServerStatement += "lenient 'no');" + Const.CR;

      logBasic("Creating LucidDB fifo_server with the following command: " + fifoServerStatement);
      data.db.execStatements(fifoServerStatement);

      // 5) Set the error limit in the LucidDB session
      // REVIEW jvs 13-Dec-2008: is this guaranteed to retain the same connection?
      String errorMaxStatement = "";
      errorMaxStatement += "alter session set \"errorMax\" = " + meta.getMaxErrors() + ";" + Const.CR;

      logBasic("Setting error limit in LucidDB session with the following command: " + errorMaxStatement);
      data.db.execStatements(errorMaxStatement);

      // 6) Now we also need to create a bulk loader file .bcp
      //
      createBulkLoadConfigFile(data.bcpFilename);

      // 7) execute the actual load command!
      // This will actually block until the load is done in the
      // separate execution thread; see notes in executeLoadCommand
      // on why it's important for this to occur BEFORE
      // opening our end of the FIFO.
      //
      executeLoadCommand(tableName);

      // 8) We have to write rows to the FIFO file later on.
      data.fifoStream = new BufferedOutputStream(new FileOutputStream(fifoFile));
    } catch (Exception ex) {
      throw new KettleException(ex);
    }

    return true;
  }

  private void executeLoadCommand(String tableName) throws KettleException {
    String loadCommand = "";
    loadCommand += "insert into " + data.schemaTable + Const.CR;
    loadCommand += "select * from " + meta.getFifoServerName() + ".\"DEFAULT\"." + tableName + Const.CR;

    // NOTE jvs 13-Dec-2008: We prepare the SQL before spawning the thread
    // to execute it. The reason is that if a SQL validation exception
    // occurs during preparation (e.g. due to datatype mismatch), we don't
    // even want to open our end of the FIFO, otherwise we can get stuck
    // since the server is never going to open its end until execution,
    // which ain't gonna happen in that case.
    logBasic("Preparing load command : " + Const.CR + loadCommand);
    PreparedStatement ps = data.db.prepareSQL(loadCommand);

    data.sqlRunner = new SqlRunner(data, ps);
    data.sqlRunner.start();
  }

  private void createBulkLoadConfigFile(String bcpFilename) throws KettleException {
    File bcpFile = new File(bcpFilename);
    FileWriter writer = null;

    try {
      writer = new FileWriter(bcpFile);

      // The first Line is the version number, usually 9.0
      //
      writer.write("9.0" + Const.CR);

      // The second line contains the number of columns...
      //
      writer.write(meta.getFieldTable().length + Const.CR);

      RowMetaInterface targetFieldMeta = meta.getRequiredFields(this);

      data.bulkFormatMeta = new ValueMetaInterface[meta.getFieldTable().length];

      // The next block lists the columns from 1..N where N is the number of columns...
      //
      for (int i = 0; i < meta.getFieldTable().length; i++) {
        ValueMetaInterface field = getInputRowMeta().getValueMeta(data.keynrs[i]);

        // Col 1 : the column number (i+1)
        //
        writer.write(Integer.toString(i + 1) + " ");

        // Col 2 : the data type
        //
        String dataType = null;
        switch (field.getType()) {
          case ValueMetaInterface.TYPE_STRING:
            dataType = "SQLVARCHAR";
            break;
          case ValueMetaInterface.TYPE_BIGNUMBER:
            dataType = "SQLREAL";
            break;
          case ValueMetaInterface.TYPE_NUMBER:
            dataType = "SQLFLT8";
            break;
          case ValueMetaInterface.TYPE_INTEGER:
            dataType = "SQLBIGINT";
            break;
          case ValueMetaInterface.TYPE_DATE:
            // Use the actual datatypes in the target table to
            // determine how to create the control file column
            // definition for date/time fields.
            if (targetFieldMeta.getValueMetaList().get(i).getOriginalColumnType() == Types.DATE) {
              data.bulkFormatMeta[i] = data.bulkDateMeta;
              dataType = "SQLDATE";
            } else {
              data.bulkFormatMeta[i] = data.bulkTimestampMeta;
              dataType = "SQLTIMESTAMP";
            }
            break;
          // REVIEW jvs 13-Dec-2008: enable boolean support?
          case ValueMetaInterface.TYPE_BOOLEAN:
            dataType = "SQLCHAR";
            break;
          default:
            dataType = "SQLVARCHAR";
            break;
        }
        writer.write(dataType + " ");

        // Col 3 : an ignored column (start position or something like that)
        //
        writer.write("0 ");

        // Col 4 : the data length, just put the length metadata in here
        //
        if (field.getLength() == -1) {
          writer.write("1000 ");
        } else {
          writer.write("" + field.getLength() + " ");
        }

        // Col 5 : The separator is also ignored, we're going to put a tab in here, like in the sample
        //
        writer.write("\"\\t\" "); // "\t"

        // Col 6 : the column number again...
        //
        writer.write(Integer.toString(i + 1) + " ");

        // Col 7 : The identifier
        //
        writer.write(meta.getFieldTable()[i] + " ");

        // Col 8 : Collation / Format : leave it empty/default at the time being
        //
        writer.write("\"\" "); // ""

        // Newline to finish
        //
        writer.write(Const.CR);
      }
    } catch (Exception e) {
      throw new KettleException("Unable to create BCP control file", e);
    } finally {
      // That's it, close shop
      //
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException e) {
          throw new KettleException("Unable to close BCP file '" + bcpFilename + "'", e);
        }
      }
    }
  }

  public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    meta = (LucidDBBulkLoaderMeta) smi;
    data = (LucidDBBulkLoaderData) sdi;

    try {
      Object[] r = getRow(); // Get row from input rowset & set row busy!
      if (r == null) { // no more input to be expected...

        setOutputDone();

        // Close the fifo file...
        //
        data.fifoStream.close();
        data.fifoStream = null;

        // wait for the INSERT statement to finish and check for any
        // error and/or warning...
        data.sqlRunner.join();
        SqlRunner sqlRunner = data.sqlRunner;
        data.sqlRunner = null;
        for (String warning : sqlRunner.warnings) {
          // REVIEW jvs 13-Dec-2008: It would be nice if there were
          // a logWarning instead?
          logError(" (WARNING) " + warning);
        }
        sqlRunner.checkExcn();

        // If there was no fatal exception, but there were warnings,
        // retrieve the rejected row count
        if (!sqlRunner.warnings.isEmpty()) {
          ResultSet rs = data.db.openQuery("SELECT PARAM_VALUE "
            + "FROM SYS_ROOT.USER_SESSION_PARAMETERS "
            + "WHERE PARAM_NAME='lastRowsRejected'");
          try {
            rs.next();
            setLinesRejected(rs.getInt(1));
          } finally {
            rs.close();
          }
        }

        return false;
      }

      if (first) {
        first = false;

        // Cache field indexes.
        //
        data.keynrs = new int[meta.getFieldStream().length];
        for (int i = 0; i < data.keynrs.length; i++) {
          data.keynrs[i] = getInputRowMeta().indexOfValue(meta.getFieldStream()[i]);
        }

        // execute the client statement...
        //
        execute(meta, true);
      }

      writeRowToBulk(getInputRowMeta(), r);
      putRow(getInputRowMeta(), r);
      incrementLinesOutput();

      return true;
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "LucidDBBulkLoader.Log.ErrorInStep"), e);
      setErrors(1);
      stopAll();
      setOutputDone(); // signal end to receiver(s)
      return false;
    }
  }

  private void writeRowToBulk(RowMetaInterface rowMeta, Object[] r) throws KettleException {
    try {
      // So, we have this output stream to which we can write CSV data to.
      // Basically, what we need to do is write the binary data (from strings to it as part of this proof of concept)
      //
      // The data format required is essentially:
      //
      for (int i = 0; i < data.keynrs.length; i++) {
        if (i > 0) {
          // Write a separator
          //
          data.fifoStream.write(data.separator);
        }

        int index = data.keynrs[i];
        ValueMetaInterface valueMeta = rowMeta.getValueMeta(index);
        Object valueData = r[index];

        if (valueData != null) {
          switch (valueMeta.getType()) {
            case ValueMetaInterface.TYPE_STRING:
              data.fifoStream.write(data.quote);
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                // We had a string, just dump it back.
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(valueMeta.getString(valueData).getBytes());
              }
              data.fifoStream.write(data.quote);
              break;
            case ValueMetaInterface.TYPE_INTEGER:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(Long.toString(valueMeta.getInteger(valueData)).getBytes());
              }
              break;
            case ValueMetaInterface.TYPE_DATE:
              // REVIEW jvs 13-Dec-2008: Is it OK to ignore FieldFormatOk like this?
              /*
               * if (false && valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
               * data.fifoStream.write((byte[])valueData); } else {
               */
              Date date = valueMeta.getDate(valueData);
              // Convert it to the ISO timestamp format "yyyy-MM-dd HH:mm:ss" or date format
              // "yyyy-MM-dd" as appropriate, since LucidDB follows SQL:2003 here
              data.fifoStream.write(data.bulkFormatMeta[i].getString(date).getBytes());
              // }
              break;
            case ValueMetaInterface.TYPE_BOOLEAN:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(Boolean.toString(valueMeta.getBoolean(valueData)).getBytes());
              }
              break;
            case ValueMetaInterface.TYPE_NUMBER:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(Double.toString(valueMeta.getNumber(valueData)).getBytes());
              }
              break;
            case ValueMetaInterface.TYPE_BIGNUMBER:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(valueMeta.getString(valueData).getBytes());
              }
              break;
            default:
              break;
          }
        }
      }

      // finally write a newline
      //
      data.fifoStream.write(data.newline);
    } catch (Exception e) {
      throw new KettleException("Error serializing rows of data to the fifo file", e);
    }
  }

  public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
    meta = (LucidDBBulkLoaderMeta) smi;
    data = (LucidDBBulkLoaderData) sdi;

    if (super.init(smi, sdi)) {
      data.quote = "\"".getBytes();
      data.separator = ",".getBytes();
      data.newline = Const.CR.getBytes();

      data.bulkTimestampMeta = new ValueMeta("timestampMeta", ValueMetaInterface.TYPE_DATE);
      data.bulkTimestampMeta.setConversionMask("yyyy-MM-dd HH:mm:ss");
      data.bulkTimestampMeta.setStringEncoding(meta.getEncoding());

      data.bulkDateMeta = new ValueMeta("dateMeta", ValueMetaInterface.TYPE_DATE);
      data.bulkDateMeta.setConversionMask("yyyy-MM-dd");
      data.bulkDateMeta.setStringEncoding(meta.getEncoding());

      data.bulkNumberMeta = new ValueMeta("numberMeta", ValueMetaInterface.TYPE_NUMBER);
      data.bulkNumberMeta.setConversionMask("#.#");
      data.bulkNumberMeta.setGroupingSymbol(",");
      data.bulkNumberMeta.setDecimalSymbol(".");
      data.bulkNumberMeta.setStringEncoding(meta.getEncoding());

      data.bufferSize = Const.toInt(environmentSubstitute(meta.getBufferSize()), 100000);

      // Allocate the buffer
      //
      data.rowBuffer = new byte[data.bufferSize][];
      data.bufferIndex = 0;

      // Schema-table combination...
      data.schemaTable = meta.getDatabaseMeta().getQuotedSchemaTableCombination(
        environmentSubstitute(meta.getSchemaName()), environmentSubstitute(meta.getTableName()));

      return true;
    }
    return false;
  }

  public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
    meta = (LucidDBBulkLoaderMeta) smi;
    data = (LucidDBBulkLoaderData) sdi;

    // Close the output streams if still needed.
    //
    try {
      if (data.fifoStream != null) {
        data.fifoStream.close();
      }

      // Stop the SQL execution thread
      if (data.sqlRunner != null) {
        data.sqlRunner.join();
        data.sqlRunner = null;
      }

      // And finally, release the database connection
      if (data.db != null) {
        data.db.disconnect();
        data.db = null;
      }
    } catch (Exception e) {
      setErrors(1L);
      logError("Unexpected error encountered while closing the client connection", e);
    }

    super.dispose(smi, sdi);
  }

  static class SqlRunner extends Thread {
    private LucidDBBulkLoaderData data;
    private PreparedStatement ps;
    private SQLException ex;
    List<String> warnings;

    SqlRunner(LucidDBBulkLoaderData data, PreparedStatement ps) {
      this.data = data;
      this.ps = ps;
      warnings = new ArrayList<String>();
    }

    public void run() {
      try {
        // TODO jvs 12-Dec-2008: cross-check result against actual number
        // of rows sent.
        ps.executeUpdate();

        // Pump out any warnings and save them.
        SQLWarning warning = ps.getWarnings();
        while (warning != null) {
          warnings.add(warning.getMessage());
          warning = warning.getNextWarning();
        }
      } catch (SQLException ex) {
        this.ex = ex;
      } finally {
        try {
          data.db.closePreparedStatement(ps);
        } catch (KettleException ke) {
          // not much we can do with this
        } finally {
          ps = null;
        }
      }
    }

    void checkExcn() throws SQLException {
      // This is called from the main thread context to rethrow any saved excn.
      if (ex != null) {
        throw ex;
      }
    }
  }
}
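The pattern worth taking away from this step is the coordination between the FIFO writer and the blocking INSERT ... SELECT that SqlRunner executes on its own thread: the statement is prepared before the step opens its end of the pipe, the load runs in the background while rows are streamed in, and warnings are collected once the writer side closes. Below is a minimal standalone sketch of that pattern in plain JDBC. It is not part of the Pentaho class above, and the JDBC URL, credentials, schema, table, server name, and FIFO path are placeholders for illustration only.

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLWarning;

public class FifoLoadSketch {

  public static void main(String[] args) throws Exception {
    // Placeholder connection details -- substitute a real JDBC URL and credentials.
    Connection conn = DriverManager.getConnection("jdbc:luciddb://localhost", "sa", "");

    // Prepare the load statement BEFORE opening our end of the FIFO, for the same
    // reason given in executeLoadCommand(): a validation error during prepare must
    // not leave us blocked on a pipe the server will never read.
    // Schema, table and fifo-server names here are made up.
    final PreparedStatement ps = conn.prepareStatement(
      "insert into \"MYSCHEMA\".\"MYTABLE\" "
        + "select * from \"MY_FIFO_SERVER\".\"DEFAULT\".\"MYTABLE\"");

    // Run the blocking load on a separate thread; executeUpdate() only returns
    // once the writer closes its end of the FIFO.
    final SQLWarning[] firstWarning = new SQLWarning[1];
    Thread loader = new Thread(new Runnable() {
      public void run() {
        try {
          ps.executeUpdate();
          firstWarning[0] = ps.getWarnings();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    });
    loader.start();

    // Meanwhile this thread writes CSV rows into the FIFO. "/tmp/bulkload/MYTABLE.csv"
    // is a made-up path that the fifo server's directory option would point at.
    BufferedWriter out = new BufferedWriter(new FileWriter("/tmp/bulkload/MYTABLE.csv"));
    try {
      out.write("\"row one\",1,2013-01-01\n");
      out.write("\"row two\",2,2013-01-02\n");
    } finally {
      out.close(); // closing our end of the FIFO lets the INSERT ... SELECT finish
    }

    loader.join();
    for (SQLWarning w = firstWarning[0]; w != null; w = w.getNextWarning()) {
      System.out.println("load warning: " + w.getMessage());
    }
    ps.close();
    conn.close();
  }
}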