com.panet.imeta.trans.steps.monetdbbulkloader.MonetDBBulkLoader.java Source code

Java tutorial

Introduction

Here is the source code for com.panet.imeta.trans.steps.monetdbbulkloader.MonetDBBulkLoader.java

Source

/* Copyright (c) 2007 Pentaho Corporation.  All rights reserved. 
* This software was developed by Pentaho Corporation and is provided under the terms 
* of the GNU Lesser General Public License, Version 2.1. You may not use 
* this file except in compliance with the license. If you need a copy of the license, 
* please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Pentaho 
* Data Integration.  The Initial Developer is Pentaho Corporation.
*
* Software distributed under the GNU Lesser Public License is distributed on an "AS IS" 
* basis, WITHOUT WARRANTY OF ANY KIND, either express or  implied. Please refer to 
* the license for the specific language governing your rights and limitations.*/

package com.panet.imeta.trans.steps.monetdbbulkloader;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Date;

import org.apache.commons.vfs.FileObject;

import com.panet.imeta.core.Const;
import com.panet.imeta.core.database.DatabaseMeta;
import com.panet.imeta.core.exception.KettleException;
import com.panet.imeta.core.row.RowMetaInterface;
import com.panet.imeta.core.row.ValueMeta;
import com.panet.imeta.core.row.ValueMetaInterface;
import com.panet.imeta.core.util.StreamLogger;
import com.panet.imeta.core.vfs.KettleVFS;
import com.panet.imeta.trans.Trans;
import com.panet.imeta.trans.TransMeta;
import com.panet.imeta.trans.step.BaseStep;
import com.panet.imeta.trans.step.StepDataInterface;
import com.panet.imeta.trans.step.StepInterface;
import com.panet.imeta.trans.step.StepMeta;
import com.panet.imeta.trans.step.StepMetaInterface;

/**
 * Performs a bulk load to a MonetDB table.
 *
 * Incoming rows are serialized to delimited text, collected in an in-memory
 * buffer and, each time the buffer is full (and once more at end of input),
 * sent to the standard input of an external mclient process as a
 * "COPY n RECORDS INTO table FROM STDIN;" statement followed by the rows.
 *
 * Based on (copied from) Sven Boden's Oracle Bulk Loader step
 * 
 * @author matt
 * @since  22-aug-2008
 */
public class MonetDBBulkLoader extends BaseStep implements StepInterface {
    /** Rows per COPY INTO batch when the configured buffer size is missing, unparsable or not positive. */
    private static final int DEFAULT_BUFFER_SIZE = 100000;

    private MonetDBBulkLoaderMeta meta;
    private MonetDBBulkLoaderData data;

    public MonetDBBulkLoader(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr,
            TransMeta transMeta, Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    /**
     * Create the command line for the mclient process depending on the meta
     * information supplied.
     * 
     * NOTE(review): the password is passed as a plain command line argument and
     * may therefore be visible to other users in the process list.
     * 
     * @param meta The meta data to create the command line from
     * @param password true to include the real password, false to mask it
     *                 with "******" (use false for anything that gets logged)
     * 
     * @return The string to execute.
     * 
     * @throws KettleException If no mclient path or no connection is specified,
     *                         or a configured file name cannot be resolved
     */
    public String createCommandLine(MonetDBBulkLoaderMeta meta, boolean password) throws KettleException {
        if (Const.isEmpty(meta.getMClientPath())) {
            throw new KettleException("No mclient application specified");
        }

        StringBuilder sb = new StringBuilder(300);

        try {
            FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getMClientPath()));
            sb.append(KettleVFS.getFilename(fileObject));
        } catch (IOException ex) {
            throw new KettleException("Error retrieving mclient application string", ex);
        }

        // Add standard options to the mclient command:
        //
        sb.append(" -lsql");

        // See if the encoding is set...
        //
        if (!Const.isEmpty(meta.getEncoding())) {
            sb.append(" --encoding=");
            sb.append(environmentSubstitute(meta.getEncoding()));
        }

        if (!Const.isEmpty(meta.getLogFile())) {
            try {
                FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getLogFile()));

                sb.append(" --log=");
                sb.append('\'').append(KettleVFS.getFilename(fileObject)).append('\'');
            } catch (IOException ex) {
                throw new KettleException("Error retrieving logfile string", ex);
            }
        }

        DatabaseMeta dm = meta.getDatabaseMeta();
        if (dm == null) {
            throw new KettleException("No connection specified");
        }

        String user = environmentSubstitute(Const.NVL(dm.getUsername(), ""));
        String pass = environmentSubstitute(Const.NVL(dm.getPassword(), ""));
        String hostname = environmentSubstitute(Const.NVL(dm.getHostname(), ""));
        String portnum = environmentSubstitute(Const.NVL(dm.getDatabasePortNumberString(), ""));
        String dbname = environmentSubstitute(Const.NVL(dm.getDatabaseName(), ""));

        if (!Const.isEmpty(user)) {
            sb.append(" --user=").append(user);
        }
        if (!Const.isEmpty(pass)) {
            // Only expose the real password when explicitly asked to.
            sb.append(" --passwd=").append(password ? pass : "******");
        }
        if (!Const.isEmpty(hostname)) {
            sb.append(" --host=").append(hostname);
        }
        // Only pass a port that parses to a positive number.
        if (!Const.isEmpty(portnum) && Const.toInt(portnum, -1) > 0) {
            sb.append(" --port=").append(portnum);
        }
        if (!Const.isEmpty(dbname)) {
            sb.append(" --database=").append(dbname);
        }

        return sb.toString();
    }

    /**
     * Starts the mclient process and wires up its streams: STDERR and STDOUT
     * are drained by logger threads, STDIN is kept in the step data so the
     * COPY INTO commands and the row data can be written to it.
     * 
     * NOTE(review): the command is started through Runtime.exec(String), which
     * splits the command on whitespace; paths containing spaces and the quotes
     * around the log file name are not interpreted the way a shell would.
     * 
     * @param meta The meta data to create the command line from
     * @param wait Not used by this implementation
     * 
     * @return always true; failures are reported through the exception
     * 
     * @throws KettleException If the command line cannot be built or the
     *                         process cannot be started
     */
    public boolean execute(MonetDBBulkLoaderMeta meta, boolean wait) throws KettleException {
        // Build both variants up front: configuration problems surface directly,
        // and only the masked variant is ever written to the log or an error message.
        //
        String maskedCmd = createCommandLine(meta, false);
        String cmd = createCommandLine(meta, true);

        try {
            logBasic("Executing command: " + maskedCmd);
            data.mClientlProcess = Runtime.getRuntime().exec(cmd);

            // any error message?
            //
            data.errorLogger = new StreamLogger(data.mClientlProcess.getErrorStream(), "ERROR");

            // any output?
            data.outputLogger = new StreamLogger(data.mClientlProcess.getInputStream(), "OUTPUT");

            // Where do we send the data to?  --> To STDIN of the mclient process
            //
            data.monetOutputStream = data.mClientlProcess.getOutputStream();

            // kick them off
            new Thread(data.errorLogger).start();
            new Thread(data.outputLogger).start();

            // OK, from here on, we need to feed the COPY INTO command followed by the data into the monetOutputStream
            //
        } catch (Exception ex) {
            throw new KettleException("Error while executing mclient : " + maskedCmd, ex);
        }

        return true;
    }

    /**
     * Handles one input row: buffers it for MonetDB and passes it on unchanged
     * to the next step(s). On the first row the field indexes are cached and
     * the mclient process is started. When the input is exhausted, the rows
     * still in the buffer are sent and the mclient process is finished.
     * 
     * @return true if more rows are expected, false when the input is
     *         exhausted or an error occurred
     */
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        meta = (MonetDBBulkLoaderMeta) smi;
        data = (MonetDBBulkLoaderData) sdi;

        try {
            Object[] r = getRow(); // Get row from input rowset & set row busy!
            if (r == null) // no more input to be expected...
            {
                setOutputDone();
                finishLoad();
                return false;
            }

            if (first) {
                first = false;

                // Cache field indexes.
                //
                String[] fieldStream = meta.getFieldStream();
                data.keynrs = new int[fieldStream.length];
                for (int i = 0; i < data.keynrs.length; i++) {
                    data.keynrs[i] = getInputRowMeta().indexOfValue(fieldStream[i]);
                }

                // start the mclient process...
                //
                execute(meta, true);
            }

            writeRowToMonetDB(getInputRowMeta(), r);
            putRow(getInputRowMeta(), r);
            incrementLinesOutput();

            return true;
        } catch (Exception e) {
            logError(Messages.getString("MonetDBBulkLoader.Log.ErrorInStep"), e); //$NON-NLS-1$
            setErrors(1);
            stopAll();
            setOutputDone(); // signal end to receiver(s)
            return false;
        }
    }

    /**
     * Sends the rows still sitting in the buffer, closes the STDIN of the
     * mclient process and waits for it to exit. Does nothing when the process
     * was never started, which is the case when the step received no rows.
     */
    private void finishLoad() throws KettleException, IOException, InterruptedException {
        if (data.monetOutputStream == null) {
            return;
        }

        // The last batch is usually only partially filled: it must be written
        // before the stream is closed or those rows are never loaded.
        //
        writeBufferToMonetDB();

        // Close the output stream...
        //
        data.monetOutputStream.flush();
        data.monetOutputStream.close();

        // wait for the mclient process to finish and check for any error...
        //
        int exitVal = data.mClientlProcess.waitFor();
        logBasic(Messages.getString("MonetDBBulkLoader.Log.ExitValuePsqlPath", "" + exitVal)); //$NON-NLS-1$
    }

    /**
     * Buffers a row for loading. When the buffer is full it is written to
     * mclient first, so the row at hand always ends up in the buffer.
     */
    private void writeRowToMonetDB(RowMetaInterface rowMeta, Object[] r) throws KettleException {
        if (data.bufferIndex >= data.bufferSize) {
            writeBufferToMonetDB(); // resets data.bufferIndex to 0
        }
        addRowToBuffer(rowMeta, r);
    }

    /**
     * Tells whether the value is stored as a binary string and the field is
     * flagged "format OK" in the step meta, in which case its bytes are
     * written to the output as they are.
     */
    private boolean isPassThrough(ValueMetaInterface valueMeta, int fieldNr) {
        return valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[fieldNr];
    }

    /**
     * Serializes the selected fields of one row to a single line (fields
     * separated by data.separator, strings enclosed in data.quote, terminated
     * by data.newline) and stores that line in the next free buffer slot.
     * Null values, and values of a type not handled below, produce an empty
     * field.
     * 
     * NOTE(review): text is converted with the platform default charset, and
     * quote or separator characters inside string values are not escaped.
     * 
     * @throws KettleException If a value cannot be converted or stored
     */
    private void addRowToBuffer(RowMetaInterface rowMeta, Object[] r) throws KettleException {

        ByteArrayOutputStream line = new ByteArrayOutputStream(25000);

        try {
            for (int i = 0; i < data.keynrs.length; i++) {
                if (i > 0) {
                    // Write a separator 
                    //
                    line.write(data.separator);
                }

                int index = data.keynrs[i];
                ValueMetaInterface valueMeta = rowMeta.getValueMeta(index);
                Object valueData = r[index];

                if (valueData == null) {
                    continue;
                }

                switch (valueMeta.getType()) {
                case ValueMetaInterface.TYPE_STRING:
                    line.write(data.quote);
                    line.write(isPassThrough(valueMeta, i) ? (byte[]) valueData
                            : valueMeta.getString(valueData).getBytes());
                    line.write(data.quote);
                    break;
                case ValueMetaInterface.TYPE_INTEGER:
                    line.write(isPassThrough(valueMeta, i) ? (byte[]) valueData
                            : Long.toString(valueMeta.getInteger(valueData)).getBytes());
                    break;
                case ValueMetaInterface.TYPE_DATE:
                    if (isPassThrough(valueMeta, i)) {
                        // Keep the data format as indicated.
                        //
                        line.write((byte[]) valueData);
                    } else {
                        // Convert it to the MonetDB date format "yyyy/MM/dd HH:mm:ss"
                        //
                        Date date = valueMeta.getDate(valueData);
                        line.write(data.monetDateMeta.getString(date).getBytes());
                    }
                    break;
                case ValueMetaInterface.TYPE_BOOLEAN:
                    line.write(isPassThrough(valueMeta, i) ? (byte[]) valueData
                            : Boolean.toString(valueMeta.getBoolean(valueData)).getBytes());
                    break;
                case ValueMetaInterface.TYPE_NUMBER:
                    line.write(isPassThrough(valueMeta, i) ? (byte[]) valueData
                            : Double.toString(valueMeta.getNumber(valueData)).getBytes());
                    break;
                case ValueMetaInterface.TYPE_BIGNUMBER:
                    line.write(isPassThrough(valueMeta, i) ? (byte[]) valueData
                            : valueMeta.getString(valueData).getBytes());
                    break;
                }
            }

            // finally write a newline
            //
            line.write(data.newline);

            // Now that we have the line, grab the content and store it in the buffer...
            //
            data.rowBuffer[data.bufferIndex] = line.toByteArray();
            data.bufferIndex++;
        } catch (Exception e) {
            throw new KettleException("Error serializing rows of data to the mclient command", e);
        }

    }

    /**
     * Writes the buffered rows to the STDIN of the mclient process, preceded
     * by the COPY INTO command announcing their number, and marks the buffer
     * as empty again. Does nothing when the buffer holds no rows.
     * 
     * @throws KettleException If writing to the mclient process fails
     */
    private void writeBufferToMonetDB() throws KettleException {
        if (data.bufferIndex == 0) {
            return;
        }

        try {
            // first write the COPY INTO command...
            //
            String cmd = "COPY " + data.bufferIndex + " RECORDS INTO " + data.schemaTable + " FROM STDIN;";
            if (log.isDetailed()) {
                logDetailed(cmd);
            }
            data.monetOutputStream.write(cmd.getBytes());

            for (int i = 0; i < data.bufferIndex; i++) {
                data.monetOutputStream.write(data.rowBuffer[i]);
                if (log.isRowLevel()) {
                    logRowlevel(new String(data.rowBuffer[i]));
                }
            }

            // Also write an empty row
            //
            data.monetOutputStream.write(Const.CR.getBytes());
            if (log.isRowLevel()) {
                logRowlevel(Const.CR);
            }

            // reset the buffer pointer...
            //
            data.bufferIndex = 0;
        } catch (Exception e) {
            throw new KettleException("An error occurred writing data to the mclient process", e);
        }
    }

    /**
     * Prepares the step data: delimiters, the conversion metadata for dates
     * and numbers, the row buffer and the schema-table name to load into.
     * 
     * @return true if the step was initialized, false if the base
     *         initialization failed or no database connection is configured
     */
    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (MonetDBBulkLoaderMeta) smi;
        data = (MonetDBBulkLoaderData) sdi;

        if (!super.init(smi, sdi)) {
            return false;
        }

        DatabaseMeta databaseMeta = meta.getDatabaseMeta();
        if (databaseMeta == null) {
            // Fail the initialization cleanly instead of with a NullPointerException below.
            logError("No connection specified");
            return false;
        }

        data.quote = "\"".getBytes();
        data.separator = "|".getBytes();
        data.newline = Const.CR.getBytes();

        data.monetDateMeta = new ValueMeta("dateMeta", ValueMetaInterface.TYPE_DATE);
        data.monetDateMeta.setConversionMask("yyyy/MM/dd HH:mm:ss");
        data.monetDateMeta.setStringEncoding(meta.getEncoding());

        data.monetNumberMeta = new ValueMeta("numberMeta", ValueMetaInterface.TYPE_NUMBER);
        data.monetNumberMeta.setConversionMask("#.#");
        data.monetNumberMeta.setGroupingSymbol(",");
        data.monetNumberMeta.setDecimalSymbol(".");
        data.monetNumberMeta.setStringEncoding(meta.getEncoding());

        // A zero or negative size cannot hold any row: fall back to the default.
        //
        int bufferSize = Const.toInt(environmentSubstitute(meta.getBufferSize()), DEFAULT_BUFFER_SIZE);
        data.bufferSize = bufferSize > 0 ? bufferSize : DEFAULT_BUFFER_SIZE;

        // Allocate the buffer
        // 
        data.rowBuffer = new byte[data.bufferSize][];
        data.bufferIndex = 0;

        // Schema-table combination...
        data.schemaTable = databaseMeta.getSchemaTableCombination(
                environmentSubstitute(meta.getSchemaName()), environmentSubstitute(meta.getTableName()));

        return true;
    }

    /**
     * Closes the STDIN of the mclient process and waits for the process to
     * end, if it was started at all, then disposes of the base step.
     */
    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (MonetDBBulkLoaderMeta) smi;
        data = (MonetDBBulkLoaderData) sdi;

        // Close the mclient output stream
        //
        try {
            // Both are still null when no row was ever received: that is not an error.
            if (data.monetOutputStream != null) {
                data.monetOutputStream.close();
            }
            if (data.mClientlProcess != null) {
                int exitValue = data.mClientlProcess.waitFor();
                logDetailed("Exit value for the mclient process was : " + exitValue);
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to whoever runs this thread.
            Thread.currentThread().interrupt();
            setErrors(1L);
            logError("Unexpected error encountered while finishing the mclient process", e);
        } catch (Exception e) {
            setErrors(1L);
            logError("Unexpected error encountered while finishing the mclient process", e);
        }

        super.dispose(smi, sdi);
    }

    //
    // Run is where the action happens!
    //
    public void run() {
        BaseStep.runStepThread(this, meta, data);
    }
}