org.inquidia.kettle.plugins.tokenreplacement.TokenReplacement.java Source code

Java tutorial

Introduction

Here is the source code for org.inquidia.kettle.plugins.tokenreplacement.TokenReplacement.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.inquidia.kettle.plugins.tokenreplacement;

import org.apache.commons.lang.BooleanUtils;
import org.apache.commons.vfs.FileObject;
import org.codehaus.groovy.runtime.StringBufferWriter;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.Iterator;

/**
 * @author Chris
 * @since 1-nov-2014
 */
public class TokenReplacement extends BaseStep implements StepInterface {
    private static Class<?> PKG = TokenReplacementMeta.class; // for i18n purposes, needed by Translator2!!

    public TokenReplacementMeta meta;

    public TokenReplacementData data;

    public TokenReplacement(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    public synchronized boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        meta = (TokenReplacementMeta) smi;
        data = (TokenReplacementData) sdi;

        boolean result = true;
        Object[] r = getRow(); // This also waits for a row to be finished.

        if (first && r != null) {
            first = false;

            data.inputRowMeta = getInputRowMeta();
            data.outputRowMeta = getInputRowMeta().clone();

            if (meta.getOutputType().equalsIgnoreCase("field")) {
                meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);

            }
            if (meta.getOutputType().equalsIgnoreCase("file") && !meta.isOutputFileNameInField()) {
                if (meta.getOutputFileName() != null) {
                    String filename = meta.buildFilename(meta.getOutputFileName(), getTransMeta(), getCopy(),
                            getPartitionID(), data.splitnr);
                    openNewOutputFile(filename);
                } else {
                    throw new KettleException("Output file name cannot be null.");
                }
            }
        }

        if (r == null) {
            // no more input to be expected...
            closeAllOutputFiles();
            setOutputDone();
            return false;
        }

        if (meta.getOutputType().equalsIgnoreCase("file") && !meta.isOutputFileNameInField()
                && meta.getSplitEvery() > 0 && data.rowNumber % meta.getSplitEvery() == 0) {
            if (data.rowNumber > 0) {
                closeAllOutputFiles();
                data.splitnr++;
                String filename = meta.buildFilename(meta.getOutputFileName(), getTransMeta(), getCopy(),
                        getPartitionID(), data.splitnr);
                openNewOutputFile(filename);
            }
        }

        String outputFilename = "";
        if (meta.getOutputType().equalsIgnoreCase("file") && !meta.isOutputFileNameInField()) {
            outputFilename = meta.buildFilename(meta.getOutputFileName(), getTransMeta(), getCopy(),
                    getPartitionID(), data.splitnr);
        } else if (meta.getOutputType().equalsIgnoreCase("file") && meta.isOutputFileNameInField()) {
            String filenameValue = data.inputRowMeta.getString(r,
                    environmentSubstitute(meta.getOutputFileNameField()), "");
            if (!Const.isEmpty(filenameValue)) {
                outputFilename = filenameValue;
            } else {
                throw new KettleException("Filename cannot be empty.");
            }
        }

        //Create token resolver
        TokenResolver resolver = new TokenResolver();

        for (TokenReplacementField field : meta.getTokenReplacementFields()) {
            if (data.inputRowMeta.indexOfValue(field.getName()) >= 0) {
                String fieldValue = environmentSubstitute(data.inputRowMeta.getString(r, field.getName(), null));
                if (fieldValue == null && !BooleanUtils
                        .toBoolean(Const.getEnvironmentVariable("KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL", "N"))) {
                    fieldValue = Const.nullToEmpty(fieldValue);
                }
                resolver.addToken(field.getTokenName(), fieldValue);
            } else {
                throw new KettleValueException("Field " + field.getName() + " not found on input stream.");
            }
        }

        Reader reader;
        String inputFilename = "";

        if (meta.getInputType().equalsIgnoreCase("text")) {
            reader = new TokenReplacingReader(resolver, new StringReader(meta.getInputText()),
                    environmentSubstitute(meta.getTokenStartString()),
                    environmentSubstitute(meta.getTokenEndString()));

        } else if (meta.getInputType().equalsIgnoreCase("field")) {
            if (data.inputRowMeta.indexOfValue(meta.getInputFieldName()) >= 0) {
                String inputString = data.inputRowMeta.getString(r, meta.getInputFieldName(), "");
                reader = new TokenReplacingReader(resolver, new StringReader(inputString),
                        environmentSubstitute(meta.getTokenStartString()),
                        environmentSubstitute(meta.getTokenEndString()));

            } else {
                throw new KettleValueException(
                        "Input field " + meta.getInputFieldName() + " not found on input stream.");
            }
        } else if (meta.getInputType().equalsIgnoreCase("file")) {
            if (meta.isInputFileNameInField()) {
                if (data.inputRowMeta.indexOfValue(environmentSubstitute(meta.getInputFileNameField())) >= 0) {
                    inputFilename = data.inputRowMeta.getString(r,
                            environmentSubstitute(meta.getInputFileNameField()), "");
                } else {
                    throw new KettleValueException("Input filename field "
                            + environmentSubstitute(meta.getInputFileNameField()) + " not found on input stream.");
                }
            } else {
                inputFilename = environmentSubstitute(meta.getInputFileName());
            }

            if (Const.isEmpty(inputFilename)) {
                throw new KettleValueException("Input filename cannot be empty");
            }

            FileObject file = KettleVFS.getFileObject(inputFilename, getTransMeta());
            reader = new TokenReplacingReader(resolver,
                    new InputStreamReader(KettleVFS.getInputStream(inputFilename, getTransMeta())),
                    environmentSubstitute(meta.getTokenStartString()),
                    environmentSubstitute(meta.getTokenEndString()));

            if (meta.isAddInputFileNameToResult()) {
                ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL,
                        KettleVFS.getFileObject(inputFilename, getTransMeta()), getTransMeta().getName(),
                        getStepname());
                resultFile.setComment(BaseMessages.getString(PKG, "TokenReplacement.AddInputResultFile"));
                addResultFile(resultFile);
            }
        } else {
            throw new KettleException("Unsupported input type " + meta.getInputType());
        }

        Writer stringWriter = null;
        OutputStream bufferedWriter = null;

        if (meta.getOutputType().equalsIgnoreCase("field")) {
            stringWriter = new StringBufferWriter(new StringBuffer(5000));
        } else if (meta.getOutputType().equalsIgnoreCase("file")) {

            if (inputFilename.equals(outputFilename)) {
                throw new KettleException("Input and output filenames must not be the same " + inputFilename);
            }

            int fileIndex = data.openFiles.indexOf(outputFilename);
            if (fileIndex < 0) {
                openNewOutputFile(outputFilename);
                fileIndex = data.openFiles.indexOf(outputFilename);
            }

            bufferedWriter = data.openBufferedWriters.get(fileIndex);

        } else {
            throw new KettleException("Unsupported output type " + meta.getOutputType());
        }

        String output = "";

        try {
            char[] cbuf = new char[5000];
            StringBuffer sb = new StringBuffer();
            int length = 0;
            while ((length = reader.read(cbuf)) > 0) {
                if (meta.getOutputType().equalsIgnoreCase("field")) {
                    stringWriter.write(cbuf, 0, length);
                } else if (meta.getOutputType().equalsIgnoreCase("file")) {
                    CharBuffer cBuffer = CharBuffer.wrap(cbuf, 0, length);
                    ByteBuffer bBuffer = Charset.forName(meta.getOutputFileEncoding()).encode(cBuffer);
                    byte[] bytes = new byte[bBuffer.limit()];
                    bBuffer.get(bytes);
                    bufferedWriter.write(bytes);

                } //No else.  Anything else will be thrown to a Kettle exception prior to getting here.
                cbuf = new char[5000];
            }

            if (meta.getOutputType().equalsIgnoreCase("field")) {
                output += stringWriter.toString();
            } else if (meta.getOutputType().equalsIgnoreCase("file")) {
                bufferedWriter.write(meta.getOutputFileFormatString().getBytes());
            }
        } catch (IOException ex) {
            throw new KettleException(ex.getMessage(), ex);
        } finally {
            try {
                reader.close();
                if (stringWriter != null) {
                    stringWriter.close();
                }

                reader = null;
                stringWriter = null;

            } catch (IOException ex) {
                throw new KettleException(ex.getMessage(), ex);
            }

        }

        if (meta.getOutputType().equalsIgnoreCase("field")) {
            r = RowDataUtil.addValueData(r, data.outputRowMeta.size() - 1, output);
        } else if (meta.getOutputType().equalsIgnoreCase("file")) {
            incrementLinesWritten();
        }

        putRow(data.outputRowMeta, r); // in case we want it to go further...
        data.rowNumber++;
        if (checkFeedback(getLinesOutput())) {
            logBasic("linenr " + getLinesOutput());
        }

        return result;
    }

    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (TokenReplacementMeta) smi;
        data = (TokenReplacementData) sdi;

        if (super.init(smi, sdi)) {
            return true;
        }

        return false;
    }

    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (TokenReplacementMeta) smi;
        data = (TokenReplacementData) sdi;

        try {
            closeAllOutputFiles();
        } catch (KettleException ex) {
        }

        super.dispose(smi, sdi);
    }

    public void openNewOutputFile(String filename) throws KettleException {
        if (data.openFiles.contains(filename)) {
            logDetailed("File " + filename + " is already open.");
            return;
        }
        if (meta.isCreateParentFolder()) {
            try {
                createParentFolder(filename);
            } catch (Exception ex) {
                throw new KettleException(ex.getMessage(), ex);
            }
        }

        boolean fileExists = KettleVFS.fileExists(filename);

        OutputStream writer = KettleVFS.getOutputStream(filename, meta.isAppendOutputFileName());
        OutputStream bufferedWriter = new BufferedOutputStream(writer, 5000);

        try {
            if (fileExists && meta.isAppendOutputFileName()) {
                bufferedWriter.write(meta.getOutputFileFormatString().getBytes());
            }
        } catch (IOException ex) {
            throw new KettleException(ex.getMessage(), ex);
        }

        data.openFiles.add(filename);
        data.openWriters.add(writer);
        data.openBufferedWriters.add(bufferedWriter);

    }

    public void closeAllOutputFiles() throws KettleException {
        try {
            Iterator<OutputStream> itWriter = data.openWriters.iterator();
            Iterator<OutputStream> itBufferedWriter = data.openBufferedWriters.iterator();
            Iterator<String> itFilename = data.openFiles.iterator();
            if (meta.isAddOutputFileNameToResult()) {
                while (itFilename.hasNext()) {
                    ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL,
                            KettleVFS.getFileObject(itFilename.next(), getTransMeta()), getTransMeta().getName(),
                            getStepname());
                    resultFile.setComment(BaseMessages.getString(PKG, "TokenReplacement.AddOutputResultFile"));
                    addResultFile(resultFile);
                }
            }

            while (itBufferedWriter.hasNext()) {
                OutputStream bufferedWriter = itBufferedWriter.next();
                if (bufferedWriter != null) {
                    bufferedWriter.flush();
                    bufferedWriter.close();
                }
            }

            while (itWriter.hasNext()) {
                OutputStream writer = itWriter.next();
                if (writer != null) {
                    writer.close();
                }
            }

            data.openBufferedWriters.clear();
            data.openWriters.clear();
            data.openFiles.clear();
        } catch (IOException ex) {
            throw new KettleException(ex.getMessage(), ex);
        }
    }

    private void createParentFolder(String filename) throws Exception {
        // Check for parent folder
        FileObject parentfolder = null;
        try {
            // Get parent folder
            parentfolder = KettleVFS.getFileObject(filename).getParent();
            if (parentfolder.exists()) {
                if (isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "TokenReplacement.Log.ParentFolderExist",
                            parentfolder.getName()));
                }
            } else {
                if (isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "TokenReplacement.Log.ParentFolderNotExist",
                            parentfolder.getName()));
                }
                if (meta.isCreateParentFolder()) {
                    parentfolder.createFolder();
                    if (isDetailed()) {
                        logDetailed(BaseMessages.getString(PKG, "TokenReplacement.Log.ParentFolderCreated",
                                parentfolder.getName()));
                    }
                } else {
                    throw new KettleException(BaseMessages.getString(PKG,
                            "TokenReplacement.Log.ParentFolderNotExistCreateIt", parentfolder.getName(), filename));
                }
            }

        } finally {
            if (parentfolder != null) {
                try {
                    parentfolder.close();
                } catch (Exception ex) {
                    // Ignore
                }
            }
        }
    }

}