org.pentaho.di.trans.steps.loadfileinput.LoadFileInput.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.trans.steps.loadfileinput.LoadFileInput.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.loadfileinput;

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.List;

import org.apache.commons.vfs.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.fileinput.FileInputList;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Read files, parse them and convert them to rows and writes these to one or more output streams.
 *
 * @author Samatar
 * @since 20-06-2007
 */
public class LoadFileInput extends BaseStep implements StepInterface {
    private static Class<?> PKG = LoadFileInputMeta.class; // for i18n purposes, needed by Translator2!!

    private LoadFileInputMeta meta;
    private LoadFileInputData data;

    public LoadFileInput(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    private void addFileToResultFilesname(FileObject file) throws Exception {
        if (meta.addResultFile()) {
            // Add this to the result file names...
            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, file, getTransMeta().getName(),
                    getStepname());
            resultFile.setComment("File was read by a LoadFileInput step");
            addResultFile(resultFile);
        }
    }

    private boolean openNextFile() {
        try {
            if (meta.getIsInFields()) {
                data.readrow = getRow(); // Grab another row ...

                if (data.readrow == null) { // finished processing!

                    if (isDetailed()) {
                        logDetailed(BaseMessages.getString(PKG, "LoadFileInput.Log.FinishedProcessing"));
                    }
                    return false;
                }

                if (first) {
                    first = false;

                    data.inputRowMeta = getInputRowMeta();
                    data.outputRowMeta = data.inputRowMeta.clone();
                    meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);

                    // Create convert meta-data objects that will contain Date & Number formatters
                    //
                    data.convertRowMeta = data.outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);

                    if (meta.getIsInFields()) {
                        // Check is filename field is provided
                        if (Const.isEmpty(meta.getDynamicFilenameField())) {
                            logError(BaseMessages.getString(PKG, "LoadFileInput.Log.NoField"));
                            throw new KettleException(BaseMessages.getString(PKG, "LoadFileInput.Log.NoField"));
                        }

                        // cache the position of the field
                        if (data.indexOfFilenameField < 0) {
                            data.indexOfFilenameField = data.inputRowMeta
                                    .indexOfValue(meta.getDynamicFilenameField());
                            if (data.indexOfFilenameField < 0) {
                                // The field is unreachable !
                                logError(BaseMessages.getString(PKG, "LoadFileInput.Log.ErrorFindingField") + "["
                                        + meta.getDynamicFilenameField() + "]");
                                throw new KettleException(
                                        BaseMessages.getString(PKG, "LoadFileInput.Exception.CouldnotFindField",
                                                meta.getDynamicFilenameField()));
                            }
                        }
                        // Get the number of previous fields
                        data.totalpreviousfields = data.inputRowMeta.size();

                    }
                } // end if first

                // get field value
                String Fieldvalue = data.inputRowMeta.getString(data.readrow, data.indexOfFilenameField);

                if (isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "LoadFileInput.Log.Stream",
                            meta.getDynamicFilenameField(), Fieldvalue));
                }

                FileObject file = null;
                try {
                    // Source is a file.
                    data.file = KettleVFS.getFileObject(Fieldvalue);
                } catch (Exception e) {
                    throw new KettleException(e);
                } finally {
                    try {
                        if (file != null) {
                            file.close();
                        }
                    } catch (Exception e) {
                        // Ignore errors
                    }
                }
            } else {
                if (data.filenr >= data.files.nrOfFiles()) {
                    // finished processing!

                    if (isDetailed()) {
                        logDetailed(BaseMessages.getString(PKG, "LoadFileInput.Log.FinishedProcessing"));
                    }
                    return false;
                }

                // Is this the last file?
                data.last_file = (data.filenr == data.files.nrOfFiles() - 1);
                data.file = data.files.getFile(data.filenr);
            }

            // Check if file is empty
            data.fileSize = data.file.getContent().getSize();
            // Move file pointer ahead!
            data.filenr++;

            if (meta.isIgnoreEmptyFile() && data.fileSize == 0) {
                logError(BaseMessages.getString(PKG, "LoadFileInput.Error.FileSizeZero", "" + data.file.getName()));
                openNextFile();

            } else {
                if (isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "LoadFileInput.Log.OpeningFile", data.file.toString()));
                }
                data.filename = KettleVFS.getFilename(data.file);
                // Add additional fields?
                if (meta.getShortFileNameField() != null && meta.getShortFileNameField().length() > 0) {
                    data.shortFilename = data.file.getName().getBaseName();
                }
                if (meta.getPathField() != null && meta.getPathField().length() > 0) {
                    data.path = KettleVFS.getFilename(data.file.getParent());
                }
                if (meta.isHiddenField() != null && meta.isHiddenField().length() > 0) {
                    data.hidden = data.file.isHidden();
                }
                if (meta.getExtensionField() != null && meta.getExtensionField().length() > 0) {
                    data.extension = data.file.getName().getExtension();
                }
                if (meta.getLastModificationDateField() != null
                        && meta.getLastModificationDateField().length() > 0) {
                    data.lastModificationDateTime = new Date(data.file.getContent().getLastModifiedTime());
                }
                if (meta.getUriField() != null && meta.getUriField().length() > 0) {
                    data.uriName = data.file.getName().getURI();
                }
                if (meta.getRootUriField() != null && meta.getRootUriField().length() > 0) {
                    data.rootUriName = data.file.getName().getRootURI();
                }
                // get File content
                getFileContent();

                addFileToResultFilesname(data.file);

                if (isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "LoadFileInput.Log.FileOpened", data.file.toString()));
                }
            }

        } catch (Exception e) {
            logError(BaseMessages.getString(PKG, "LoadFileInput.Log.UnableToOpenFile", "" + data.filenr,
                    data.file.toString(), e.toString()));
            stopAll();
            setErrors(1);
            return false;
        }
        return true;
    }

    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        try {
            // Grab a row
            Object[] outputRowData = getOneRow();
            if (outputRowData == null) {
                setOutputDone(); // signal end to receiver(s)
                return false; // end of data or error.
            }

            if (isRowLevel()) {
                logRowlevel(BaseMessages.getString(PKG, "LoadFileInput.Log.ReadRow",
                        data.outputRowMeta.getString(outputRowData)));
            }

            putRow(data.outputRowMeta, outputRowData);

            if (meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit()) // limit has been reached: stop now.
            {
                setOutputDone();
                return false;
            }
        } catch (KettleException e) {
            logError(BaseMessages.getString(PKG, "LoadFileInput.ErrorInStepRunning", e.getMessage()));
            logError(Const.getStackTracker(e));
            setErrors(1);
            stopAll();
            setOutputDone(); // signal end to receiver(s)
            return false;
        }
        return true;

    }

    private void getFileContent() throws KettleException {
        try {
            data.filecontent = getTextFileContent(data.file.toString(), meta.getEncoding());
        } catch (java.lang.OutOfMemoryError o) {
            logError("There is no enaugh memory to load the content of the file [" + data.file.getName() + "]");
            throw new KettleException(o);
        } catch (Exception e) {
            throw new KettleException(e);
        }
    }

    /**
     * Read a text file.
     *
     * @param vfsFilename
     *          the filename or URL to read from
     * @param charSetName
     *          the character set of the string (UTF-8, ISO8859-1, etc)
     * @return The content of the file as a String
     * @throws KettleException
     */
    public static String getTextFileContent(String vfsFilename, String encoding) throws KettleException {
        InputStream inputStream = null;
        InputStreamReader reader = null;

        String retval = null;
        try {
            inputStream = KettleVFS.getInputStream(vfsFilename);

            if (!Const.isEmpty(encoding)) {
                reader = new InputStreamReader(new BufferedInputStream(inputStream), encoding);
            } else {
                reader = new InputStreamReader(new BufferedInputStream(inputStream));
            }

            int c;
            StringBuffer stringBuffer = new StringBuffer();
            while ((c = reader.read()) != -1) {
                stringBuffer.append((char) c);
            }

            retval = stringBuffer.toString();
        } catch (Exception e) {
            throw new KettleException(BaseMessages.getString(PKG, "LoadFileInput.Error.GettingFileContent",
                    vfsFilename, e.toString()));
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) { /* Ignore */
                }
            }
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (Exception e) { /* Ignore */
                }
            }
        }

        return retval;
    }

    private void handleMissingFiles() throws KettleException {
        List<FileObject> nonExistantFiles = data.files.getNonExistantFiles();

        if (nonExistantFiles.size() != 0) {
            String message = FileInputList.getRequiredFilesDescription(nonExistantFiles);
            logError(BaseMessages.getString(PKG, "LoadFileInput.Log.RequiredFilesTitle"),
                    BaseMessages.getString(PKG, "LoadFileInput.Log.RequiredFiles", message));

            throw new KettleException(
                    BaseMessages.getString(PKG, "LoadFileInput.Log.RequiredFilesMissing", message));
        }

        List<FileObject> nonAccessibleFiles = data.files.getNonAccessibleFiles();
        if (nonAccessibleFiles.size() != 0) {
            String message = FileInputList.getRequiredFilesDescription(nonAccessibleFiles);
            logError(BaseMessages.getString(PKG, "LoadFileInput.Log.RequiredFilesTitle"),
                    BaseMessages.getString(PKG, "LoadFileInput.Log.RequiredNotAccessibleFiles", message));
            throw new KettleException(
                    BaseMessages.getString(PKG, "LoadFileInput.Log.RequiredNotAccessibleFilesMissing", message));
        }
    }

    /**
     * Build an empty row based on the meta-data...
     *
     * @return
     */

    private Object[] buildEmptyRow() {
        Object[] rowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());

        return rowData;
    }

    private Object[] getOneRow() throws KettleException {
        if (!openNextFile()) {
            return null;
        }

        // Build an empty row based on the meta-data
        Object[] outputRowData = buildEmptyRow();

        try {
            // Create new row or clone
            if (meta.getIsInFields()) {
                System.arraycopy(data.readrow, 0, outputRowData, 0, data.readrow.length);
            }

            // Read fields...
            for (int i = 0; i < data.nrInputFields; i++) {
                // Get field
                LoadFileInputField loadFileInputField = meta.getInputFields()[i];

                String o = null;
                switch (loadFileInputField.getElementType()) {
                case LoadFileInputField.ELEMENT_TYPE_FILECONTENT:

                    // DO Trimming!
                    switch (loadFileInputField.getTrimType()) {
                    case LoadFileInputField.TYPE_TRIM_LEFT:
                        data.filecontent = Const.ltrim(data.filecontent);
                        break;
                    case LoadFileInputField.TYPE_TRIM_RIGHT:
                        data.filecontent = Const.rtrim(data.filecontent);
                        break;
                    case LoadFileInputField.TYPE_TRIM_BOTH:
                        data.filecontent = Const.trim(data.filecontent);
                        break;
                    default:
                        break;
                    }
                    o = data.filecontent;
                    break;
                case LoadFileInputField.ELEMENT_TYPE_FILESIZE:
                    o = String.valueOf(data.fileSize);
                    break;
                default:
                    break;
                }

                int indexField = data.totalpreviousfields + i;
                // Do conversions
                //
                ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta(indexField);
                ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(indexField);
                outputRowData[indexField] = targetValueMeta.convertData(sourceValueMeta, o);

                // Do we need to repeat this field if it is null?
                if (loadFileInputField.isRepeated()) {
                    if (data.previousRow != null && o == null) {
                        outputRowData[indexField] = data.previousRow[indexField];
                    }
                }
            } // End of loop over fields...
            int rowIndex = data.totalpreviousfields + data.nrInputFields;

            // See if we need to add the filename to the row...
            if (meta.includeFilename() && meta.getFilenameField() != null && meta.getFilenameField().length() > 0) {
                outputRowData[rowIndex++] = data.filename;
            }

            // See if we need to add the row number to the row...
            if (meta.includeRowNumber() && meta.getRowNumberField() != null
                    && meta.getRowNumberField().length() > 0) {
                outputRowData[rowIndex++] = new Long(data.rownr);
            }
            // Possibly add short filename...
            if (meta.getShortFileNameField() != null && meta.getShortFileNameField().length() > 0) {
                outputRowData[rowIndex++] = data.shortFilename;
            }
            // Add Extension
            if (meta.getExtensionField() != null && meta.getExtensionField().length() > 0) {
                outputRowData[rowIndex++] = data.extension;
            }
            // add path
            if (meta.getPathField() != null && meta.getPathField().length() > 0) {
                outputRowData[rowIndex++] = data.path;
            }

            // add Hidden
            if (meta.isHiddenField() != null && meta.isHiddenField().length() > 0) {
                outputRowData[rowIndex++] = new Boolean(data.hidden);
            }
            // Add modification date
            if (meta.getLastModificationDateField() != null && meta.getLastModificationDateField().length() > 0) {
                outputRowData[rowIndex++] = data.lastModificationDateTime;
            }
            // Add Uri
            if (meta.getUriField() != null && meta.getUriField().length() > 0) {
                outputRowData[rowIndex++] = data.uriName;
            }
            // Add RootUri
            if (meta.getRootUriField() != null && meta.getRootUriField().length() > 0) {
                outputRowData[rowIndex++] = data.rootUriName;
            }
            RowMetaInterface irow = getInputRowMeta();

            data.previousRow = irow == null ? outputRowData : irow.cloneRow(outputRowData); // copy it to make
            // surely the next step doesn't change it in between...

            incrementLinesInput();
            data.rownr++;

        } catch (Exception e) {
            throw new KettleException("Impossible de charger le fichier", e);
        }

        return outputRowData;
    }

    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (LoadFileInputMeta) smi;
        data = (LoadFileInputData) sdi;

        if (super.init(smi, sdi)) {
            if (!meta.getIsInFields()) {
                try {
                    data.files = meta.getFiles(this);
                    handleMissingFiles();
                    // Create the output row meta-data
                    data.outputRowMeta = new RowMeta();
                    meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore); // get the
                                                                                                                // metadata
                                                                                                                // populated

                    // Create convert meta-data objects that will contain Date & Number formatters
                    //
                    data.convertRowMeta = data.outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);
                } catch (Exception e) {
                    logError("Error at step initialization: " + e.toString());
                    logError(Const.getStackTracker(e));
                    return false;
                }
            }
            data.rownr = 1L;
            data.nrInputFields = meta.getInputFields().length;

            return true;
        }
        return false;
    }

    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (LoadFileInputMeta) smi;
        data = (LoadFileInputData) sdi;
        if (data.file != null) {
            try {
                data.file.close();
            } catch (Exception e) {
                // Ignore errors
            }
        }
        super.dispose(smi, sdi);
    }

}