be.ibridge.kettle.trans.step.excelinput.ExcelInput.java Source code

Java tutorial

Introduction

Here is the source code for be.ibridge.kettle.trans.step.excelinput.ExcelInput.java

Source

/**********************************************************************
 **                                                                   **
 **               This code belongs to the KETTLE project.            **
 **                                                                   **
 ** Kettle, from version 2.2 on, is released into the public domain   **
 ** under the Lesser GNU Public License (LGPL).                       **
 **                                                                   **
 ** For more details, please read the document LICENSE.txt, included  **
 ** in this project                                                   **
 **                                                                   **
 ** http://www.kettle.be                                              **
 ** info@kettle.be                                                    **
 **                                                                   **
 **********************************************************************/

package be.ibridge.kettle.trans.step.excelinput;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.TimeZone;

import jxl.BooleanCell;
import jxl.Cell;
import jxl.CellType;
import jxl.DateCell;
import jxl.LabelCell;
import jxl.NumberCell;
import jxl.Sheet;
import jxl.Workbook;

import org.apache.commons.vfs.FileObject;

import be.ibridge.kettle.core.Const;
import be.ibridge.kettle.core.ResultFile;
import be.ibridge.kettle.core.Row;
import be.ibridge.kettle.core.exception.KettleException;
import be.ibridge.kettle.core.value.Value;
import be.ibridge.kettle.core.vfs.KettleVFS;
import be.ibridge.kettle.trans.Trans;
import be.ibridge.kettle.trans.TransMeta;
import be.ibridge.kettle.trans.step.BaseStep;
import be.ibridge.kettle.trans.step.StepDataInterface;
import be.ibridge.kettle.trans.step.StepInterface;
import be.ibridge.kettle.trans.step.StepMeta;
import be.ibridge.kettle.trans.step.StepMetaInterface;
import be.ibridge.kettle.trans.step.errorhandling.CompositeFileErrorHandler;
import be.ibridge.kettle.trans.step.errorhandling.FileErrorHandlerContentLineNumber;
import be.ibridge.kettle.trans.step.errorhandling.FileErrorHandlerMissingFiles;
import be.ibridge.kettle.trans.step.fileinput.FileInputList;
import be.ibridge.kettle.trans.step.playlist.FilePlayListAll;
import be.ibridge.kettle.trans.step.playlist.FilePlayListReplay;

/**
 * This class reads data from one or more Microsoft Excel files.
 * 
 * @author Matt
 * @since 19-NOV-2003
 * 
 */
public class ExcelInput extends BaseStep implements StepInterface {
    private ExcelInputMeta meta;

    private ExcelInputData data;

    public ExcelInput(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    private Row fillRow(Row baserow, int startcolumn, ExcelInputRow excelInputRow) throws KettleException {
        Row r = new Row(baserow);

        // Keep track whether or not we handled an error for this line yet.
        boolean errorHandled = false;

        // Set values in the row...
        for (int i = startcolumn; i < excelInputRow.cells.length && i - startcolumn < r.size(); i++) {
            Cell cell = excelInputRow.cells[i];

            int rowcolumn = i - startcolumn;

            Value v = r.getValue(rowcolumn);

            try {
                checkType(cell, v);
            } catch (KettleException ex) {
                if (!meta.isErrorIgnored())
                    throw ex;
                logBasic("Warning processing [" + v + "] from Excel file [" + data.filename + "] : "
                        + ex.getMessage());
                if (!errorHandled) {
                    data.errorHandler.handleLineError(excelInputRow.rownr, excelInputRow.sheetName);
                    errorHandled = true;
                }

                if (meta.isErrorLineSkipped()) {
                    r.setIgnore();
                    return r;
                }
            }

            CellType cellType = cell.getType();
            if (CellType.BOOLEAN.equals(cellType) || CellType.BOOLEAN_FORMULA.equals(cellType)) {
                v.setValue(((BooleanCell) cell).getValue());
            } else {
                if (CellType.DATE.equals(cellType) || CellType.DATE_FORMULA.equals(cellType)) {
                    Date date = ((DateCell) cell).getDate();
                    long time = date.getTime();
                    int offset = TimeZone.getDefault().getOffset(time);
                    v.setValue(new Date(time - offset));
                } else {
                    if (CellType.LABEL.equals(cellType) || CellType.STRING_FORMULA.equals(cellType)) {
                        v.setValue(((LabelCell) cell).getString());
                        switch (meta.getField()[rowcolumn].getTrimType()) {
                        case ExcelInputMeta.TYPE_TRIM_LEFT:
                            v.ltrim();
                            break;
                        case ExcelInputMeta.TYPE_TRIM_RIGHT:
                            v.rtrim();
                            break;
                        case ExcelInputMeta.TYPE_TRIM_BOTH:
                            v.trim();
                            break;
                        default:
                            break;
                        }
                    } else {
                        if (CellType.NUMBER.equals(cellType) || CellType.NUMBER_FORMULA.equals(cellType)) {
                            v.setValue(((NumberCell) cell).getValue());
                        } else {
                            if (log.isDetailed())
                                logDetailed("Unknown type : " + cell.getType().toString() + " : ["
                                        + cell.getContents() + "]");
                            v.setNull();
                        }
                    }
                }
            }

            ExcelInputField field = meta.getField()[rowcolumn];

            // Change to the appropriate type if needed...
            //
            try {
                if (v.getType() != field.getType()) {
                    switch (v.getType()) {
                    // Use case: we find a String: convert it using the supplied format to the desired type...
                    //
                    case Value.VALUE_TYPE_STRING:
                        switch (field.getType()) {
                        case Value.VALUE_TYPE_DATE:
                            v.str2dat(field.getFormat());
                            break;
                        case Value.VALUE_TYPE_NUMBER:
                            v.str2num(field.getFormat(), field.getDecimalSymbol(), field.getGroupSymbol(),
                                    field.getCurrencySymbol());
                            break;
                        default:
                            v.setType(field.getType());
                            break;
                        }
                        break;

                    // Use case: we find a numeric value: convert it using the supplied format to the desired data type...
                    //
                    case Value.VALUE_TYPE_NUMBER:
                    case Value.VALUE_TYPE_INTEGER:
                        switch (field.getType()) {
                        case Value.VALUE_TYPE_STRING:
                            v.num2str(field.getFormat(), field.getDecimalSymbol(), field.getGroupSymbol(),
                                    field.getCurrencySymbol());
                            break;
                        case Value.VALUE_TYPE_DATE:
                            v.num2str("#").str2dat(field.getFormat());
                            break;
                        default:
                            v.setType(field.getType());
                            break;
                        }
                        break;
                    // Use case: we find a date: convert it using the supplied format to String...
                    //
                    case Value.VALUE_TYPE_DATE:
                        switch (field.getType()) {
                        case Value.VALUE_TYPE_STRING:
                            v.dat2str(field.getFormat());
                            break;
                        default:
                            v.setType(field.getType());
                            break;
                        }
                        break;
                    default:
                        v.setType(field.getType());
                    }
                }
            } catch (KettleException ex) {
                if (!meta.isErrorIgnored())
                    throw ex;
                logBasic("Warning processing [" + v + "] from Excel file [" + data.filename + "] : "
                        + ex.toString());
                if (!errorHandled) // check if we didn't log an error already for this one.
                {
                    data.errorHandler.handleLineError(excelInputRow.rownr, excelInputRow.sheetName);
                    errorHandled = true;
                }

                if (meta.isErrorLineSkipped()) {
                    r.setIgnore();
                    return r;
                } else {
                    v.setNull();
                }
            }

            // Set the meta-data of the field: length and precision
            //
            v.setLength(meta.getField()[rowcolumn].getLength(), meta.getField()[rowcolumn].getPrecision());
        }

        // Do we need to include the filename?
        if (meta.getFileField() != null && meta.getFileField().length() > 0) {
            Value value = new Value(meta.getFileField(), data.filename);
            value.setLength(data.maxfilelength);
            r.addValue(value);
        }

        // Do we need to include the sheetname?
        if (meta.getSheetField() != null && meta.getSheetField().length() > 0) {
            Value value = new Value(meta.getSheetField(), excelInputRow.sheetName);
            value.setLength(data.maxsheetlength);
            r.addValue(value);
        }

        // Do we need to include the sheet rownumber?
        if (meta.getSheetRowNumberField() != null && meta.getSheetRowNumberField().length() > 0) {
            Value value = new Value(meta.getSheetRowNumberField(), Value.VALUE_TYPE_INTEGER);
            value.setValue(data.rownr);
            r.addValue(value);
        }

        // Do we need to include the rownumber?
        if (meta.getRowNumberField() != null && meta.getRowNumberField().length() > 0) {
            Value value = new Value(meta.getRowNumberField(), Value.VALUE_TYPE_INTEGER);
            value.setValue(linesWritten + 1);
            r.addValue(value);
        }

        return r;
    }

    private void checkType(Cell cell, Value v) throws KettleException {
        if (!meta.isStrictTypes())
            return;
        CellType cellType = cell.getType();
        if (cellType.equals(CellType.BOOLEAN)) {
            if (!(v.getType() == Value.VALUE_TYPE_STRING || v.getType() == Value.VALUE_TYPE_NONE
                    || v.getType() == Value.VALUE_TYPE_BOOLEAN))
                throw new KettleException("Invalid type Boolean, expected " + v.getTypeDesc());
        } else if (cellType.equals(CellType.DATE)) {
            if (!(v.getType() == Value.VALUE_TYPE_STRING || v.getType() == Value.VALUE_TYPE_NONE
                    || v.getType() == Value.VALUE_TYPE_DATE))
                throw new KettleException(
                        "Invalid type Date: " + cell.getContents() + ", expected " + v.getTypeDesc());
        } else if (cellType.equals(CellType.LABEL)) {
            if (v.getType() == Value.VALUE_TYPE_BOOLEAN || v.getType() == Value.VALUE_TYPE_DATE
                    || v.getType() == Value.VALUE_TYPE_INTEGER || v.getType() == Value.VALUE_TYPE_NUMBER)
                throw new KettleException(
                        "Invalid type Label: " + cell.getContents() + ", expected " + v.getTypeDesc());
        } else if (cellType.equals(CellType.EMPTY)) {
            // ok
        } else if (cellType.equals(CellType.NUMBER)) {
            if (!(v.getType() == Value.VALUE_TYPE_STRING || v.getType() == Value.VALUE_TYPE_NONE
                    || v.getType() == Value.VALUE_TYPE_INTEGER || v.getType() == Value.VALUE_TYPE_BIGNUMBER
                    || v.getType() == Value.VALUE_TYPE_NUMBER))
                throw new KettleException(
                        "Invalid type Number: " + cell.getContents() + ", expected " + v.getTypeDesc());
        } else {
            throw new KettleException("Unsupported type " + cellType + " with value: " + cell.getContents());
        }
    }

    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        meta = (ExcelInputMeta) smi;
        data = (ExcelInputData) sdi;

        if (first) {
            first = false;

            if (meta.isAcceptingFilenames()) {
                // Read the files from the specified input stream...
                data.files.getFiles().clear();

                int idx = -1;
                Row fileRow = getRowFrom(meta.getAcceptingStepName());
                while (fileRow != null) {
                    if (idx < 0) {
                        idx = fileRow.searchValueIndex(meta.getAcceptingField());
                        if (idx < 0) {
                            logError("The filename field [" + meta.getAcceptingField()
                                    + "] could not be found in the input rows.");
                            setErrors(1);
                            stopAll();
                            return false;
                        }
                    }
                    Value fileValue = fileRow.getValue(idx);
                    try {
                        data.files.addFile(KettleVFS.getFileObject(fileValue.getString()));
                    } catch (IOException e) {
                        throw new KettleException(
                                "Unexpected error creating file object for " + fileValue.getString(), e);
                    }

                    // Grab another row
                    fileRow = getRowFrom(meta.getAcceptingStepName());
                }

            }

            handleMissingFiles();
        }

        // See if we're not done processing...
        // We are done processing if the filenr >= number of files.
        if (data.filenr >= data.files.nrOfFiles()) {
            if (log.isDetailed())
                logDetailed("No more files to be processes! (" + data.filenr + " files done)");
            setOutputDone(); // signal end to receiver(s)
            return false; // end of data or error.
        }

        if (meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit()) {
            // The close of the workbook is in dispose()
            if (log.isDetailed())
                logDetailed("Row limit of [" + meta.getRowLimit() + "] reached: stop processing.");
            setOutputDone(); // signal end to receiver(s)
            return false; // end of data or error.
        }

        Row r = getRowFromWorkbooks();
        if (r != null) {
            if (!r.isIgnored()) {
                // OK, see if we need to repeat values.
                if (data.previousRow != null) {
                    for (int i = 0; i < meta.getField().length; i++) {
                        Value field = r.getValue(i);
                        if (field.isNull() && meta.getField()[i].isRepeated()) {
                            // Take the value from the previous row.
                            Value repeat = data.previousRow.getValue(i);
                            field.setValue(repeat);
                        }
                    }
                }

                // Remember this row for the next time around!
                data.previousRow = r;

                // Send out the good news: we found a row of data!
                putRow(r);
            }
            return true;
        } else {
            return false;
        }

    }

    private void handleMissingFiles() throws KettleException {
        List nonExistantFiles = data.files.getNonExistantFiles();

        if (nonExistantFiles.size() != 0) {
            String message = FileInputList.getRequiredFilesDescription(nonExistantFiles);
            log.logBasic("Required files", "WARNING: Missing " + message);
            if (meta.isErrorIgnored())
                for (Iterator iter = nonExistantFiles.iterator(); iter.hasNext();) {
                    data.errorHandler.handleNonExistantFile((FileObject) iter.next());
                }
            else
                throw new KettleException("Following required files are missing: " + message);
        }

        List nonAccessibleFiles = data.files.getNonAccessibleFiles();
        if (nonAccessibleFiles.size() != 0) {
            String message = FileInputList.getRequiredFilesDescription(nonAccessibleFiles);
            log.logBasic("Required files", "WARNING: Not accessible " + message);
            if (meta.isErrorIgnored())
                for (Iterator iter = nonAccessibleFiles.iterator(); iter.hasNext();) {
                    data.errorHandler.handleNonAccessibleFile((FileObject) iter.next());
                }
            else
                throw new KettleException("Following required files are not accessible: " + message);
        }
    }

    public Row getRowFromWorkbooks() {
        // This procedure outputs a single Excel data row on the destination
        // rowsets...

        Row retval = new Row();
        retval.setIgnore();

        try {
            // First, see if a file has been opened?
            if (data.workbook == null) {
                // Open a new workbook..
                data.file = data.files.getFile(data.filenr);
                data.filename = KettleVFS.getFilename(data.file);

                ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, data.file,
                        getTransMeta().getName(), toString());
                resultFile.setComment("File was read by an Excel input step");
                addResultFile(resultFile);

                if (log.isDetailed())
                    logDetailed("Opening workbook #" + data.filenr + " : " + data.filename);
                data.workbook = Workbook.getWorkbook(data.file.getContent().getInputStream());
                data.errorHandler.handleFile(data.file);
                // Start at the first sheet again...
                data.sheetnr = 0;

            }

            boolean nextsheet = false;

            // What sheet were we handling?
            if (log.isDebug())
                logDetailed("Get sheet #" + data.filenr + "." + data.sheetnr);
            String sheetName = meta.getSheetName()[data.sheetnr];
            Sheet sheet = data.workbook.getSheet(sheetName);
            if (sheet != null) {
                // at what row do we continue reading?
                if (data.rownr < 0) {
                    data.rownr = meta.getStartRow()[data.sheetnr];

                    // Add an extra row if we have a header row to skip...
                    if (meta.startsWithHeader()) {
                        data.rownr++;
                    }
                }
                // Start at the specified column
                data.colnr = meta.getStartColumn()[data.sheetnr];

                // Build a new row and fill in the data from the sheet...
                try {
                    Cell line[] = sheet.getRow(data.rownr);
                    // Already increase cursor 1 row               
                    int lineNr = ++data.rownr;
                    // Excel starts counting at 0
                    if (!data.filePlayList.isProcessingNeeded(data.file, lineNr, sheetName)) {
                        retval.setIgnore();
                    } else {
                        if (log.isRowLevel())
                            logRowlevel("Get line #" + lineNr + " from sheet #" + data.filenr + "." + data.sheetnr);

                        if (log.isRowLevel())
                            logRowlevel("Read line with " + line.length + " cells");
                        ExcelInputRow excelInputRow = new ExcelInputRow(sheet.getName(), lineNr, line);
                        Row r = fillRow(data.row, data.colnr, excelInputRow);
                        if (log.isRowLevel())
                            logRowlevel("Converted line to row #" + lineNr + " : " + r);

                        boolean isEmpty = isLineEmpty(line);
                        if (!isEmpty || !meta.ignoreEmptyRows()) {
                            // Put the row
                            retval = r;
                        }

                        if (isEmpty && meta.stopOnEmpty()) {
                            nextsheet = true;
                        }
                    }
                } catch (ArrayIndexOutOfBoundsException e) {
                    if (log.isRowLevel())
                        logRowlevel("Out of index error: move to next sheet!");
                    // We tried to read below the last line in the sheet.
                    // Go to the next sheet...
                    nextsheet = true;
                }
            } else {
                nextsheet = true;
            }

            if (nextsheet) {
                // Go to the next sheet
                data.sheetnr++;

                // Reset the start-row:
                data.rownr = -1;

                // no previous row yet, don't take it from the previous sheet!
                // (that whould be plain wrong!)
                data.previousRow = null;

                // Perhaps it was the last sheet?
                if (data.sheetnr >= meta.getSheetName().length) {
                    jumpToNextFile();
                }
            }
        } catch (Exception e) {
            logError("Error processing row from Excel file [" + data.filename + "] : " + e.toString());
            setErrors(1);
            stopAll();
            return null;
        }

        return retval;
    }

    private boolean isLineEmpty(Cell[] line) {
        if (line.length == 0)
            return true;

        boolean isEmpty = true;
        for (int i = 0; i < line.length && isEmpty; i++) {
            if (!Const.isEmpty(line[i].getContents()))
                isEmpty = false;
        }
        return isEmpty;
    }

    private void jumpToNextFile() throws KettleException {
        data.sheetnr = 0;

        // Reset the start-row:
        data.rownr = -1;

        // no previous row yet, don't take it from the previous sheet! (that
        // whould be plain wrong!)
        data.previousRow = null;

        // Close the workbook!
        data.workbook.close();
        data.workbook = null; // marker to open again.
        data.errorHandler.close();

        // advance to the next file!
        data.filenr++;
    }

    private void initErrorHandling() {
        List errorHandlers = new ArrayList(2);
        if (meta.getLineNumberFilesDestinationDirectory() != null)
            errorHandlers.add(new FileErrorHandlerContentLineNumber(getTrans().getCurrentDate(),
                    meta.getLineNumberFilesDestinationDirectory(), meta.getLineNumberFilesExtension(), "Latin1",
                    this));
        if (meta.getErrorFilesDestinationDirectory() != null)
            errorHandlers.add(new FileErrorHandlerMissingFiles(getTrans().getCurrentDate(),
                    meta.getErrorFilesDestinationDirectory(), meta.getErrorFilesExtension(), "Latin1", this));
        data.errorHandler = new CompositeFileErrorHandler(errorHandlers);
    }

    private void initReplayFactory() {
        Date replayDate = getTrans().getReplayDate();
        if (replayDate == null)
            data.filePlayList = FilePlayListAll.INSTANCE;
        else
            data.filePlayList = new FilePlayListReplay(replayDate, meta.getLineNumberFilesDestinationDirectory(),
                    meta.getLineNumberFilesExtension(), meta.getErrorFilesDestinationDirectory(),
                    meta.getErrorFilesExtension(), "Latin1");
    }

    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (ExcelInputMeta) smi;
        data = (ExcelInputData) sdi;

        if (super.init(smi, sdi)) {
            initErrorHandling();
            initReplayFactory();
            data.files = meta.getFileList();
            if (data.files.nrOfFiles() == 0 && data.files.nrOfMissingFiles() == 0 && !meta.isAcceptingFilenames()) {
                logError("No file(s) specified! Stop processing.");
                return false;
            }

            data.row = meta.getEmptyFields();
            if (data.row.size() > 0) {
                // Determine the maximum filename length...
                data.maxfilelength = -1;

                for (Iterator iter = data.files.getFiles().iterator(); iter.hasNext();) {
                    FileObject file = (FileObject) iter.next();
                    String name = KettleVFS.getFilename(file);
                    if (name.length() > data.maxfilelength)
                        data.maxfilelength = name.length();
                }

                // Determine the maximum sheetname length...
                data.maxsheetlength = -1;
                for (int i = 0; i < meta.getSheetName().length; i++)
                    if (meta.getSheetName()[i].length() > data.maxsheetlength)
                        data.maxsheetlength = meta.getSheetName()[i].length();

                return true;
            } else {
                logError("No input fields defined!");
            }

        }
        return false;
    }

    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (ExcelInputMeta) smi;
        data = (ExcelInputData) sdi;

        if (data.workbook != null)
            data.workbook.close();
        try {
            data.errorHandler.close();
        } catch (KettleException e) {
            if (log.isDebug()) {
                logDebug("Could not close errorHandler: " + e.toString());
                logDebug(Const.getStackTracker(e));
            }
        }

        super.dispose(smi, sdi);
    }

    public void run() {
        try {
            logBasic("Starting to run...");
            while (processRow(meta, data) && !isStopped())
                ;
        } catch (Exception e) {
            logError("Unexpected error : " + e.toString());
            logError(Const.getStackTracker(e));
            setErrors(1);
            stopAll();
        } finally {
            dispose(meta, data);
            logSummary();
            markStop();
        }
    }
}