org.pentaho.di.trans.steps.xmlinput.XMLInput.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.trans.steps.xmlinput.XMLInput.java

Source

/*******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2012 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.xmlinput;

import org.apache.commons.vfs.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Read all sorts of text files, convert them to rows and writes these to one or
 * more output streams.
 * 
 * @author Matt
 * @since 4-apr-2003
 */
public class XMLInput extends BaseStep implements StepInterface {
    private static Class<?> PKG = XMLInputMeta.class; // for i18n purposes, needed by Translator2!!   $NON-NLS-1$

    private XMLInputMeta meta;

    private XMLInputData data;

    public XMLInput(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {

        if (first) // we just got started
        {
            first = false;
            data.outputRowMeta = new RowMeta();
            meta.getFields(data.outputRowMeta, getStepname(), null, null, this);

            // For String to <type> conversions, we allocate a conversion meta data row as well...
            //
            data.convertRowMeta = data.outputRowMeta.clone();
            for (int i = 0; i < data.convertRowMeta.size(); i++) {
                data.convertRowMeta.getValueMeta(i).setType(ValueMetaInterface.TYPE_STRING);
            }
        }

        Object[] outputRowData = getRowFromXML();
        if (outputRowData == null) {
            setOutputDone(); // signal end to receiver(s)
            return false; // This is the end of this step.
        }

        if (log.isRowLevel())
            logRowlevel(BaseMessages.getString(PKG, "XMLInput.Log.ReadRow", outputRowData.toString()));

        incrementLinesInput();

        putRow(data.outputRowMeta, outputRowData);

        // limit has been reached, stop now.
        if (meta.getRowLimit() > 0 && data.rownr >= meta.getRowLimit()) {
            setOutputDone();
            return false;
        }

        return true;
    }

    private Object[] getRowFromXML() throws KettleValueException {
        // finished reading the file, read the next file

        while (data.itemPosition >= data.itemCount || data.file == null) {
            data.file = null;
            if (!openNextFile()) {
                return null;
            }
        }

        Object[] outputRowData = buildEmptyRow();

        // Get the item in the XML file...

        // First get the appropriate node

        Node itemNode;
        if (meta.getInputPosition().length > 1) {
            itemNode = XMLHandler.getSubNodeByNr(data.section, data.itemElement, data.itemPosition);
        } else {
            itemNode = data.section; // Only the root node, 1 element to read
            // in the whole document.
        }
        data.itemPosition++;

        // Read from the Node...
        for (int i = 0; i < meta.getInputFields().length; i++) {
            Node node = itemNode;

            XMLInputField xmlInputField = meta.getInputFields()[i];

            // This value will contain the value we're looking for...
            //
            String value = null;

            for (int p = 0; (value == null) && node != null && p < xmlInputField.getFieldPosition().length; p++) {
                XMLInputFieldPosition pos = xmlInputField.getFieldPosition()[p];

                switch (pos.getType()) {
                case XMLInputFieldPosition.XML_ELEMENT: {
                    if (pos.getElementNr() <= 1) {
                        Node subNode = XMLHandler.getSubNode(node, pos.getName());
                        if (subNode != null) {
                            if (p == xmlInputField.getFieldPosition().length - 1) // last
                            // level
                            {
                                value = XMLHandler.getNodeValue(subNode);
                            }
                        } else {
                            if (log.isDebug())
                                logDebug(BaseMessages.getString(PKG, "XMLInput.Log.UnableToFindPosition",
                                        pos.toString(), node.toString()));
                        }
                        node = subNode;
                    } else
                    // Multiple possible values: get number
                    // pos.getElementNr()!
                    {
                        Node subNode = XMLHandler.getSubNodeByNr(node, pos.getName(), pos.getElementNr() - 1,
                                false);
                        if (subNode != null) {
                            if (p == xmlInputField.getFieldPosition().length - 1) // last
                            // level
                            {
                                value = XMLHandler.getNodeValue(subNode);
                            }
                        } else {
                            if (log.isDebug())
                                logDebug(BaseMessages.getString(PKG, "XMLInput.Log.UnableToFindPosition",
                                        pos.toString(), node.toString()));
                        }
                        node = subNode;
                    }
                }
                    break;

                case XMLInputFieldPosition.XML_ATTRIBUTE: {
                    value = XMLHandler.getTagAttribute(node, pos.getName());
                }
                    break;
                case XMLInputFieldPosition.XML_ROOT: {
                    value = XMLHandler.getNodeValue(node);
                }
                    break;
                default:
                    break;
                }

            }

            // OK, we have grabbed the string called value
            // Trim it, convert it, ...

            // DO Trimming!
            switch (xmlInputField.getTrimType()) {
            case XMLInputField.TYPE_TRIM_LEFT:
                value = Const.ltrim(value);
                break;
            case XMLInputField.TYPE_TRIM_RIGHT:
                value = Const.rtrim(value);
                break;
            case XMLInputField.TYPE_TRIM_BOTH:
                value = Const.trim(value);
                break;
            default:
                break;
            }

            // System.out.println("after trim, field #"+i+" : "+v);

            // DO CONVERSIONS...
            //
            ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta(i);
            ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(i);
            outputRowData[i] = targetValueMeta.convertData(sourceValueMeta, value);

            // Do we need to repeat this field if it is null?
            if (meta.getInputFields()[i].isRepeated()) {
                if (data.previousRow != null && Const.isEmpty(value)) {
                    outputRowData[i] = data.previousRow[i];
                }
            }
        } // End of loop over fields...

        int outputIndex = meta.getInputFields().length;

        // See if we need to add the filename to the row...
        if (meta.includeFilename() && !Const.isEmpty(meta.getFilenameField())) {
            outputRowData[outputIndex++] = KettleVFS.getFilename(data.file);
        }

        // See if we need to add the row number to the row...
        if (meta.includeRowNumber() && !Const.isEmpty(meta.getRowNumberField())) {
            outputRowData[outputIndex++] = new Long(data.rownr);
        }

        RowMetaInterface irow = getInputRowMeta();

        data.previousRow = irow == null ? outputRowData : (Object[]) irow.cloneRow(outputRowData); // copy it to make
        // surely the next step doesn't change it in between...
        data.rownr++;

        // Throw away the information in the item?
        NodeList nodeList = itemNode.getChildNodes();
        for (int i = 0; i < nodeList.getLength(); i++) {
            itemNode.removeChild(nodeList.item(i));
        }

        return outputRowData;
    }

    /**
     * Build an empty row based on the meta-data...
     * 
     * @return
     */
    private Object[] buildEmptyRow() {
        return RowDataUtil.allocateRowData(data.outputRowMeta.size());
    }

    private boolean openNextFile() {
        try {
            if (data.filenr >= data.files.size()) // finished processing!
            {
                if (log.isDetailed())
                    logDetailed(BaseMessages.getString(PKG, "XMLInput.Log.FinishedProcessing"));
                return false;
            }

            // Is this the last file?
            data.last_file = (data.filenr == data.files.size() - 1);
            data.file = (FileObject) data.files.get(data.filenr);

            logBasic(BaseMessages.getString(PKG, "XMLInput.Log.OpeningFile", data.file.toString()));

            // Move file pointer ahead!
            data.filenr++;

            String baseURI = this.environmentSubstitute(meta.getFileBaseURI());
            if (Const.isEmpty(baseURI)) {
                baseURI = data.file.getParent().getName().getURI();
            }

            // Open the XML document
            data.document = XMLHandler.loadXMLFile(data.file, baseURI, meta.isIgnoreEntities(),
                    meta.isNamespaceAware());

            // Add this to the result file names...
            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, data.file,
                    getTransMeta().getName(), getStepname());
            resultFile.setComment("File was read by an XML input step");
            addResultFile(resultFile);

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "XMLInput.Log.FileOpened", data.file.toString()));

            // Position in the file...
            data.section = data.document;

            for (int i = 0; i < meta.getInputPosition().length - 1; i++) {
                data.section = XMLHandler.getSubNode(data.section, meta.getInputPosition()[i]);
            }
            // Last element gets repeated: what's the name?
            data.itemElement = meta.getInputPosition()[meta.getInputPosition().length - 1];

            data.itemCount = XMLHandler.countNodes(data.section, data.itemElement);
            data.itemPosition = (int) meta.getNrRowsToSkip();
        } catch (Exception e) {
            logError(BaseMessages.getString(PKG, "XMLInput.Log.UnableToOpenFile", "" + data.filenr,
                    data.file.toString(), e.toString()));
            stopAll();
            setErrors(1);
            return false;
        }
        return true;
    }

    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (XMLInputMeta) smi;
        data = (XMLInputData) sdi;

        if (super.init(smi, sdi)) {
            data.files = meta.getFiles(this).getFiles();
            if (data.files == null || data.files.size() == 0) {
                logError(BaseMessages.getString(PKG, "XMLInput.Log.NoFiles"));
                return false;
            }

            data.rownr = 1L;

            return true;
        }
        return false;
    }

    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (XMLInputMeta) smi;
        data = (XMLInputData) sdi;

        super.dispose(smi, sdi);
    }

}