com.bayesforecast.ingdat.vigsteps.vigmake.VigMakeStep.java Source code

Java tutorial

Introduction

Here is the source code for com.bayesforecast.ingdat.vigsteps.vigmake.VigMakeStep.java

Source

/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/

package com.bayesforecast.ingdat.vigsteps.vigmake;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateUtils;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import com.bayesforecast.ingdat.vigsteps.Item;
import com.bayesforecast.ingdat.vigsteps.State;
import com.bayesforecast.ingdat.vigsteps.StateConflictException;

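/**
 * PDI (Kettle) step that collects, for every item found in the input stream,
 * the states that item goes through over time.
 * 
 * Each incoming row is split, according to the step settings, into id fields
 * (which identify an item), attribute fields, a date field and status fields.
 * The status values of the row are registered as a state of the item at the
 * row's date through the configured state-tree algorithm (ordered or general).
 * When the input is exhausted every item is emitted: one output row per
 * non-null state, carrying the ids, the attributes, the status, the state's
 * date, its end date and the previous and next status.
 *   
 * This class is the implementation of StepInterface.
 * Classes implementing this interface need to:
 * 
 * - initialize the step
 * - execute the row processing logic
 * - dispose of the step 
 * 
 * Please do not create any local fields in a StepInterface class. Store any
 * information related to the processing logic in the supplied step data interface
 * instead.  
 * 
 */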

public class VigMakeStep extends BaseStep implements StepInterface {

    private VigMakeStepMeta meta;

    /**
     * The constructor should simply pass on its arguments to the parent class.
     * 
     * @param s             step description
     * @param stepDataInterface   step data class
     * @param c               step copy
     * @param t               transformation description
     * @param dis            transformation executing
     */
    public VigMakeStep(StepMeta s, StepDataInterface stepDataInterface, int c, TransMeta t, Trans dis) {
        super(s, stepDataInterface, c, t, dis);
    }

    /**
     * This method is called by PDI during transformation startup. 
     * 
     * It should initialize required for step execution. 
     * 
     * The meta and data implementations passed in can safely be cast
     * to the step's respective implementations. 
     * 
     * It is mandatory that super.init() is called to ensure correct behavior.
     * 
     * Typical tasks executed here are establishing the connection to a database,
     * as wall as obtaining resources, like file handles.
     * 
     * @param smi    step meta interface implementation, containing the step settings
     * @param sdi   step data interface implementation, used to store runtime information
     * 
     * @return true if initialization completed successfully, false if there was an error preventing the step from working. 
     *  
     */
    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        // Casting to step-specific implementation classes is safe
        meta = (VigMakeStepMeta) smi;
        VigMakeStepData data = (VigMakeStepData) sdi;

        data.items = new HashMap<List<Object>, Item>();
        data.processedDates = new HashMap<Date, Boolean>();
        if (meta.isOrderedData()) {
            logBasic("Algoritmo Ordenado");
            data.stateInsertionAlgo = new OrderedStateTreeAlgorithm();
        } else {
            logBasic("Algoritmo No Ordenado");
            data.stateInsertionAlgo = new GeneralStateTreeAlgorithm();
        }
        return super.init(meta, data);
    }

    /**
     * Once the transformation starts executing, the processRow() method is called repeatedly
     * by PDI for as long as it returns true. To indicate that a step has finished processing rows
     * this method must call setOutputDone() and return false;
     * 
     * Steps which process incoming rows typically call getRow() to read a single row from the
     * input stream, change or add row content, call putRow() to pass the changed row on 
     * and return true. If getRow() returns null, no more rows are expected to come in, 
     * and the processRow() implementation calls setOutputDone() and returns false to
     * indicate that it is done too.
     * 
     * Steps which generate rows typically construct a new row Object[] using a call to
     * RowDataUtil.allocateRowData(numberOfFields), add row content, and call putRow() to
     * pass the new row on. Above process may happen in a loop to generate multiple rows,
     * at the end of which processRow() would call setOutputDone() and return false;
     * 
     * @param smi the step meta interface containing the step settings
     * @param sdi the step data interface that should be used to store
     * 
     * @return true to indicate that the function should be called again, false if the step is done
     */
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {

        // safely cast the step settings (meta) and runtime info (data) to specific implementations 
        //VigMakeStepMeta meta = (VigMakeStepMeta) smi;
        VigMakeStepData data = (VigMakeStepData) sdi;
        HashMap<List<Object>, Item> items = data.items;
        HashMap<Date, Boolean> processedDates = data.processedDates;
        StateTreeAlgorithm insertionAlgo = data.stateInsertionAlgo;

        // get incoming row, getRow() potentially blocks waiting for more rows, returns null if no more rows expected
        Object[] r = getRow();

        // if no more rows are expected, indicate step is finished and processRow() should not be called again
        if (r == null) {
            setOutputDone();
            return false;
        }

        // clone the input row structure and place it in our data object
        data.outputRowMeta = (RowMetaInterface) getInputRowMeta().clone();
        // use meta.getFields() to change it, so it reflects the output row structure 
        meta.getFields(data.outputRowMeta, getStepname(), null, null, this, null, null);

        // generate new item if it is not present in items.
        List<String> attributeFields = meta.getAttributeFields();
        List<String> idFields = meta.getIdFields();
        List<Object> ids = new ArrayList<Object>();
        List<Object> attributes = new ArrayList<Object>();
        for (String idField : idFields) {
            ids.add(r[getInputRowMeta().indexOfValue(idField)]);
        }
        for (String attributeField : attributeFields) {
            attributes.add(r[getInputRowMeta().indexOfValue(attributeField)]);
        }
        Item item;

        if (!items.containsKey(ids)) {
            item = new Item(ids, attributes);
            items.put(ids, item);
        } else {
            item = items.get(ids);
        }

        // gets the date field
        String dateField = meta.getDateField();
        int dateIndex = getInputRowMeta().indexOfValue(dateField);
        Date date = getInputRowMeta().getDate(r, dateIndex);
        processedDates.put(date, true);

        // generate the new status
        List<String> statusFields = meta.getStatusFields();
        List<Object> status = new ArrayList<Object>();
        for (String statusField : statusFields) {
            status.add(r[getInputRowMeta().indexOfValue(statusField)]);
        }
        State state = new State(status);
        try {
            insertionAlgo.addState(this, item, date, state);
        } catch (StateConflictException e) {
            throw new KettleException("Date conflict between 2 different status.\n" + "Item: " + item.toString()
                    + "\nState" + state.toString() + "\nDate: " + date.toString());
        }

        // indicate that processRow() should be called again
        return true;
    }

    /**
     * This method is called by PDI once the step is done processing. 
     * 
     * The dispose() method is the counterpart to init() and should release any resources
     * acquired for step execution like file handles or database connections.
     * 
     * The meta and data implementations passed in can safely be cast
     * to the step's respective implementations. 
     * 
     * It is mandatory that super.dispose() is called to ensure correct behavior.
     * 
     * @param smi    step meta interface implementation, containing the step settings
     * @param sdi   step data interface implementation, used to store runtime information
     */
    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {

        ((VigMakeStepData) sdi).items.clear();
        super.dispose(smi, sdi);
        System.gc();
    }

    private void emitTree() {
        //VigMakeStepMeta meta = (VigMakeStepMeta) this.getStepMetaInterface(); 
        VigMakeStepData data = (VigMakeStepData) this.getStepDataInterface();

        Set<Date> processedDates = data.processedDates.keySet();
        HashMap<List<Object>, Item> items = data.items;

        logBasic("Fechas procesadas: " + processedDates.size());
        logBasic("Items procesados: " + items.size());

        for (Item item : items.values()) {
            data.stateInsertionAlgo.markAbsences(item, processedDates);
            data.stateInsertionAlgo.cleanStates(item);
            emitItem(data, item, true);
        }

    }

    public void emitItem(VigMakeStepData data, Item item, boolean emitLastState) {
        Set<Date> processedDates = data.processedDates.keySet();
        Object[] ids = item.getId().toArray();
        Object[] attributes = item.getAttributes().toArray();
        Object[] itemData = ArrayUtils.addAll(ids, attributes);
        Set<Entry<Date, State>> set = item.getStates().entrySet();
        for (Entry<Date, State> status : set) {
            if (!status.getValue().isNullState()
                    && !(item.getStates().lastKey().equals(status.getKey()) && !emitLastState)) {
                Object[] itemStateData = ArrayUtils.addAll(itemData, status.getValue().getStatus().toArray());
                Object[] itemStateData2 = ArrayUtils.add(itemStateData, status.getKey());
                Date dateEnd = null;
                Object[] statusPrev = new Object[meta.getStatusFields().size()];
                Object[] statusNext = new Object[meta.getStatusFields().size()];
                if (status.getValue().getPrevious() != null && !status.getValue().getPrevious().isNullState()) {
                    statusPrev = status.getValue().getPrevious().getStatus().toArray();
                }
                if (status.getValue().getNext() != null) {
                    if (!status.getValue().getNext().isNullState()) {
                        statusNext = status.getValue().getNext().getStatus().toArray();
                        dateEnd = item.getStates().higherKey(status.getKey());
                    } else {
                        dateEnd = DateUtils.addDays(item.getStates().higherKey(status.getKey()), -1);
                    }
                }
                Object[] itemStateData3 = ArrayUtils.add(itemStateData2, dateEnd);
                Object[] itemStateData4 = ArrayUtils.addAll(itemStateData3, statusPrev);
                Object[] itemStateData5 = ArrayUtils.addAll(itemStateData4, statusNext);

                // put the row to the output row stream
                try {
                    putRow(data.outputRowMeta, itemStateData5);
                } catch (KettleStepException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }

    @Override
    public void setOutputDone() {
        // TODO Auto-generated method stub
        emitTree();
        super.setOutputDone();
    }

}