pt.webdetails.cda.dataaccess.JoinCompoundDataAccess.java Source code

Java tutorial

Introduction

Here is the source code for pt.webdetails.cda.dataaccess.JoinCompoundDataAccess.java

Source

/*!
* Copyright 2002 - 2013 Webdetails, a Pentaho company.  All rights reserved.
* 
* This software was developed by Webdetails and is provided under the terms
* of the Mozilla Public License, Version 2.0, or any later version. You may not use
* this file except in compliance with the license. If you need a copy of the license,
* please go to  http://mozilla.org/MPL/2.0/. The Initial Developer is Webdetails.
*
* Software distributed under the Mozilla Public License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or  implied. Please refer to
* the license for the specific language governing your rights and limitations.
*/

package pt.webdetails.cda.dataaccess;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import javax.swing.table.TableModel;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Element;
import org.pentaho.di.core.util.StringUtil;
import org.pentaho.metadata.model.concept.types.JoinType;
import org.pentaho.reporting.libraries.base.util.StringUtils;

import pt.webdetails.cda.CdaEngine;
import pt.webdetails.cda.query.QueryOptions;
import pt.webdetails.cda.settings.UnknownDataAccessException;
import pt.webdetails.robochef.DynamicTransConfig;
import pt.webdetails.robochef.DynamicTransConfig.EntryType;
import pt.webdetails.robochef.DynamicTransMetaConfig;
import pt.webdetails.robochef.DynamicTransMetaConfig.Type;
import pt.webdetails.robochef.DynamicTransformation;
import pt.webdetails.cda.utils.kettle.RowMetaToTableModel;
import pt.webdetails.robochef.RowProductionManager;
import pt.webdetails.robochef.TableModelInput;
import pt.webdetails.cda.connections.ConnectionCatalog.ConnectionType;
import pt.webdetails.cda.utils.MetadataTableModel;

/**
 * Compound data access that joins the results of two other data accesses.
 *
 * <p>The left and right data accesses are looked up by id in the CDA settings and queried. Their
 * table models are then pushed into a dynamically built Kettle transformation (two Injector
 * steps, two SortRows steps and one MergeJoin step) and the rows written by the merge join are
 * returned as the query result.</p>
 *
 * <p>NOTE(review): the row producers of the running query live in an instance field that is
 * cleared at the start of every query, so concurrent queries on the same instance look unsafe --
 * confirm that callers serialize access.</p>
 */
public class JoinCompoundDataAccess extends CompoundDataAccess implements RowProductionManager {

    /** Configuration property limiting how many rows are inspected when inferring a column type. */
    public static final String MAX_ROWS_VALUE_TYPE_SEARCH_PROPERTY = "pt.webdetails.cda.TypeSearchMaxRows";
    private static final Log logger = LogFactory.getLog(JoinCompoundDataAccess.class);
    private static final String TYPE = "join";
    private static final String ROW_PRODUCTION_TIMEOUT_PROPERTY =
            "pt.webdetails.cda.DefaultRowProductionTimeout";
    private static final String ROW_PRODUCTION_TIMEOUT_UNIT_PROPERTY =
            "pt.webdetails.cda.DefaultRowProductionTimeoutTimeUnit";
    private static final long DEFAULT_ROW_PRODUCTION_TIMEOUT = 120;
    private static final TimeUnit DEFAULT_ROW_PRODUCTION_TIMEOUT_UNIT = TimeUnit.SECONDS;
    /** Default maximum number of rows inspected when inferring the type of an Object column. */
    private static final int DEFAULT_MAX_ROWS_VALUE_TYPE_SEARCH = 500;

    private JoinType joinType;
    private String leftId;
    private String rightId;
    /** Column indexes (as strings) of the join keys in the left result set. */
    private String[] leftKeys;
    /** Column indexes (as strings) of the join keys in the right result set. */
    private String[] rightKeys;
    private final ExecutorService executorService = Executors.newCachedThreadPool();
    /** Row producers of the query currently being executed; reset by every query. */
    private final Collection<Callable<Boolean>> inputCallables = new ArrayList<Callable<Boolean>>();

    public JoinCompoundDataAccess() {
    }

    /**
     * Builds the data access from its XML definition.
     *
     * <p>Reads the optional {@code JoinType} child (defaulting to {@link JoinType#FULL_OUTER}) and
     * the {@code Left} and {@code Right} children with their {@code id} and comma-separated
     * {@code keys} attributes.</p>
     *
     * @param element the XML element describing this data access
     * @throws IllegalArgumentException if the {@code Left} or {@code Right} element or its
     *         {@code keys} attribute is missing, or if the join type is not a {@link JoinType} name
     */
    public JoinCompoundDataAccess(final Element element) {
        super(element);

        Element joinTypeNode = (Element) element.selectSingleNode("JoinType");
        if (joinTypeNode != null && org.apache.commons.lang.StringUtils.isNotBlank(joinTypeNode.getText())) {
            // Trim so that surrounding whitespace/newlines in the XML do not break the enum lookup.
            joinType = JoinType.valueOf(joinTypeNode.getText().trim());
        } else {
            joinType = JoinType.FULL_OUTER;
        }

        Element left = requireChild(element, "Left");
        Element right = requireChild(element, "Right");
        leftId = left.attributeValue("id");
        rightId = right.attributeValue("id");

        leftKeys = parseKeys(left, "Left");
        rightKeys = parseKeys(right, "Right");
    }

    /** Returns the named child element, failing with a descriptive message when it is absent. */
    private static Element requireChild(Element parent, String childName) {
        Element child = (Element) parent.selectSingleNode(childName);
        if (child == null) {
            throw new IllegalArgumentException(
                    "Join data access definition is missing the <" + childName + "> element");
        }
        return child;
    }

    /** Splits the comma-separated {@code keys} attribute of a join side, trimming each entry. */
    private static String[] parseKeys(Element side, String sideName) {
        String keys = side.attributeValue("keys");
        if (keys == null) {
            throw new IllegalArgumentException(
                    "Join data access <" + sideName + "> element is missing the 'keys' attribute");
        }
        String[] parsed = keys.split(",");
        for (int i = 0; i < parsed.length; i++) {
            // "0, 1" must yield "0" and "1": the entries are later fed to Integer.parseInt.
            parsed[i] = parsed[i].trim();
        }
        return parsed;
    }

    /** Maps a {@link JoinType} to the join type name written into the MergeJoin step XML. */
    private static String getMergeJoinType(JoinType joinType) {
        switch (joinType) {
        case INNER:
            return "INNER";
        case LEFT_OUTER:
            return "LEFT OUTER";
        case RIGHT_OUTER:
            return "RIGHT OUTER";
        case FULL_OUTER:
        default:
            return "FULL OUTER";
        }
    }

    public String getType() {
        return TYPE;
    }

    public JoinType getJoinType() {
        return joinType;
    }

    /**
     * Queries the left and right data accesses and merge-joins their results.
     *
     * <p>Sorting and paging are removed from the options passed to the two child queries. If either
     * child result has no columns, or the transformation produced no output, an empty table model
     * is returned.</p>
     *
     * @param queryOptions options of the query being executed
     * @return the joined rows, or an empty table model
     * @throws QueryException if a child data access is unknown or anything fails during the join
     */
    protected TableModel queryDataSource(final QueryOptions queryOptions) throws QueryException {
        inputCallables.clear();

        try {
            QueryOptions croppedOptions = (QueryOptions) queryOptions.clone();
            croppedOptions.setSortBy(new ArrayList<String>());
            croppedOptions.setPageSize(0);
            croppedOptions.setPageStart(0);
            final TableModel tableModelA = this.getCdaSettings().getDataAccess(leftId).doQuery(croppedOptions);
            final TableModel tableModelB = this.getCdaSettings().getDataAccess(rightId).doQuery(croppedOptions);

            if (tableModelA.getColumnCount() == 0 || tableModelB.getColumnCount() == 0) {
                return emptyResult();
            }

            String[] leftColumnNames = resolveColumnNames(tableModelA, leftKeys);
            String[] rightColumnNames = resolveColumnNames(tableModelB, rightKeys);

            String sortLeftXML = getSortXmlStep("sortLeft", leftColumnNames);
            String sortRightXML = getSortXmlStep("sortRight", rightColumnNames);
            String mergeJoinXML = getMergeJoinXml(leftColumnNames, rightColumnNames);

            DynamicTransMetaConfig transMetaConfig = new DynamicTransMetaConfig(Type.EMPTY, "JoinCompoundData",
                    null, null);
            DynamicTransConfig transConfig = new DynamicTransConfig();

            String input1Xml = getInjectorStepXmlString("input1", tableModelA);
            String input2Xml = getInjectorStepXmlString("input2", tableModelB);

            transConfig.addConfigEntry(EntryType.STEP, "input1", input1Xml);
            transConfig.addConfigEntry(EntryType.STEP, "input2", input2Xml);
            transConfig.addConfigEntry(EntryType.STEP, "sortLeft", sortLeftXML);
            transConfig.addConfigEntry(EntryType.STEP, "sortRight", sortRightXML);
            transConfig.addConfigEntry(EntryType.STEP, "mergeJoin", mergeJoinXML);

            // Wiring: each injector feeds its sort step, both sort steps feed the merge join.
            transConfig.addConfigEntry(EntryType.HOP, "input1", "sortLeft");
            transConfig.addConfigEntry(EntryType.HOP, "input2", "sortRight");
            transConfig.addConfigEntry(EntryType.HOP, "sortLeft", "mergeJoin");
            transConfig.addConfigEntry(EntryType.HOP, "sortRight", "mergeJoin");

            TableModelInput input1 = new TableModelInput();
            transConfig.addInput("input1", input1);
            inputCallables.add(input1.getCallableRowProducer(tableModelA, true));
            TableModelInput input2 = new TableModelInput();
            transConfig.addInput("input2", input2);
            inputCallables.add(input2.getCallableRowProducer(tableModelB, true));

            RowMetaToTableModel outputListener = new RowMetaToTableModel(false, true, false);
            transConfig.addOutput("mergeJoin", outputListener);

            // This instance is handed over as the RowProductionManager of the transformation.
            DynamicTransformation trans = new DynamicTransformation(transConfig, transMetaConfig);
            trans.executeCheckedSuccess(null, null, this);
            logger.info(trans.getReadWriteThroughput());

            TableModel output = outputListener.getRowsWritten();
            if (output == null) {
                return emptyResult();
            }
            return output;
        } catch (UnknownDataAccessException e) {
            throw new QueryException("Unknown Data access in CompoundDataAccess ", e);
        } catch (Exception e) {
            throw new QueryException("Exception during query ", e);
        }
    }

    /** Returns a table model without columns or rows. */
    private static TableModel emptyResult() {
        return new MetadataTableModel(new String[0], new Class[0], 0);
    }

    /** Translates key column indexes (given as strings) into the column names of the model. */
    private static String[] resolveColumnNames(TableModel model, String[] keyIndexes) {
        String[] names = new String[keyIndexes.length];
        for (int i = 0; i < keyIndexes.length; i++) {
            names[i] = model.getColumnName(Integer.parseInt(keyIndexes[i]));
        }
        return names;
    }

    /**
     * Escapes the five XML special characters so arbitrary text (for instance a column name such as
     * {@code "R&D"}) can be embedded as element content without producing malformed XML.
     * A {@code null} input is returned unchanged.
     */
    private static String escapeXml(String text) {
        if (text == null) {
            return null;
        }
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&apos;");
                break;
            default:
                escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Builds the XML of the MergeJoin step joining the "sortLeft" and "sortRight" steps.
     *
     * @param leftColumnNames  key column names of the left input
     * @param rightColumnNames key column names of the right input
     * @return the step XML
     */
    private String getMergeJoinXml(String[] leftColumnNames, String[] rightColumnNames) {
        StringBuilder mergeJoinXML = new StringBuilder(
                "<step><name>mergeJoin</name><type>MergeJoin</type><join_type>");
        mergeJoinXML.append(getMergeJoinType(joinType));
        mergeJoinXML.append("</join_type><copies>1</copies><step1>sortLeft</step1><step2>sortRight</step2>");
        mergeJoinXML.append("<keys_1>");

        // Iterate over the arrays actually passed in rather than the key-index fields.
        for (String leftColumnName : leftColumnNames) {
            mergeJoinXML.append("<key>").append(escapeXml(leftColumnName)).append("</key>");
        }
        mergeJoinXML.append("</keys_1><keys_2>");

        for (String rightColumnName : rightColumnNames) {
            mergeJoinXML.append("<key>").append(escapeXml(rightColumnName)).append("</key>");
        }
        mergeJoinXML.append("</keys_2></step>");

        return mergeJoinXML.toString();
    }

    /**
     * Builds the XML of a SortRows step that sorts ascending, case-insensitively, on the given
     * columns.
     *
     * @param name        name of the step
     * @param columnNames columns to sort on, in order
     * @return the step XML
     */
    private String getSortXmlStep(final String name, final String[] columnNames) {
        StringBuilder sortXML = new StringBuilder();
        sortXML.append("  <step>\n");
        sortXML.append("    <name>").append(escapeXml(name)).append("</name>\n");
        sortXML.append("    <type>SortRows</type>\n");
        sortXML.append("    <description/>\n");
        sortXML.append("    <distribute>Y</distribute>\n");
        sortXML.append("    <copies>1</copies>\n");
        sortXML.append("         <partitioning>\n");
        sortXML.append("           <method>none</method>\n");
        sortXML.append("           <schema_name/>\n");
        sortXML.append("           </partitioning>\n");
        sortXML.append("      <directory>%%java.io.tmpdir%%</directory>\n");
        sortXML.append("      <prefix>out</prefix>\n");
        sortXML.append("      <sort_size/>\n");
        sortXML.append("      <free_memory>25</free_memory>\n");
        sortXML.append("      <compress>N</compress>\n");
        sortXML.append("      <compress_variable/>\n");
        sortXML.append("      <unique_rows>N</unique_rows>\n");
        sortXML.append("    <fields>\n");

        for (String columnName : columnNames) {
            sortXML.append("      <field>\n");
            sortXML.append("        <name>").append(escapeXml(columnName)).append("</name>\n");
            sortXML.append("        <ascending>Y</ascending>\n");
            sortXML.append("        <case_sensitive>N</case_sensitive>\n");
            sortXML.append("      </field>\n");
        }

        sortXML.append("    </fields>\n");
        sortXML.append("     <cluster_schema/>\n");
        sortXML.append(" <remotesteps>   <input>   </input>   <output>   </output> </remotesteps>    <GUI>\n");
        sortXML.append("      <xloc>615</xloc>\n");
        sortXML.append("      <yloc>188</yloc>\n");
        sortXML.append("      <draw>Y</draw>\n");
        sortXML.append("      </GUI>\n");
        sortXML.append("    </step>\n");

        return sortXML.toString();
    }

    /**
     * Builds the XML of an Injector step declaring one field per column of the table model.
     *
     * @param name name of the step
     * @param t    table model whose columns describe the injected rows
     * @return the step XML
     * @throws IllegalArgumentException if a column type cannot be mapped to a Kettle type
     */
    private String getInjectorStepXmlString(String name, TableModel t) {
        StringBuilder xml = new StringBuilder("<step><name>");
        xml.append(escapeXml(name)).append("</name><type>Injector</type><copies>1</copies>");

        int maxRowsTypeSearch = getMaxTypeSearchRowCount(t);

        // If we have metadata information, put it here
        int columnCount = t.getColumnCount();
        if (columnCount > 0) {
            xml.append("<fields>");
            for (int i = 0; i < columnCount; i++) {
                Class<?> columnClass = inferColumnClass(t, i, maxRowsTypeSearch);
                xml.append("<field>");
                xml.append("<name>").append(escapeXml(t.getColumnName(i))).append("</name>");
                xml.append("<type>").append(getKettleTypeFromColumnClass(columnClass)).append("</type>");
                xml.append("<length>-1</length><precision>-1</precision></field>");
            }
            xml.append("</fields>");
        }

        xml.append("</step>");
        return xml.toString();
    }

    /**
     * Determines the class of a column.
     *
     * <p>The proper source is {@code t.getColumnClass()}. According to the original authors this
     * returns Object for calculated columns, which cannot be mapped to a Kettle type, so in that
     * case the class of the first non-null value found in the first {@code maxRows} rows is used
     * instead. If no such value exists the declared class (Object) is returned.</p>
     */
    private static Class<?> inferColumnClass(TableModel t, int column, int maxRows) {
        Class<?> columnClass = t.getColumnClass(column);
        if (columnClass.equals(Object.class) && t.getRowCount() > 0) {
            for (int row = 0; row < maxRows; row++) {
                Object value = t.getValueAt(row, column);
                if (value != null) {
                    return value.getClass();
                }
            }
        }
        return columnClass;
    }

    /**
     * Computes how many rows of the table model may be inspected when inferring a column type.
     *
     * <p>The limit comes from the {@link #MAX_ROWS_VALUE_TYPE_SEARCH_PROPERTY} configuration
     * property, falling back to the built-in default when it is unset or not an integer. A limit of
     * zero or less means "all rows"; the result never exceeds the row count of the model.</p>
     */
    private int getMaxTypeSearchRowCount(TableModel t) {
        int maxRowsTypeSearch = DEFAULT_MAX_ROWS_VALUE_TYPE_SEARCH;
        String maxRowsTypeSearchProperty = CdaEngine.getInstance()
                .getConfigProperty(MAX_ROWS_VALUE_TYPE_SEARCH_PROPERTY);
        if (!StringUtils.isEmpty(maxRowsTypeSearchProperty)) {
            try {
                maxRowsTypeSearch = Integer.parseInt(maxRowsTypeSearchProperty);
            } catch (NumberFormatException nfe) {
                logger.error(MAX_ROWS_VALUE_TYPE_SEARCH_PROPERTY + ":" + maxRowsTypeSearchProperty
                        + " not a valid integer.");
            }
        }
        if (maxRowsTypeSearch <= 0) {
            return t.getRowCount();
        }
        return Math.min(maxRowsTypeSearch, t.getRowCount());
    }

    /**
     * Returns the Kettle type name for a column class.<br> Values returned: <ul> <li>String</li>
     * <li>Number</li> <li>Date</li> <li>Integer</li> <li>BigNumber</li> <li>Boolean</li> </ul>
     *
     * @throws IllegalArgumentException if the class has no mapping
     */
    private String getKettleTypeFromColumnClass(Class<?> clazz) {
        if (clazz == String.class) {
            return "String";
        }
        if (clazz == Double.class || clazz == Float.class) {
            return "Number";
        }
        if (java.util.Date.class.isAssignableFrom(clazz)) {
            return "Date";
        }
        if (clazz == Long.class || clazz == Integer.class) {
            return "Integer";
        }
        if (clazz == java.math.BigDecimal.class) {
            return "BigNumber";
        }
        if (clazz == Boolean.class) {
            return "Boolean";
        }

        throw new IllegalArgumentException("Unexpected class " + clazz + ", can't convert to kettle type");
    }

    /**
     * Starts row production using the timeout and time unit from the CDA configuration.
     *
     * <p>An unset property uses the built-in default (120 seconds). A malformed property is logged
     * and the default for that property is used, instead of aborting with an unchecked exception.</p>
     */
    public void startRowProduction() {
        long timeout = DEFAULT_ROW_PRODUCTION_TIMEOUT;
        String timeoutStr = CdaEngine.getInstance().getConfigProperty(ROW_PRODUCTION_TIMEOUT_PROPERTY);
        if (!StringUtil.isEmpty(timeoutStr)) {
            try {
                timeout = Long.parseLong(timeoutStr.trim());
            } catch (NumberFormatException nfe) {
                logger.error(ROW_PRODUCTION_TIMEOUT_PROPERTY + ":" + timeoutStr + " not a valid integer.");
            }
        }

        TimeUnit unit = DEFAULT_ROW_PRODUCTION_TIMEOUT_UNIT;
        String unitStr = CdaEngine.getInstance().getConfigProperty(ROW_PRODUCTION_TIMEOUT_UNIT_PROPERTY);
        if (!StringUtil.isEmpty(unitStr)) {
            try {
                unit = TimeUnit.valueOf(unitStr.trim());
            } catch (IllegalArgumentException iae) {
                logger.error(ROW_PRODUCTION_TIMEOUT_UNIT_PROPERTY + ":" + unitStr + " not a valid time unit.");
            }
        }

        startRowProduction(timeout, unit);
    }

    /**
     * Runs all registered row producers and waits for them, up to the given timeout.
     *
     * <p>Failures of a producer and interruption of the wait are logged, not rethrown. Per the
     * {@code ExecutorService.invokeAll} contract, producers still running when the timeout elapses
     * are cancelled; {@code Future.get()} then throws {@code CancellationException}, which is not
     * caught here and propagates to the caller.</p>
     *
     * @param timeout maximum time to wait
     * @param unit    unit of {@code timeout}
     */
    public void startRowProduction(long timeout, TimeUnit unit) {
        try {
            List<Future<Boolean>> results = executorService.invokeAll(inputCallables, timeout, unit);
            for (Future<Boolean> result : results) {
                result.get();
            }
        } catch (InterruptedException e) {
            logger.error("Row production interrupted", e);
            // Restore the interrupt flag so code further up the stack can react to it.
            Thread.currentThread().interrupt();
        } catch (ExecutionException e) {
            logger.error("Problem starting row production", e);
        }
    }

    @Override
    public ConnectionType getConnectionType() {
        return ConnectionType.NONE;
    }

    /**
     * Describes the properties of this data access type: {@code id} as an attribute and
     * {@code left}, {@code right}, {@code parameters}, {@code output} and {@code joinType} as
     * child elements.
     */
    public ArrayList<PropertyDescriptor> getInterface() {
        ArrayList<PropertyDescriptor> properties = new ArrayList<PropertyDescriptor>();
        properties.add(
                new PropertyDescriptor("id", PropertyDescriptor.Type.STRING, PropertyDescriptor.Placement.ATTRIB));
        properties.add(
                new PropertyDescriptor("left", PropertyDescriptor.Type.STRING, PropertyDescriptor.Placement.CHILD));
        properties.add(new PropertyDescriptor("right", PropertyDescriptor.Type.STRING,
                PropertyDescriptor.Placement.CHILD));
        properties.add(new PropertyDescriptor("parameters", PropertyDescriptor.Type.STRING,
                PropertyDescriptor.Placement.CHILD));
        properties.add(new PropertyDescriptor("output", PropertyDescriptor.Type.ARRAY,
                PropertyDescriptor.Placement.CHILD));
        properties.add(new PropertyDescriptor("joinType", PropertyDescriptor.Type.STRING,
                PropertyDescriptor.Placement.CHILD));
        return properties;
    }

    public String getLeftId() {
        return leftId;
    }

    public void setLeftId(String leftId) {
        this.leftId = leftId;
    }

    public String getRightId() {
        return rightId;
    }

    public String[] getLeftKeys() {
        return leftKeys;
    }

    public String[] getRightKeys() {
        return rightKeys;
    }

    /** A join has no query text of its own, so the value is ignored. */
    @Override
    public void setQuery(String query) {
        // Do nothing
    }
}