com.github.maasdi.mongo.wrapper.field.MongoField.java Source code

Java tutorial

Introduction

Here is the source code for com.github.maasdi.mongo.wrapper.field.MongoField.java

Source

/**
 * Copyright (C) 2014 Maas Dianto (maas.dianto@gmail.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.maasdi.mongo.wrapper.field;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.bson.types.Binary;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.ValueMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.i18n.BaseMessages;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

import com.github.maasdi.di.trans.steps.mongodbdelete.MongoDbDeleteData;

/**
 * Describes one field that is pulled out of a Mongo document and placed into a Kettle row.
 * <p>
 * A field is identified by a dotted path (for example {@code $.a.b[0].c}). {@link #init(int)} splits that path into
 * parts once; {@link #reset(VariableSpace)} must then be called before each document so that the working copy of
 * the path is rebuilt, because the {@code convertToKettleValue} methods consume that working copy while they
 * descend into the document.
 */
public class MongoField implements Comparable<MongoField> {
    protected static Class<?> PKG = MongoField.class; // for i18n purposes

    /** The name the field will take in the outputted kettle stream */
    public String m_fieldName = ""; //$NON-NLS-1$

    /** The path to the field in the Mongo object */
    public String m_fieldPath = ""; //$NON-NLS-1$

    /** The kettle type for this field */
    public String m_kettleType = ""; //$NON-NLS-1$

    /** User-defined indexed values for String types */
    public List<String> m_indexedVals;

    /**
     * Temporary variable to hold the min:max array index info for fields determined when sampling documents for
     * paths/types
     */
    public transient String m_arrayIndexInfo;

    /**
     * Temporary variable to hold the number of times this path was seen when sampling documents to determine paths/types.
     */
    public transient int m_percentageOfSample = -1;

    /**
     * Temporary variable to hold the num times this path was seen/num sampled documents. Note that numerator might be
     * larger than denominator if this path is encountered multiple times in an array within one document.
     */
    public transient String m_occurenceFraction = ""; //$NON-NLS-1$

    /**
     * Java class associated with this field's Mongo value. This class never reads or writes it; presumably it is
     * populated while sampling documents - confirm against callers.
     */
    public transient Class<?> m_mongoType;

    /**
     * Temporary variable used to indicate that this path occurs multiple times over the sampled documents and that the
     * types differ. In this case we should default to Kettle type String as a catch-all
     */
    public transient boolean m_disparateTypes;

    /** The index that this field is in the output row structure */
    public int m_outputIndex;

    /** Value meta built from {@link #m_kettleType} in {@link #init(int)}; drives the conversions in getKettleValue */
    private ValueMetaInterface m_tempValueMeta;

    /** The cleansed path split on '.', with the leading root indicator removed; set once by {@link #init(int)} */
    private List<String> m_pathParts;

    /** Working copy of the path parts; rebuilt by {@link #reset(VariableSpace)} and consumed during traversal */
    private List<String> m_tempParts;

    /**
     * Create a new field carrying this field's name, path, Kettle type and indexed values. Sampling information,
     * the output index and all state created by {@link #init(int)} are not copied.
     *
     * @return the new, uninitialized field
     */
    public MongoField copy() {
        MongoField newF = new MongoField();
        newF.m_fieldName = m_fieldName;
        newF.m_fieldPath = m_fieldPath;
        newF.m_kettleType = m_kettleType;

        // reference doesn't matter here as this list is read only at runtime
        newF.m_indexedVals = m_indexedVals;

        return newF;
    }

    /**
     * Initialize this mongo field. Calling this again after a successful initialization is a no-op.
     *
     * @param outputIndex
     *          the index for this field in the outgoing row structure.
     * @throws KettleException
     *           if no usable path is set or the value meta for the Kettle type cannot be created
     */
    public void init(int outputIndex) throws KettleException {
        if (Const.isEmpty(m_fieldPath)) {
            throw new KettleException(
                    BaseMessages.getString(PKG, "MongoDbOutput.Messages.MongoField.Error.NoPathSet")); //$NON-NLS-1$
        }

        if (m_pathParts != null) {
            return;
        }

        String fieldPath = MongoDbDeleteData.cleansePath(m_fieldPath);

        List<String> pathParts = new ArrayList<String>();
        for (String part : fieldPath.split("\\.")) { //$NON-NLS-1$
            pathParts.add(part);
        }

        if (pathParts.isEmpty()) {
            // String.split drops trailing empty strings, so a path consisting only of dots yields no
            // parts at all; report it as a missing path instead of failing on get(0) below
            throw new KettleException(
                    BaseMessages.getString(PKG, "MongoDbOutput.Messages.MongoField.Error.NoPathSet")); //$NON-NLS-1$
        }

        String first = pathParts.get(0);
        if (first.equals("$")) { //$NON-NLS-1$
            pathParts.remove(0); // root record indicator
        } else if (first.startsWith("$[")) { //$NON-NLS-1$
            // strip leading $ off of array
            pathParts.set(0, first.substring(1));
        }

        ValueMetaInterface valueMeta = ValueMetaFactory.createValueMeta(ValueMeta.getType(m_kettleType));

        // Only commit state once everything above has succeeded, so that a failed init can be retried
        // instead of leaving m_pathParts set while m_tempParts / m_tempValueMeta are still null.
        m_pathParts = pathParts;
        m_tempParts = new ArrayList<String>();
        m_tempValueMeta = valueMeta;
        m_outputIndex = outputIndex;
    }

    /**
     * Reset this field, ready for processing a new document. {@link #init(int)} must have been called first.
     *
     * @param space
     *          variables to use
     */
    public void reset(VariableSpace space) {
        // first clear because there may be stuff left over from processing
        // the previous mongo document object (especially if a path exited early
        // due to non-existent field or array index out of bounds)
        m_tempParts.clear();

        for (String part : m_pathParts) {
            m_tempParts.add(space.environmentSubstitute(part));
        }
    }

    /**
     * Perform Kettle type conversions for the Mongo leaf field value.
     *
     * @param fieldValue
     *          the leaf value from the Mongo structure; may be null
     * @return an Object of the appropriate Kettle type, or null if the value is null or the Kettle type of this
     *         field is not one of the handled types
     * @throws KettleException
     *           if a problem occurs
     */
    public Object getKettleValue(Object fieldValue) throws KettleException {

        if (fieldValue == null) {
            // nothing to convert; avoids a NullPointerException on toString() in the branches below
            return null;
        }

        switch (m_tempValueMeta.getType()) {
        case ValueMetaInterface.TYPE_BIGNUMBER:
            if (fieldValue instanceof Number) {
                fieldValue = BigDecimal.valueOf(((Number) fieldValue).doubleValue());
            } else if (fieldValue instanceof Date) {
                fieldValue = new BigDecimal(((Date) fieldValue).getTime());
            } else {
                fieldValue = new BigDecimal(fieldValue.toString());
            }
            return m_tempValueMeta.getBigNumber(fieldValue);
        case ValueMetaInterface.TYPE_BINARY:
            if (fieldValue instanceof Binary) {
                fieldValue = ((Binary) fieldValue).getData();
            } else {
                // NOTE(review): encodes with the platform default charset
                fieldValue = fieldValue.toString().getBytes();
            }
            return m_tempValueMeta.getBinary(fieldValue);
        case ValueMetaInterface.TYPE_BOOLEAN:
            if (fieldValue instanceof Number) {
                fieldValue = Boolean.valueOf(((Number) fieldValue).intValue() != 0);
            } else if (fieldValue instanceof Date) {
                fieldValue = Boolean.valueOf(((Date) fieldValue).getTime() != 0);
            } else {
                String text = fieldValue.toString();
                fieldValue = Boolean.valueOf(text.equalsIgnoreCase("Y") //$NON-NLS-1$
                        || text.equalsIgnoreCase("T") //$NON-NLS-1$
                        || text.equalsIgnoreCase("1")); //$NON-NLS-1$
            }
            return m_tempValueMeta.getBoolean(fieldValue);
        case ValueMetaInterface.TYPE_DATE:
            if (fieldValue instanceof Number) {
                fieldValue = new Date(((Number) fieldValue).longValue());
            } else if (fieldValue instanceof Date) {
                // nothing to do
            } else {
                throw new KettleException(BaseMessages.getString(PKG, "MongoDbInput.ErrorMessage.DateConversion", //$NON-NLS-1$
                        fieldValue.toString()));
            }
            return m_tempValueMeta.getDate(fieldValue);
        case ValueMetaInterface.TYPE_INTEGER:
            // Kettle's integer type is Long-backed (getInteger returns Long), so every branch must
            // produce a Long; longValue() also keeps 64-bit Mongo values from being cut down to 32 bits.
            if (fieldValue instanceof Number) {
                fieldValue = Long.valueOf(((Number) fieldValue).longValue());
            } else if (fieldValue instanceof Binary) {
                byte[] b = ((Binary) fieldValue).getData();
                // NOTE(review): decodes with the platform default charset
                String s = new String(b);
                fieldValue = Long.valueOf(s);
            } else {
                fieldValue = Long.valueOf(fieldValue.toString());
            }
            return m_tempValueMeta.getInteger(fieldValue);
        case ValueMetaInterface.TYPE_NUMBER:
            if (fieldValue instanceof Number) {
                fieldValue = Double.valueOf(((Number) fieldValue).doubleValue());
            } else if (fieldValue instanceof Binary) {
                byte[] b = ((Binary) fieldValue).getData();
                // NOTE(review): decodes with the platform default charset
                String s = new String(b);
                fieldValue = Double.valueOf(s);
            } else {
                fieldValue = Double.valueOf(fieldValue.toString());
            }
            return m_tempValueMeta.getNumber(fieldValue);
        case ValueMetaInterface.TYPE_STRING:
            return m_tempValueMeta.getString(fieldValue);
        default:
            return null;
        }
    }

    /**
     * Convert a mongo record object to a Kettle field value (for the field defined by this path). Consumes the
     * working path set up by {@link #reset(VariableSpace)}.
     *
     * @param mongoObject
     *          the record to convert
     * @return the kettle field value, or null if the record is null or does not contain the path
     * @throws KettleException
     *           if the path has been exhausted before reaching this record or a conversion problem occurs
     */
    public Object convertToKettleValue(BasicDBObject mongoObject) throws KettleException {

        if (mongoObject == null) {
            return null;
        }

        if (m_tempParts.size() == 0) {
            throw new KettleException(BaseMessages.getString(PKG, "MongoDbInput.ErrorMessage.MalformedPathRecord")); //$NON-NLS-1$
        }

        String part = m_tempParts.remove(0);

        // startsWith rather than charAt(0): an empty part (e.g. from "a..b" or a variable that
        // substitutes to nothing) must not blow up with StringIndexOutOfBoundsException
        if (part.startsWith("[")) { //$NON-NLS-1$
            // we're not expecting an array at this point - this document does not
            // contain our field
            return null;
        }

        int bracket = part.indexOf('[');
        if (bracket > 0) {
            String arrayPart = part.substring(bracket);
            part = part.substring(0, bracket);

            // put the array section back into location zero
            m_tempParts.add(0, arrayPart);
        }

        // part is a named field of this record
        Object fieldValue = mongoObject.get(part);
        if (fieldValue == null) {
            return null;
        }

        // what have we got
        if (m_tempParts.size() == 0) {
            // we're expecting a leaf primitive - lets see if that's what we have
            // here...
            return getKettleValue(fieldValue);
        }

        if (fieldValue instanceof BasicDBObject) {
            return convertToKettleValue(((BasicDBObject) fieldValue));
        }

        if (fieldValue instanceof BasicDBList) {
            return convertToKettleValue(((BasicDBList) fieldValue));
        }

        // must mean we have a primitive here, but we're expecting to process more
        // path so this doesn't match us - return null
        return null;
    }

    /**
     * Convert a mongo array object to a Kettle field value (for the field defined in this path). Consumes the
     * working path set up by {@link #reset(VariableSpace)}.
     *
     * @param mongoList
     *          the array to convert
     * @return the kettle field value, or null if the array is null, the index is out of range or the array does
     *         not contain the path
     * @throws KettleException
     *           if the path has been exhausted, the array index cannot be parsed or a conversion problem occurs
     */
    public Object convertToKettleValue(BasicDBList mongoList) throws KettleException {

        if (mongoList == null) {
            return null;
        }

        if (m_tempParts.size() == 0) {
            throw new KettleException(BaseMessages.getString(PKG, "MongoDbInput.ErrorMessage.MalformedPathArray")); //$NON-NLS-1$
        }

        String part = m_tempParts.remove(0);
        if (!part.startsWith("[")) { //$NON-NLS-1$
            // we're expecting an array at this point - this document does not
            // contain our field
            return null;
        }

        int close = part.indexOf(']');
        if (close < 0) {
            // unterminated index such as "[0" - report it as an unparseable index rather than
            // letting substring() throw StringIndexOutOfBoundsException
            throw new KettleException(
                    BaseMessages.getString(PKG, "MongoDbInput.ErrorMessage.UnableToParseArrayIndex", part)); //$NON-NLS-1$
        }

        String index = part.substring(1, close);
        int arrayI = 0;
        try {
            arrayI = Integer.parseInt(index.trim());
        } catch (NumberFormatException e) {
            throw new KettleException(
                    BaseMessages.getString(PKG, "MongoDbInput.ErrorMessage.UnableToParseArrayIndex", index)); //$NON-NLS-1$
        }

        if (close < part.length() - 1) {
            // more dimensions to the array
            m_tempParts.add(0, part.substring(close + 1));
        }

        if (arrayI >= mongoList.size() || arrayI < 0) {
            return null;
        }

        Object element = mongoList.get(arrayI);

        if (element == null) {
            return null;
        }

        if (m_tempParts.size() == 0) {
            // we're expecting a leaf primitive - let's see if that's what we have
            // here...
            return getKettleValue(element);
        }

        if (element instanceof BasicDBObject) {
            return convertToKettleValue(((BasicDBObject) element));
        }

        if (element instanceof BasicDBList) {
            return convertToKettleValue(((BasicDBList) element));
        }

        // must mean we have a primitive here, but we're expecting to process more
        // path so this doesn't match us - return null
        return null;
    }

    /**
     * Orders fields by their output field name.
     *
     * @param comp
     *          the field to compare against
     * @return the result of comparing the two field names as Strings
     */
    @Override
    public int compareTo(MongoField comp) {
        return m_fieldName.compareTo(comp.m_fieldName);
    }
}