com.datatorrent.contrib.enrichment.POJOEnrichmentOperator.java Source code

Java tutorial

Introduction

Here is the source code for com.datatorrent.contrib.enrichment.POJOEnrichmentOperator.java

Source

/**
 * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.contrib.enrichment;

import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang3.ClassUtils;
import org.apache.hadoop.classification.InterfaceStability.Evolving;

import com.esotericsoftware.kryo.NotNull;

import com.datatorrent.api.Context;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.Context.PortContext;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.Operator.ActivationListener;
import com.datatorrent.api.Sink;
import com.datatorrent.api.annotation.InputPortFieldAnnotation;
import com.datatorrent.api.annotation.OutputPortFieldAnnotation;
import com.datatorrent.lib.util.PojoUtils;
import com.datatorrent.lib.util.PojoUtils.Getter;
import com.datatorrent.lib.util.PojoUtils.Setter;
import com.datatorrent.netlet.util.DTThrowable;

/**
 * This class takes a POJO as input and extracts the value of the lookupKey configured
 * for this operator. It then does a lookup in the file/DB to find a matching entry, and all key-value pairs
 * specified in the file/DB, or those selected by the include field map, are added to the original tuple.
 * This operator is App Builder schema support enabled. <br>
 * 
 * Properties:<br>
 * <b>inputClass</b>: Class to be loaded for the incoming data type<br>
 * <b>outputClass</b>: Class to be loaded for the emitted data type<br>
 * <br>
 *
 * Example
 * The file contains data in JSON format, one entry per line. During setup the entire file is read and
 * kept in memory for quick lookup.
 * If the file contains the following lines, and the operator is configured with lookup key "productId":
 * { "productId": 1, "productCategory": 3 }
 * { "productId": 4, "productCategory": 10 }
 * { "productId": 3, "productCategory": 1 }
 *
 * And input tuple is
 * { amount=10.0, channelId=4, productId=3 }
 *
 * The tuple is modified as below before operator emits it on output port.
 * { amount=10.0, channelId=4, productId=3, productCategory=1 }
 *
 * @displayName BeanEnrichment
 * @category Database
 * @tags enrichment, pojo, schema, lookup
 * @since 2.1.0
 */
@Evolving
public class POJOEnrichmentOperator extends AbstractEnrichmentOperator<Object, Object>
        implements ActivationListener<Context> {
    private static final Logger log = LoggerFactory.getLogger(POJOEnrichmentOperator.class);

    // Configured mappings (non-transient). Each list starts empty and is filled through the
    // corresponding list setter or string setter of this class.
    // Input POJO field / store column pairs; setup() joins the column names into lookupFieldsStr.
    private List<LookupKeyField> lookupKey = new ArrayList<LookupKeyField>();
    // Store column / output POJO field pairs; setup() joins the column names into includeFieldsStr.
    private List<EnrichField> fieldsToAddToOutputTuple = new ArrayList<EnrichField>();
    // Input field / output field pairs whose values convert() copies from the input to the output tuple.
    private List<CopyField> tupleFieldsToCopyFromInputToOutput = new ArrayList<CopyField>();

    // Accessors rebuilt in activate(); transient, so they are not part of the operator's saved state.
    // One getter per lookupKey entry, in list order; read by getKey().
    @SuppressWarnings("rawtypes")
    private transient List<Getter> keyMethodMap;
    // One setter per fieldsToAddToOutputTuple entry, in list order; used by convert().
    @SuppressWarnings("rawtypes")
    private transient List<Setter> resultMethodMap;
    // One getter/setter pair per tupleFieldsToCopyFromInputToOutput entry; used by convert().
    private transient List<MethodMap> passOnFieldMethodMap;

    // Tuple classes: assigned from the ports' TUPLE_CLASS attribute during port setup, or
    // explicitly through setInputClassStr / setOutputClassStr.
    protected transient Class<?> inputClass;
    protected transient Class<?> outputClass;

    /**
     * Input port for the POJOs to be enriched. On setup it records the port's
     * TUPLE_CLASS attribute as the input class; every received tuple is handed
     * to the inherited {@code input} port.
     */
    @InputPortFieldAnnotation(schemaRequired = true)
    public transient DefaultInputPort<Object> inputPojo = new DefaultInputPort<Object>() {

        @Override
        public void setup(PortContext portContext) {
            // Remember the tuple class declared on this port; activate() reflects on it.
            inputClass = portContext.getValue(PortContext.TUPLE_CLASS);
        }

        @Override
        public void process(Object pojo) {
            // Delegate to the superclass input port so the shared enrichment path runs.
            POJOEnrichmentOperator.this.input.put(pojo);
        }
    };

    /**
     * Output port on which enriched POJOs are emitted. On setup it records the
     * port's TUPLE_CLASS attribute as the output class.
     */
    @OutputPortFieldAnnotation(schemaRequired = true)
    public transient DefaultOutputPort<Object> outputPojo = new DefaultOutputPort<Object>() {

        @Override
        public void setup(PortContext portContext) {
            // Remember the tuple class declared on this port; convert() instantiates it.
            outputClass = portContext.getValue(PortContext.TUPLE_CLASS);
        }
    };

    /**
     * Creates the operator with both inherited field-list strings initialised
     * to the empty string.
     */
    public POJOEnrichmentOperator() {
        lookupFieldsStr = "";
        includeFieldsStr = "";
    }

    /**
     * Builds the lookup key for a tuple by reading every configured key field
     * through its getter, in configuration order.
     *
     * @param tuple input POJO to read the key fields from
     * @return an {@link ArrayList} holding one value per lookup key field
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Override
    protected Object getKey(Object tuple) {
        final List<Object> keyValues = new ArrayList<Object>();
        for (int i = 0; i < keyMethodMap.size(); i++) {
            Getter keyGetter = keyMethodMap.get(i);
            keyValues.add(keyGetter.get(tuple));
        }
        return keyValues;
    }

    /**
     * Creates the output POJO for one input tuple.
     * <p>
     * A fresh instance of the output class is created, the configured copy fields are
     * transferred from the input tuple, and, when a cached entry was found, its values
     * are written to the configured enrichment fields in order.
     *
     * @param in     the input tuple
     * @param cached values found for this tuple's key, expected to be a {@link List} with one
     *               element per enrichment field in configuration order; {@code null} when
     *               nothing was found
     * @return the populated output POJO, or {@code null} if the output class could not be
     *         instantiated
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Override
    protected Object convert(Object in, Object cached) {
        Object out;

        try {
            out = outputClass.newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            log.error("Failed to create instance of output schema.", e);
            return null;
        }

        // Copy pass-through fields; a failing field is logged and left at its default value.
        for (MethodMap map : passOnFieldMethodMap) {
            try {
                map.set.set(out, map.get.get(in));
            } catch (RuntimeException e) {
                log.error("Failed to set the property. Continuing with default.", e);
            }
        }

        if (cached != null) {
            // Cast to the List interface rather than ArrayList so that any list
            // implementation handed over as the cached value is accepted.
            List<Object> newAttributes = (List<Object>) cached;
            int idx = 0;
            for (Setter s : resultMethodMap) {
                try {
                    // idx advances even when the setter fails, keeping values and setters aligned.
                    s.set(out, newAttributes.get(idx++));
                } catch (RuntimeException e) {
                    log.error("Failed to set the property. Continuing with default.", e);
                }
            }
        }

        return out;
    }

    /**
     * Prepares the operator before the superclass setup runs.
     * <p>
     * When lookup keys or enrichment fields are configured, their store column names are
     * joined with commas into {@code lookupFieldsStr} and {@code includeFieldsStr}
     * respectively; an empty list leaves the corresponding string untouched. The
     * superclass output port is then wired to emit on {@link #outputPojo}.
     *
     * @param context operator context, passed on to the superclass
     */
    @Override
    public void setup(OperatorContext context) {
        for (int i = 0; i < lookupKey.size(); i++) {
            String column = lookupKey.get(i).getDbColumnName();
            lookupFieldsStr = (i == 0) ? column : lookupFieldsStr + "," + column;
        }

        for (int i = 0; i < fieldsToAddToOutputTuple.size(); i++) {
            String column = fieldsToAddToOutputTuple.get(i).getFromDBColumn();
            includeFieldsStr = (i == 0) ? column : includeFieldsStr + "," + column;
        }

        // Forward everything the superclass emits to the schema-aware output port.
        super.output.setSink(new Sink<Object>() {
            @Override
            public void put(Object tuple) {
                outputPojo.emit(tuple);
            }

            @Override
            public int getCount(boolean reset) {
                return 0;
            }
        });

        super.setup(context);
    }

    /**
     * Builds the reflective accessors used while processing tuples: one getter per lookup
     * key field on the input class, one setter per enrichment field on the output class,
     * and one getter/setter pair per copy field.
     * <p>
     * For a copy field with only one side specified, the other side defaults to the same
     * field name.
     *
     * @param context activation context (not used)
     * @throws RuntimeException if a configured field cannot be resolved on the input or
     *         output class, or if a copy field has neither side specified
     */
    @SuppressWarnings("rawtypes")
    @Override
    public void activate(Context context) {
        keyMethodMap = new ArrayList<Getter>();
        for (LookupKeyField keyField : lookupKey) {
            String inputField = keyField.getInputPOJOSubstituteField();
            try {
                keyMethodMap.add(generateGettersForField(inputClass, inputField));
            } catch (NoSuchFieldException | SecurityException e) {
                throw new RuntimeException(
                        "Failed to initialize Input class getters for field: " + inputField, e);
            }
        }

        resultMethodMap = new ArrayList<Setter>();
        for (EnrichField enrichField : fieldsToAddToOutputTuple) {
            String outputField = enrichField.getToOutputPOJOField();
            try {
                resultMethodMap.add(generateSettersForField(outputClass, outputField));
            } catch (NoSuchFieldException | SecurityException e) {
                // These are setters on the output class; the message now says so.
                throw new RuntimeException(
                        "Failed to initialize Output class setters for field: " + outputField, e);
            }
        }

        passOnFieldMethodMap = new ArrayList<MethodMap>();
        for (CopyField copyField : tupleFieldsToCopyFromInputToOutput) {
            String fromField = copyField.getFromInputPOJOField();
            String toField = copyField.getToOutputPOJOField();
            if ((fromField == null) && (toField == null)) {
                throw new RuntimeException("Both from & to fields cannot be null. Atleast one should be not null.");
            }

            // Only one side given: use the same field name on the other side instead of
            // passing null on to the reflective field lookup.
            if (fromField == null) {
                fromField = toField;
            } else if (toField == null) {
                toField = fromField;
            }

            MethodMap m = new MethodMap();
            try {
                m.get = generateGettersForField(inputClass, fromField);
                m.set = generateSettersForField(outputClass, toField);
                passOnFieldMethodMap.add(m);
            } catch (NoSuchFieldException | SecurityException e) {
                throw new RuntimeException("Failed to initialize Input/Output class getters/setter for copy field: "
                        + fromField + " -> " + toField, e);
            }
        }
    }

    /**
     * No-op: this operator performs no work on deactivation.
     */
    @Override
    public void deactivate() {
    }

    /**
     * Creates a setter for a field declared directly on the given class.
     * <p>
     * The field's declared type is looked up reflectively; primitive types are mapped to
     * their wrapper classes before the setter is created.
     *
     * @param klass           class that declares the field
     * @param outputFieldName name of the field to create a setter for
     * @return setter for {@code outputFieldName} on {@code klass}
     * @throws NoSuchFieldException if {@code klass} does not declare a field with that name
     * @throws SecurityException    if reflective access to the field is denied
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private Setter generateSettersForField(Class<?> klass, String outputFieldName)
            throws NoSuchFieldException, SecurityException {
        // Resolve the field on the class that was passed in (not on the outputClass field),
        // so the type lookup and the created setter always refer to the same class.
        Field f = klass.getDeclaredField(outputFieldName);
        Class c = ClassUtils.primitiveToWrapper(f.getType());
        return PojoUtils.createSetter(klass, outputFieldName, c);
    }

    /**
     * Creates a getter for a field declared directly on the given class.
     * <p>
     * The field's declared type is looked up reflectively; primitive types are mapped to
     * their wrapper classes before the getter is created.
     *
     * @param klass          class that declares the field
     * @param inputFieldName name of the field to create a getter for
     * @return getter for {@code inputFieldName} on {@code klass}
     * @throws NoSuchFieldException if {@code klass} does not declare a field with that name
     * @throws SecurityException    if reflective access to the field is denied
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private Getter generateGettersForField(Class<?> klass, String inputFieldName)
            throws NoSuchFieldException, SecurityException {
        Class boxedType = ClassUtils.primitiveToWrapper(klass.getDeclaredField(inputFieldName).getType());
        return PojoUtils.createGetter(klass, inputFieldName, boxedType);
    }

    /**
     * Specify the fields from the input tuple to be used to lookup the store.
     * If the column names in the store are different from the tuple field names, specify the mappings below.
     * Just specifying the tuple field name assumes that the column name is the same as the tuple field name.
     *
     * @return List of LookupKeyField
     */
    public List<LookupKeyField> getLookupKey() {
        return lookupKey;
    }

    /**
     * Specify the fields from the input tuple to be used to lookup the store.
     * If the column names in the store are different from the tuple field names, specify the mappings below.
     * Just specifying the tuple field name assumes that the column name is the same as the tuple field name.
     *
     * @param lookupKey List of LookupKeyField; replaces the current list
     *
     * @description $[].inputPOJOSubstituteField Value from this input schema field will be substituted for database
     * column
     * @description $[].dbColumnName This will be the name of the column which is part of composite key. This field is
     * optional. If not provided, column name will be considered same as schema field name.
     * @useSchema $[].inputPOJOSubstituteField inputPojo.fields[].name
     */
    public void setLookupKey(List<LookupKeyField> lookupKey) {
        this.lookupKey = lookupKey;
    }

    /**
     * This sets lookup keys in string format. Following formats are allowed:
     *
     *    {InputPOJOFieldName1}:{dbColumnName1},{InputPOJOFieldName2}:{dbColumnName2}
     *     - Field mapping will be set as it shows.
     *    {field1},{field2}
     *    - Both dbColumn and input POJO field name will be set to same value.
     *
     * Whitespace around entries and around the names inside an entry is ignored, and
     * blank entries are skipped. The parsed entries are appended to the lookup keys
     * already configured. A {@code null} argument is ignored.
     *
     * This property is supposed to be used only when property is set via conf files.
     * This property will not show up in UI.
     *
     * @param str lookup key in string format
     * @omitFromUI
     */
    public void setLookupKeyStr(String str) {
        if (str == null) {
            return;
        }
        for (String rawEntry : str.split(",")) {
            String entry = rawEntry.trim();
            if (entry.isEmpty()) {
                continue;
            }
            String[] columnSplit = entry.split(":");
            if (columnSplit.length == 0) {
                // Entry consisted of separators only, e.g. ":".
                continue;
            }
            String pojoField = columnSplit[0].trim();
            // A missing or blank column name means "same as the POJO field name".
            String dbColumn = (columnSplit.length > 1) ? columnSplit[1].trim() : pojoField;
            if (dbColumn.isEmpty()) {
                dbColumn = pojoField;
            }

            LookupKeyField field = new LookupKeyField();
            field.setInputPOJOSubstituteField(pojoField);
            field.setDbColumnName(dbColumn);
            this.lookupKey.add(field);
        }
    }

    /**
     * A list of fields to be enriched in output schema.
     * Each item in the list gives mapping of database column to field in output schema.
     *
     * @return List of EnrichField
     */
    public List<EnrichField> getFieldsToAddToOutputTuple() {
        return fieldsToAddToOutputTuple;
    }

    /**
     * A list of fields to be enriched in output schema.
     * Each item in the list gives mapping of database column to field in output schema.
     *
     * @param enrichedFields List of EnrichField; replaces the current list
     *
     * @description $[].toOutputPOJOField Value from corresponding column in database will be set to this field in output
     * schema.
     * @description $[].fromDBColumn Database column name for which value is to be retrieved. This field is optional.
     * If not provided, column name will be considered same as schema field name.
     * @useSchema $[].toOutputPOJOField outputPojo.fields[].name
     */
    public void setFieldsToAddToOutputTuple(List<EnrichField> enrichedFields) {
        this.fieldsToAddToOutputTuple = enrichedFields;
    }

    /**
     * This sets fields to be added to output tuple in string format. Following formats are allowed:
     *
     *    {fromDBColumnName1}:{toOutputPOJOField1},{fromDBColumnName2}:{toOutputPOJOField2}
     *     - Field mapping will be set as it shows.
     *    {field1},{field2}
     *    - Both dbColumn and output POJO field name will be set to same value.
     *
     * Whitespace around entries and around the names inside an entry is ignored, and
     * blank entries are skipped. The parsed entries are appended to the enrichment
     * fields already configured. A {@code null} argument is ignored.
     *
     * This property is supposed to be used only when property is set via conf files.
     * This property will not show up in UI.
     *
     * @param str enriched key fields in string format
     * @omitFromUI
     */
    public void setFieldToAddToOutputTupleStr(String str) {
        if (str == null) {
            return;
        }
        for (String rawEntry : str.split(",")) {
            String entry = rawEntry.trim();
            if (entry.isEmpty()) {
                continue;
            }
            String[] columnSplit = entry.split(":");
            if (columnSplit.length == 0) {
                // Entry consisted of separators only, e.g. ":".
                continue;
            }
            String dbColumn = columnSplit[0].trim();
            // A missing or blank output field name means "same as the column name".
            String outputField = (columnSplit.length > 1) ? columnSplit[1].trim() : dbColumn;
            if (outputField.isEmpty()) {
                outputField = dbColumn;
            }

            EnrichField field = new EnrichField();
            field.setFromDBColumn(dbColumn);
            field.setToOutputPOJOField(outputField);
            this.fieldsToAddToOutputTuple.add(field);
        }
    }

    /**
     * A list of field values to be copied from input to output tuple.
     *
     * @return List of CopyField
     */
    public List<CopyField> getTupleFieldsToCopyFromInputToOutput() {
        return tupleFieldsToCopyFromInputToOutput;
    }

    /**
     * A list of field values to be copied from input to output tuple.
     *
     * @param copyFields List of CopyField; replaces the current list
     *
     * @description $[].fromInputPOJOField Source field in input schema. If not specified, same field name selected in
     * Output schema will be considered.
     * @description $[].toOutputPOJOField Destination field in output schema. If not specified, same field name selected
     * in Input schema will be considered.
     * @useSchema $[].fromInputPOJOField inputPojo.fields[].name
     * @useSchema $[].toOutputPOJOField outputPojo.fields[].name
     */
    public void setTupleFieldsToCopyFromInputToOutput(List<CopyField> copyFields) {
        this.tupleFieldsToCopyFromInputToOutput = copyFields;
    }

    /**
     * This sets fields to be copied from input to output tuple. Following formats are allowed:
     *
     *    {fromInputPOJOField1}:{toOutputPOJOField1},{fromInputPOJOField2}:{toOutputPOJOField2}
     *     - Field mapping will be set as it shows.
     *    {field1},{field2}
     *    - Both input and output POJO field name will be set to same value.
     *
     * Whitespace around entries and around the names inside an entry is ignored, and
     * blank entries are skipped. The parsed entries are appended to the copy fields
     * already configured. A {@code null} argument is ignored.
     *
     * This property is supposed to be used only when property is set via conf files.
     * This property will not show up in UI.
     *
     * @param str To be copied fields in string format
     * @omitFromUI
     */
    public void setTupleFieldsToCopyFromInputToOutputStr(String str) {
        if (str == null) {
            return;
        }
        for (String rawEntry : str.split(",")) {
            String entry = rawEntry.trim();
            if (entry.isEmpty()) {
                continue;
            }
            String[] columnSplit = entry.split(":");
            if (columnSplit.length == 0) {
                // Entry consisted of separators only, e.g. ":".
                continue;
            }
            String inputField = columnSplit[0].trim();
            // A missing or blank output field name means "same as the input field name".
            String outputField = (columnSplit.length > 1) ? columnSplit[1].trim() : inputField;
            if (outputField.isEmpty()) {
                outputField = inputField;
            }

            CopyField field = new CopyField();
            field.setFromInputPOJOField(inputField);
            field.setToOutputPOJOField(outputField);
            this.tupleFieldsToCopyFromInputToOutput.add(field);
        }
    }

    /**
     * Passes the value straight through to the superclass setter.
     *
     * @param lookupFieldsStr value forwarded to the superclass
     * @omitFromUI
     */
    @Override
    public void setLookupFieldsStr(String lookupFieldsStr) {
        super.setLookupFieldsStr(lookupFieldsStr);
    }

    /**
     * Passes the value straight through to the superclass setter.
     *
     * @param includeFieldsStr value forwarded to the superclass
     * @omitFromUI
     */
    @Override
    public void setIncludeFieldsStr(String includeFieldsStr) {
        super.setIncludeFieldsStr(includeFieldsStr);
    }

    /**
     * Returns the canonical name of the expected class on input port.
     * This property is required to be set from config file.
     *
     * @return Return input class canonical name, or {@code null} if the input class
     *         has not been set yet
     * @omitFromUI
     */
    public String getInputClassStr() {
        // inputClass is transient and only assigned by port setup or setInputClassStr;
        // reading the property before that must not fail with a NullPointerException.
        return (this.inputClass == null) ? null : this.inputClass.getCanonicalName();
    }

    /**
     * Sets input class type expected on input port. The class is loaded by name through
     * the current thread's context class loader, so it should be found in classpath.
     * Any failure while loading is passed to {@code DTThrowable.rethrow}.
     *
     * @param inputClassStr name of the input class to load
     * @omitFromUI
     */
    public void setInputClassStr(String inputClassStr) {
        try {
            ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
            this.inputClass = contextLoader.loadClass(inputClassStr);
        } catch (Throwable cause) {
            DTThrowable.rethrow(cause);
        }
    }

    /**
     * Returns the canonical name of the output class on output port.
     *
     * @return Returns output class canonical name, or {@code null} if the output class
     *         has not been set yet
     * @omitFromUI
     */
    public String getOutputClassStr() {
        // outputClass is transient and only assigned by port setup or setOutputClassStr;
        // reading the property before that must not fail with a NullPointerException.
        return (this.outputClass == null) ? null : this.outputClass.getCanonicalName();
    }

    /**
     * Sets output class type to be returned on output port. The class is loaded by name
     * through the current thread's context class loader, so it should be found in classpath.
     * Any failure while loading is passed to {@code DTThrowable.rethrow}.
     * This property is required to be set from config file.
     *
     * @param outputClassStr name of the output class to load
     * @omitFromUI
     */
    public void setOutputClassStr(String outputClassStr) {
        try {
            ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
            this.outputClass = contextLoader.loadClass(outputClassStr);
        } catch (Throwable cause) {
            DTThrowable.rethrow(cause);
        }
    }

    /**
     * One component of the lookup key: the input POJO field whose value is looked up,
     * and the store column it is matched against. Setting the POJO field while no column
     * name is present also uses that name as the column name.
     */
    public static class LookupKeyField {
        @NotNull
        private String inputPOJOSubstituteField;
        private String dbColumnName;

        public LookupKeyField() {
            // Required by kryo
        }

        /** @return name of the input POJO field providing the key value */
        public String getInputPOJOSubstituteField() {
            return this.inputPOJOSubstituteField;
        }

        /**
         * Sets the input POJO field; if no column name has been set so far, the column
         * name is set to the same value.
         *
         * @param inputPOJOSubstituteField name of the input POJO field
         */
        public void setInputPOJOSubstituteField(String inputPOJOSubstituteField) {
            if (dbColumnName == null) {
                dbColumnName = inputPOJOSubstituteField;
            }
            this.inputPOJOSubstituteField = inputPOJOSubstituteField;
        }

        /** @return name of the store column for this key component */
        public String getDbColumnName() {
            return this.dbColumnName;
        }

        /** @param dbColumnName name of the store column for this key component */
        public void setDbColumnName(String dbColumnName) {
            this.dbColumnName = dbColumnName;
        }
    }

    /**
     * One enrichment mapping: the store column a value is read from and the output POJO
     * field it is written to. Setting the output field while no column name is present
     * also uses that name as the column name.
     */
    public static class EnrichField {
        @NotNull
        private String toOutputPOJOField;
        private String fromDBColumn;

        public EnrichField() {
            // Required by kryo
        }

        /** @return name of the output POJO field receiving the value */
        public String getToOutputPOJOField() {
            return this.toOutputPOJOField;
        }

        /**
         * Sets the output POJO field; if no source column has been set so far, the source
         * column is set to the same value.
         *
         * @param toOutputPOJOField name of the output POJO field
         */
        public void setToOutputPOJOField(String toOutputPOJOField) {
            if (fromDBColumn == null) {
                fromDBColumn = toOutputPOJOField;
            }
            this.toOutputPOJOField = toOutputPOJOField;
        }

        /** @return name of the store column the value is read from */
        public String getFromDBColumn() {
            return this.fromDBColumn;
        }

        /** @param fromDBColumn name of the store column the value is read from */
        public void setFromDBColumn(String fromDBColumn) {
            this.fromDBColumn = fromDBColumn;
        }
    }

    /**
     * One pass-through mapping: the input POJO field a value is read from and the output
     * POJO field it is copied to. Whichever side is set first also fills the other side
     * as long as that side is still unset.
     */
    public static class CopyField {
        private String fromInputPOJOField;
        private String toOutputPOJOField;

        public CopyField() {
            // Required by kryo
        }

        /** @return name of the source field on the input POJO */
        public String getFromInputPOJOField() {
            return this.fromInputPOJOField;
        }

        /**
         * Sets the source field; if no destination field has been set so far, the
         * destination is set to the same name.
         *
         * @param fromInputPOJOField name of the source field on the input POJO
         */
        public void setFromInputPOJOField(String fromInputPOJOField) {
            if (toOutputPOJOField == null) {
                toOutputPOJOField = fromInputPOJOField;
            }
            this.fromInputPOJOField = fromInputPOJOField;
        }

        /** @return name of the destination field on the output POJO */
        public String getToOutputPOJOField() {
            return this.toOutputPOJOField;
        }

        /**
         * Sets the destination field; if no source field has been set so far, the source
         * is set to the same name.
         *
         * @param toOutputPOJOField name of the destination field on the output POJO
         */
        public void setToOutputPOJOField(String toOutputPOJOField) {
            if (fromInputPOJOField == null) {
                fromInputPOJOField = toOutputPOJOField;
            }
            this.toOutputPOJOField = toOutputPOJOField;
        }
    }

    /**
     * Pairs the getter used to read a value from the input tuple with the setter used to
     * write it to the output tuple, for one copy field.
     */
    @SuppressWarnings("rawtypes")
    public static class MethodMap {
        // Reads the source field from the input POJO.
        public Getter get;
        // Writes the destination field on the output POJO.
        public Setter set;
    }
}