com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java

Source

package com.linkedin.databus2.producers.gg;
/*
 * Copyright 2013 LinkedIn Corp. All rights reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.nio.charset.Charset;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.Calendar;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.avro.Schema;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.log4j.Logger;

import com.linkedin.databus.core.DbusConstants;
import com.linkedin.databus.core.util.InvalidConfigException;
import com.linkedin.databus2.core.DatabusException;
import com.linkedin.databus2.producers.ConstantPartitionFunction;
import com.linkedin.databus2.producers.PartitionFunction;
import com.linkedin.databus2.relay.config.LogicalSourceStaticConfig;
import com.linkedin.databus2.schemas.utils.SchemaHelper;

/**
 * Static helpers for the GG event producer: building the partition function from a logical source
 * config, splitting {@code gg://<dir>:<prefix>} URIs, and converting string field values into the
 * Java objects used for Avro fields.
 *
 * <p>All methods are static and the class keeps no mutable state.
 */
public class GGEventGenerationFactory {
    private static final Logger LOG = Logger.getLogger(GGEventGenerationFactory.class);

    /** Prefix of the only partition function configuration understood by {@link #buildPartitionFunction}. */
    private static final String CONSTANT_PARTITION_PREFIX = "constant:";

    /** Prefix that marks a hex-encoded BLOB/RAW value. */
    private static final String HEX_PREFIX = "0x";

    /** {@code gg://<group 1>:<group 2>}; group 1 is greedy, so the split happens at the LAST ':'. */
    private static final Pattern GG_URI_PATTERN = Pattern.compile("gg://(.*):(.*)");

    /** Timestamp layout, e.g. 2013-03-10:11:45:01.001000000 (fraction may have 0 to 9 digits). */
    private static final Pattern GG_TIMESTAMP_PATTERN =
            Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2}):(\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{0,9})");

    /** A leading yyyy-MM-dd date; anything after it is ignored. */
    private static final Pattern GG_DATE_PATTERN = Pattern.compile("(\\d{4}-\\d{2}-\\d{2}).*");

    /**
     * Given a logical source config, create a partition function.
     *
     * <p>Only the {@code constant:<short>} form is supported.
     *
     * @param sourceConfig the logical source configuration to read the partition function from
     * @return the partition function
     * @throws InvalidConfigException if the partition function is missing, is not of the
     *         {@code constant:} form, or its partition number cannot be parsed
     */
    public static PartitionFunction buildPartitionFunction(LogicalSourceStaticConfig sourceConfig)
            throws InvalidConfigException {
        String partitionFunction = sourceConfig.getPartitionFunction();
        if (partitionFunction == null || !partitionFunction.startsWith(CONSTANT_PARTITION_PREFIX)) {
            throw new InvalidConfigException("Invalid partition configuration (" + partitionFunction + ").");
        }

        try {
            String numberPart = partitionFunction.substring(CONSTANT_PARTITION_PREFIX.length()).trim();
            short constantPartitionNumber = Short.parseShort(numberPart);
            return new ConstantPartitionFunction(constantPartitionNumber);
        } catch (Exception ex) {
            // Could be a NumberFormatException or any other failure while parsing the partition
            // number or building the function.
            InvalidConfigException invalid = new InvalidConfigException("Invalid partition configuration ("
                    + partitionFunction + "). " + "Could not parse the constant partition number.");
            // Keep the root cause; initCause is used because only the message-only constructor
            // is known from this file.
            invalid.initCause(ex);
            throw invalid;
        }
    }

    /**
     * Extracts the first part of a {@code gg://<dir>:<prefix>} URI.
     *
     * @param uri the URI to split
     * @return everything between {@code gg://} and the last ':' of the URI
     * @throws DatabusException if {@code uri} is null or does not match the expected format
     */
    public static String uriToGGDir(String uri) throws DatabusException {
        return matchGGUri(uri).group(1);
    }

    /**
     * Extracts the second part of a {@code gg://<dir>:<prefix>} URI.
     *
     * @param uri the URI to split
     * @return everything after the last ':' of the URI
     * @throws DatabusException if {@code uri} is null or does not match the expected format
     */
    public static String uriToXmlPrefix(String uri) throws DatabusException {
        return matchGGUri(uri).group(2);
    }

    /** Matches {@code uri} against the gg URI pattern, or fails with a DatabusException. */
    private static Matcher matchGGUri(String uri) throws DatabusException {
        if (uri == null) {
            throw new DatabusException("uri passed is null and not valid");
        }

        Matcher matcher = GG_URI_PATTERN.matcher(uri);
        if (!matcher.matches()) {
            throw new DatabusException("Expected uri format for gg path not found");
        }
        return matcher;
    }

    /**
     * Converts a string value into the Java object for the given Avro field.
     *
     * <p>The Avro type obtained from {@code SchemaHelper.getAnyType} only decides whether the field
     * is supported at all; the actual conversion is done by {@link #convertToSimpleType}.
     *
     * @param fieldValue the value as a string
     * @param avroField the Avro field the value belongs to
     * @return the converted value
     * @throws DatabusException if the Avro type is unsupported or the value cannot be converted
     */
    public static Object stringToAvroType(String fieldValue, Schema.Field avroField) throws DatabusException {
        Schema.Type fieldType = SchemaHelper.getAnyType(avroField);
        String recordFieldName = avroField.name();

        switch (fieldType) {
        case BOOLEAN:
        case BYTES:
        case DOUBLE:
        case FLOAT:
        case INT:
        case LONG:
        case STRING:
        case NULL:
            return convertToSimpleType(fieldValue, avroField);
        case RECORD:
        case ARRAY:
            // TODO Add support for these datatypes (warning: when do so, watch out for fieldType
            // vs. avroField.schema() mismatches for arrays within unions:  see DDSDBUS-3093/3136)
            throw new DatabusException("Handling of Avro '" + fieldType + "' field type not yet implemented!");
        case ENUM:
        case FIXED:
        case MAP:
        case UNION:
        default:
            throw new DatabusException("unknown field type: " + recordFieldName + ":" + fieldType);
        }
    }

    /**
     * Converts a string value according to the field's {@code dbFieldType} meta attribute.
     *
     * <ul>
     *   <li>INTEGER, LONG, FLOAT, DOUBLE: the matching boxed number</li>
     *   <li>DATE, TIMESTAMP: a {@code Long} holding milliseconds</li>
     *   <li>CLOB, VARCHAR, VARCHAR2, NVARCHAR, NVARCHAR2, XMLTYPE, CHAR: the string unchanged</li>
     *   <li>BLOB, RAW: a {@code byte[]} decoded from the hex string</li>
     * </ul>
     *
     * @param fieldValue the value as a string
     * @param avroField the Avro field carrying the {@code dbFieldType} meta attribute
     * @return the converted value
     * @throws DatabusException if the database type is missing, unknown or not implemented, or a
     *         date/timestamp/hex value is malformed
     * @throws NumberFormatException if a numeric value cannot be parsed (propagated unchanged)
     */
    public static Object convertToSimpleType(String fieldValue, Schema.Field avroField) throws DatabusException {
        String databaseFieldType = SchemaHelper.getMetaField(avroField, "dbFieldType");
        String recordFieldName = avroField.name();

        // Presumably getMetaField yields null when the attribute is absent (confirm); report that
        // the same way as an unknown type rather than failing with a NullPointerException.
        if (databaseFieldType == null) {
            throw new DatabusException("unknown field type: " + recordFieldName + ":" + databaseFieldType);
        }

        if (databaseFieldType.equalsIgnoreCase("INTEGER")) {
            return Integer.valueOf(fieldValue);
        }
        if (databaseFieldType.equalsIgnoreCase("LONG")) {
            return Long.valueOf(fieldValue);
        }
        if (databaseFieldType.equalsIgnoreCase("DATE")) {
            return ggDateStringToLong(fieldValue);
        }
        if (databaseFieldType.equalsIgnoreCase("TIMESTAMP")) {
            return ggTimeStampStringToMilliSeconds(fieldValue);
        }
        if (databaseFieldType.equalsIgnoreCase("FLOAT")) {
            return Float.valueOf(fieldValue);
        }
        if (databaseFieldType.equalsIgnoreCase("DOUBLE")) {
            return Double.valueOf(fieldValue);
        }
        if (isCharacterDbType(databaseFieldType)) {
            return fieldValue;
        }
        if (databaseFieldType.equalsIgnoreCase("BLOB") || databaseFieldType.equalsIgnoreCase("RAW")) {
            return decodeBinaryField(fieldValue, databaseFieldType, recordFieldName);
        }
        if (databaseFieldType.equalsIgnoreCase("ARRAY")) {
            throw new DatabusException("ARRAY type still not implemented!"); //TODO add support for array
        }
        if (databaseFieldType.equalsIgnoreCase("TABLE")) {
            throw new DatabusException("TABLE type still not implemented!"); //TODO add support for table
        }
        throw new DatabusException("unknown field type: " + recordFieldName + ":" + databaseFieldType);
    }

    /** Tells whether the database type is one whose value is passed through as a plain string. */
    private static boolean isCharacterDbType(String databaseFieldType) {
        return databaseFieldType.equalsIgnoreCase("CLOB")
                || databaseFieldType.equalsIgnoreCase("VARCHAR")
                || databaseFieldType.equalsIgnoreCase("VARCHAR2")
                || databaseFieldType.equalsIgnoreCase("NVARCHAR")
                || databaseFieldType.equalsIgnoreCase("NVARCHAR2")
                || databaseFieldType.equalsIgnoreCase("XMLTYPE")
                || databaseFieldType.equalsIgnoreCase("CHAR");
    }

    /** Decodes a "0x"-prefixed hex string of a BLOB/RAW field into bytes; empty input gives an empty array. */
    private static byte[] decodeBinaryField(String fieldValue, String databaseFieldType, String recordFieldName)
            throws DatabusException {
        if (fieldValue.length() == 0) {
            return new byte[0];
        }
        if (fieldValue.length() <= 2) {
            throw new DatabusException("Unable to decode the string because length is less than 2");
        }
        if (!isStringHex(fieldValue)) {
            throw new DatabusException("Unable to decode the string because it is not hex-encoded");
        }
        try {
            // NOTE(review): besides the "0x" prefix this also drops the LAST character of the
            // value. That is only correct if the input carries one trailing non-hex character;
            // for a plain "0x<even number of digits>" value it makes the decode fail. Behaviour
            // is kept as-is because the wire format is not visible here -- please confirm.
            return stringToHex(fieldValue.substring(HEX_PREFIX.length(), fieldValue.length() - 1));
        } catch (DecoderException e) {
            throw withCause(new DatabusException(
                    "Unable to decode a " + databaseFieldType + " field: " + recordFieldName), e);
        }
    }

    /** Attaches {@code cause} to {@code exception} and returns it, so the root failure is not lost. */
    private static DatabusException withCause(DatabusException exception, Throwable cause) {
        exception.initCause(cause);
        return exception;
    }

    /**
     * Tells whether the value looks like a hex-encoded string.
     *
     * @param fieldValue the value to inspect, may be null
     * @return true if the value is longer than two characters and starts with "0x"
     */
    public static boolean isStringHex(String fieldValue) {
        return fieldValue != null
                && fieldValue.length() > HEX_PREFIX.length()
                && fieldValue.startsWith(HEX_PREFIX);
    }

    /**
     * Decodes a string of hexadecimal digits (without any prefix) into bytes.
     *
     * @param hexString the hexadecimal digits
     * @return the decoded bytes
     * @throws DecoderException if {@code Hex.decodeHex} rejects the input
     */
    public static byte[] stringToHex(String hexString) throws DecoderException {
        return Hex.decodeHex(hexString.toCharArray());
    }

    /**
     * Converts a timestamp string into nanoseconds by scaling the millisecond value with
     * {@code DbusConstants.NUM_NSECS_IN_MSEC}; precision below a millisecond is therefore lost.
     *
     * @param value the timestamp, see {@link #ggTimeStampStringToMilliSeconds(String)}
     * @return the timestamp in nanoseconds
     * @throws DatabusException if the timestamp does not have the expected format
     */
    public static long ggTimeStampStringToNanoSeconds(String value) throws DatabusException {
        return ggTimeStampStringToMilliSeconds(value) * DbusConstants.NUM_NSECS_IN_MSEC;
    }

    /**
     * Converts a timestamp of the form {@code yyyy-MM-dd:HH:mm:ss.fffffffff} (0 to 9 fraction
     * digits), interpreted in UTC, into epoch milliseconds. Fraction digits beyond the third are
     * truncated.
     *
     * @param value the timestamp string
     * @return milliseconds since the epoch
     * @throws DatabusException if {@code value} is null or does not have the expected format
     */
    public static long ggTimeStampStringToMilliSeconds(String value) throws DatabusException {
        if (value == null) {
            throw new DatabusException("The timestamp format is not as expected, cannot proceed!");
        }
        Matcher matcher = GG_TIMESTAMP_PATTERN.matcher(value);
        if (!matcher.matches()) {
            throw new DatabusException("The timestamp format is not as expected, cannot proceed!");
        }

        Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        calendar.set(Integer.parseInt(matcher.group(1)),
                Integer.parseInt(matcher.group(2)) - 1, // Calendar months are 0-based
                Integer.parseInt(matcher.group(3)),
                Integer.parseInt(matcher.group(4)),
                Integer.parseInt(matcher.group(5)),
                Integer.parseInt(matcher.group(6)));

        // The calendar starts out as "now", so the millisecond field must always be overwritten.
        calendar.set(Calendar.MILLISECOND, fractionToMillis(matcher.group(7)));
        return calendar.getTimeInMillis();
    }

    /**
     * Turns the digits after the decimal point of a seconds value into milliseconds: the first
     * three digits are used and missing ones count as zero, so "5" is 500 ms and "" is 0 ms.
     */
    private static int fractionToMillis(String fraction) {
        int millis = 0;
        for (int i = 0; i < 3; i++) {
            int digit = (i < fraction.length()) ? Character.digit(fraction.charAt(i), 10) : 0;
            millis = millis * 10 + digit;
        }
        return millis;
    }

    /**
     * Converts a string starting with a {@code yyyy-MM-dd} date into epoch milliseconds; anything
     * after the date is ignored.
     *
     * <p>NOTE(review): {@code java.sql.Date.valueOf} resolves the date at midnight in the JVM's
     * default time zone, whereas {@link #ggTimeStampStringToMilliSeconds(String)} uses UTC.
     * Confirm whether that difference is intended.
     *
     * @param value the date string
     * @return milliseconds since the epoch
     * @throws DatabusException if {@code value} is null, does not start with a date, or the date
     *         is rejected by {@code java.sql.Date.valueOf}
     */
    public static long ggDateStringToLong(String value) throws DatabusException {
        if (value == null) {
            throw new DatabusException("The date format is not as expected, cannot proceed!");
        }
        Matcher matcher = GG_DATE_PATTERN.matcher(value);
        if (!matcher.matches()) {
            throw new DatabusException("The date format is not as expected, cannot proceed!");
        }

        try {
            return Date.valueOf(matcher.group(1)).getTime();
        } catch (IllegalArgumentException e) {
            // e.g. an out-of-range month or day that still matches the digit pattern
            throw withCause(new DatabusException("The date format is not as expected, cannot proceed!"), e);
        }
    }

}