com.odiago.flumebase.io.CharBufferUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.odiago.flumebase.io.CharBufferUtils.java

Source

/**
 * Licensed to Odiago, Inc. under one or more contributor license
 * agreements.  See the NOTICE.txt file distributed with this work for
 * additional information regarding copyright ownership.  Odiago, Inc.
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the
 * License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package com.odiago.flumebase.io;

import java.io.UnsupportedEncodingException;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.avro.util.Utf8;

import org.apache.commons.lang.text.StrTokenizer;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.odiago.flumebase.lang.ListType;
import com.odiago.flumebase.lang.PreciseType;
import com.odiago.flumebase.lang.Timestamp;
import com.odiago.flumebase.lang.Type;

/**
 * Utility methods for parsing string-based values without
 * requiring that they be incorporated into a String object.
 */
public class CharBufferUtils {

    private static final Logger LOG = LoggerFactory.getLogger(CharBufferUtils.class.getName());

    private static final String TRUE_STR = "true";
    private static final String FALSE_STR = "false";

    private CharBufferUtils() {
    }

    /**
     * Parse a CharSequence into a bool. Only the case-sensitive values
     * "true" and "false" are recongized; others result in a ColumnParseException.
     */
    public static boolean parseBool(CharSequence chars) throws ColumnParseException {
        if (TRUE_STR.contentEquals(chars)) {
            return true;
        } else if (FALSE_STR.contentEquals(chars)) {
            return false;
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Could not parse as boolean: " + chars);
            }
            throw new ColumnParseException("Invalid boolean");
        }
    }

    /**
     * Parses a CharSequence into an integer in base 10.
     */
    public static int parseInt(CharBuffer chars) throws ColumnParseException {
        int result = 0;

        final int limit = chars.limit();
        final int start = chars.position();
        if (0 == limit - start) {
            // The empty string can not be parsed as an integer.
            throw new ColumnParseException("No value provided");
        }

        boolean isNegative = false;
        for (int pos = start; pos < limit; pos++) {
            char cur = chars.get();
            if (pos == start && cur == '-') {
                isNegative = true;
                if (limit - start == 1) {
                    // "-" is not an integer we accept.
                    throw new ColumnParseException("No integer part provided");
                }
            } else if (Character.isDigit(cur)) {
                byte digitVal = (byte) (cur - '0');
                result = result * 10 - digitVal;
                // TODO: Detect over/underflow and signal exception?
            } else {
                throw new ColumnParseException("Invalid character in number");
            }
        }

        // We built up the value as a negative, to use the larger "half" of the
        // integer range. If it's not negative, flip it on return.
        return isNegative ? result : -result;
    }

    /**
     * Parses a CharSequence into a long in base 10.
     */
    public static long parseLong(CharBuffer chars) throws ColumnParseException {
        long result = 0L;

        final int limit = chars.limit();
        final int start = chars.position();
        if (0 == limit - start) {
            // The empty string can not be parsed as an integer.
            throw new ColumnParseException("No value provided");
        }

        boolean isNegative = false;
        for (int pos = start; pos < limit; pos++) {
            char cur = chars.get();
            if (pos == start && cur == '-') {
                isNegative = true;
                if (limit - start == 1) {
                    // "-" is not an integer we accept.
                    throw new ColumnParseException("No integer part provided");
                }
            } else if (Character.isDigit(cur)) {
                byte digitVal = (byte) (cur - '0');
                result = result * 10 - digitVal;
                // TODO: Detect over/underflow and signal exception?
            } else {
                throw new ColumnParseException("Invalid character in number");
            }
        }

        // We built up the value as a negative, to use the larger "half" of the
        // integer range. If it's not negative, flip it on return.
        return isNegative ? result : -result;
    }

    /**
     * Parses a CharSequence into a floating-point value.
     */
    public static float parseFloat(CharBuffer chars) throws ColumnParseException {
        try {
            return Float.valueOf(new String(chars.array()));
        } catch (NumberFormatException nfe) {
            throw new ColumnParseException(nfe);
        }
    }

    /**
     * Parses a CharSequence into a double-precision floating-point value.
     */
    public static double parseDouble(CharBuffer chars) throws ColumnParseException {
        try {
            return Double.valueOf(new String(chars.array()));
        } catch (NumberFormatException nfe) {
            throw new ColumnParseException(nfe);
        }
    }

    public static String parseString(CharBuffer chars) throws ColumnParseException {
        return chars.toString();
    }

    /**
     * Parses a CharSequence into a list of values, all of some other type.
     */
    public static List<Object> parseList(CharBuffer chars, Type listItemType, String nullStr, String listDelim)
            throws ColumnParseException {
        StrTokenizer tokenizer = new StrTokenizer(chars.toString(), listDelim.charAt(0));
        List<Object> out = new ArrayList<Object>();

        while (tokenizer.hasNext()) {
            String part = (String) tokenizer.next();
            out.add(parseType(CharBuffer.wrap(part), listItemType, nullStr, listDelim));
        }

        return Collections.unmodifiableList(out);
    }

    /**
     * Parses a CharSequence into a value of a given expected type.
     * @param chars the unparsed characters representing the value
     * @param expectedType the expected type of the final value
     * @param nullStr a token indicating a null String instance.
     */
    public static Object parseType(CharBuffer chars, Type expectedType, String nullStr, String listDelim)
            throws ColumnParseException {
        Type.TypeName primitiveTypeName = expectedType.getPrimitiveTypeName();

        // TODO(aaron): Test how this handles a field that is an empty string.
        Object out = null;
        switch (primitiveTypeName) {
        case BINARY:
            try {
                out = ByteBuffer.wrap(chars.toString().getBytes("UTF-8"));
            } catch (UnsupportedEncodingException uee) {
                // Shouldn't ever be able to get here.
                // (http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html)
                LOG.error("Your JVM doesn't support UTF-8. This is really, really bad.");
                throw new ColumnParseException(uee);
            }
            break;
        case BOOLEAN:
            out = CharBufferUtils.parseBool(chars);
            break;
        case INT:
            out = CharBufferUtils.parseInt(chars);
            break;
        case BIGINT:
            out = CharBufferUtils.parseLong(chars);
            break;
        case FLOAT:
            out = CharBufferUtils.parseFloat(chars);
            break;
        case DOUBLE:
            out = CharBufferUtils.parseDouble(chars);
            break;
        case STRING:
            String asStr = chars.toString();
            if (expectedType.isNullable() && asStr.equals(nullStr)) {
                out = null;
            } else {
                out = new Utf8(asStr);
            }
            break;
        case TIMESTAMP:
            out = CharBufferUtils.parseLong(chars);
            if (null != out) {
                out = new Timestamp((Long) out);
            }
            break;
        case TIMESPAN:
            // TODO: This should return a TimeSpan object, which is actually two
            // fields. We need to work on this... it should not just be a 'long'
            // representation.
            out = CharBufferUtils.parseLong(chars);
            break;
        case PRECISE:
            PreciseType preciseType = PreciseType.toPreciseType(expectedType);
            out = preciseType.parseStringInput(chars.toString());
            break;
        case LIST:
            out = parseList(chars, ListType.toListType(expectedType).getElementType(), nullStr, listDelim);
            break;
        default:
            throw new ColumnParseException("Cannot parse recursive types");
        }

        return out;
    }

}