com.linkedin.cubert.io.text.TextTupleCreator.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.cubert.io.text.TextTupleCreator.java

Source

/* (c) 2014 LinkedIn Corp. All rights reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.io.text;

import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.io.Text;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.codehaus.jackson.JsonNode;

import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.DataType;
import com.linkedin.cubert.block.TupleCreator;

/**
 * 
 * Creates tuples from text input. The separator is read from the json.
 * 
 * @author Krishna Puttaswamy
 * 
 */

public class TextTupleCreator implements TupleCreator {
    private DataType[] typeArray;
    private BlockSchema schema;
    Tuple tuple;
    private String separator = new String(new byte[] { PigTextOutputFormatWrapper.defaultDelimiter });

    @Override
    public void setup(JsonNode json) throws IOException {
        if (json.has("params")) {
            JsonNode params = json.get("params");
            if (params.has("separator")) {
                String str = params.get("separator").getTextValue();
                str = StringEscapeUtils.unescapeJava(str);
                byte[] bytes = str.getBytes("UTF-8");
                separator = new String(bytes);
            }
        }

        schema = new BlockSchema(json.get("schema"));
        typeArray = new DataType[schema.getNumColumns()];
        for (int i = 0; i < schema.getNumColumns(); i++)
            typeArray[i] = schema.getType(i);

        tuple = TupleFactory.getInstance().newTuple(schema.getNumColumns());
    }

    @Override
    public Tuple create(Object key, Object value) throws ExecException {
        Text t = (Text) value;
        String[] fields = t.toString().split(separator);

        for (int i = 0; i < fields.length; i++) {
            Object obj = null;

            if (fields[i] != null && fields[i].length() != 0)
                switch (typeArray[i]) {
                case INT:
                    obj = new Integer(Integer.parseInt(fields[i]));
                    break;

                case LONG:
                    obj = new Long(Long.parseLong(fields[i]));
                    break;

                case STRING:
                    obj = fields[i];
                    break;

                case DOUBLE:
                    obj = Double.parseDouble(fields[i]);
                    break;

                case FLOAT:
                    obj = Float.parseFloat(fields[i]);
                    break;
                default:
                    break;
                }

            tuple.set(i, obj);
        }

        return tuple;
    }
}