gov.jgi.meta.pig.aggregate.BLAT.java Source code

Java tutorial

Introduction

Here is the source code for gov.jgi.meta.pig.aggregate.BLAT.java

Source

/*
 * Copyright (c) 2010, The Regents of the University of California, through Lawrence Berkeley
 * National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy).
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided
 * that the following conditions are met:
 *
 * (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the
 * following disclaimer.
 *
 * (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions
 * and the following disclaimer in the documentation and/or other materials provided with the distribution.
 *
 * (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept.
 * of Energy, nor the names of its contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the
 * features, functionality or performance of the source code ("Enhancements") to anyone; however,
 * if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley
 * National Laboratory, without imposing a separate written license agreement for such Enhancements,
 * then you hereby grant the following license: a  non-exclusive, royalty-free perpetual license to install,
 * use, modify, prepare derivative works, incorporate into other computer software, distribute, and
 * sublicense such enhancements or derivative works thereof, in binary and source code form.
 */

package gov.jgi.meta.pig.aggregate;

import java.io.IOException;
import java.util.Iterator;

import org.apache.commons.lang.StringUtils;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

/**
 * string.LOWER implements eval function to convert a string to lower case
 * Example:
 *      register pigudfs.jar;
 *      A = load 'mydata' as (name);
 *      B = foreach A generate string.LOWER(name);
 *      dump B;
 */
public class BLAT extends EvalFunc<Tuple> {

    /**
     * Method invoked on every tuple during foreach evaluation
     * @param input tuple; assumed to be a sequence tuple of the form (id, direction, sequence)
     * @exception java.io.IOException
     */
    public Tuple exec(Tuple input) throws IOException {

        DataBag values = (DataBag) input.get(0);

        if (values.size() == 0)
            return null;

        if (values.size() != 2) {
            ExecException newE = new ExecException("Error: can't merge more than 2 pairs ");
            throw newE;
        }

        Iterator<Tuple> it = values.iterator();
        Tuple seqPair1 = it.next();
        Tuple seqPair2 = it.next();

        if (!arePairedSequences(seqPair1, seqPair2)) {
            ExecException newE = new ExecException("Error: sequences are not pairs");
            throw newE;
        }
        Tuple t = TupleFactory.getInstance().newTuple(3);
        t.set(0, seqPair1.get(0));
        t.set(1, "0");
        String seq1 = seqPair1.get(2).toString();
        String seq2 = seqPair2.get(2).toString();
        t.set(2, (seq1 + StringUtils.reverse(seq2)));

        return t;
    }

    boolean arePairedSequences(Tuple s1, Tuple s2) {
        return true;
    }

}