Java tutorial
/*
 * Copyright (C) 2012 eXo Platform SAS.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.exoplatform;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextInputFormat;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

import java.io.IOException;
import java.util.Arrays;

/**
 * Loads and parses standard log:<br>
 * YYYY-MM-DD HH:MM:SS,MMM[${ThreadName}][${Type}][${ClassName} ${LineNumber}] ${Message}
 * <p>
 * Every input record is turned into a tuple of eight fields, in this order:
 * year, month, day, hour, minute, second, millisecond and the remainder of
 * the line (everything that follows the millisecond field).
 *
 * @author <a href="mailto:abazko@exoplatform.com">Anatoliy Bazko</a>
 */
public class LogLoader extends LoadFunc {

    /** Number of digits in the year field. */
    private static final int YEAR_DIGITS = 4;

    /** Number of digits in the month, day, hour, minute and second fields. */
    private static final int FIELD_DIGITS = 2;

    /** Number of digits in the millisecond field. */
    private static final int MILLISECOND_DIGITS = 3;

    /**
     * Amount passed to {@code DataContext.skip} before each field except the
     * first one; presumably the single separator character ('-', ' ', ':' or
     * ',') of the timestamp.
     */
    private static final int SEPARATOR_LENGTH = 1;

    /**
     * {@link RecordReader} in usage. Assigned in
     * {@link #prepareToRead(RecordReader, PigSplit)}.
     */
    protected RecordReader in = null;

    /**
     * {@link TupleFactory} instance.
     */
    private final TupleFactory tupleFactory = TupleFactory.getInstance();

    /**
     * Reads the next record from the current reader and parses it.
     *
     * @return the parsed tuple, or {@code null} when the reader has no more records
     * @throws IOException if reading fails, or (as an {@link ExecException}) if
     *         the read is interrupted
     * @throws IllegalStateException if the record cannot be parsed
     */
    @Override
    public Tuple getNext() throws IOException {
        try {
            if (!in.nextKeyValue()) {
                return null;
            }

            Text value = (Text) in.getCurrentValue();

            // Text.getBytes() exposes the backing array, which is only valid up
            // to getLength() and may still hold bytes of a previous, longer
            // record. Copy exactly the valid part so that the parser never sees
            // stale data and does not share a buffer with the reader.
            byte[] line = Arrays.copyOf(value.getBytes(), value.getLength());

            return parseData(new DataContext(line));
        } catch (InterruptedException e) {
            // Restore the interrupt flag so that code further up the stack can
            // still observe the interruption.
            Thread.currentThread().interrupt();
            throw new ExecException(e.getMessage(), e);
        }
    }

    /**
     * Builds a tuple from the timestamp fields and the remaining text of one record.
     *
     * @param context parsing state wrapping the bytes of a single record
     * @return tuple of year, month, day, hour, minute, second, millisecond and
     *         the remaining string
     * @throws IllegalStateException if any field cannot be extracted; the
     *         original failure is kept as the cause
     */
    private Tuple parseData(DataContext context) {
        Tuple tuple = tupleFactory.newTuple();

        try {
            tuple.append(extractNumber(context, YEAR_DIGITS));
            tuple.append(extractNextNumber(context, FIELD_DIGITS)); // month
            tuple.append(extractNextNumber(context, FIELD_DIGITS)); // day
            tuple.append(extractNextNumber(context, FIELD_DIGITS)); // hour
            tuple.append(extractNextNumber(context, FIELD_DIGITS)); // minute
            tuple.append(extractNextNumber(context, FIELD_DIGITS)); // second
            tuple.append(extractNextNumber(context, MILLISECOND_DIGITS));
            tuple.append(context.exctractRemainedString());
        } catch (Exception e) {
            // Only exceptions are wrapped; JVM errors (e.g. OutOfMemoryError)
            // propagate unchanged instead of being disguised as a parse failure.
            throw new IllegalStateException(
                "Exception during parsing data, orginal string is '" + context.getOriginalData() + "'", e);
        }

        return tuple;
    }

    /**
     * Extracts a number of the given width at the current position of the context.
     *
     * @param context parsing state
     * @param digits number of digits to read
     * @return the value returned by {@code DataContext.exctractNumber}
     */
    private Object extractNumber(DataContext context, int digits) {
        return context.exctractNumber(digits);
    }

    /**
     * Skips {@link #SEPARATOR_LENGTH} positions and then extracts a number of
     * the given width.
     *
     * @param context parsing state
     * @param digits number of digits to read
     * @return the value returned by {@code DataContext.exctractNumber}
     */
    private Object extractNextNumber(DataContext context, int digits) {
        context.skip(SEPARATOR_LENGTH);
        return extractNumber(context, digits);
    }

    /**
     * {@inheritDoc}
     *
     * @return a new {@link PigTextInputFormat}
     */
    @Override
    public InputFormat getInputFormat() {
        return new PigTextInputFormat();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Stores the reader that {@link #getNext()} reads from; the split is not used.
     */
    @Override
    public void prepareToRead(RecordReader reader, PigSplit split) {
        in = reader;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Registers {@code location} as the input path(s) of the job.
     */
    @Override
    public void setLocation(String location, Job job) throws IOException {
        FileInputFormat.setInputPaths(job, location);
    }
}