ml.shifu.guagua.hadoop.io.GuaguaSequenceAsTextRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for ml.shifu.guagua.hadoop.io.GuaguaSequenceAsTextRecordReader.java

Source

/*
 * Copyright [2013-2014] PayPal Software Foundation
 *  
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *  
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.guagua.hadoop.io;

import java.io.IOException;

import ml.shifu.guagua.io.GuaguaFileSplit;
import ml.shifu.guagua.io.GuaguaRecordReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.SequenceFileAsTextRecordReader;

/**
 * A {@link GuaguaRecordReader} that reads a Hadoop sequence file record by record and exposes every key and value as
 * {@link Text} wrapped in a {@link GuaguaWritableAdapter}.
 * 
 * <p>
 * The actual reading is delegated to {@link SequenceFileAsTextRecordReader}; this class only adapts that reader to the
 * {@link GuaguaRecordReader} interface.
 * 
 * <p>
 * If the default constructor is used, {@link #initialize(GuaguaFileSplit)} must be called before reading, like below:
 * 
 * <pre>
 * this.setRecordReader(new GuaguaSequenceAsTextRecordReader());
 * this.getRecordReader().initialize(fileSplit);
 * </pre>
 * 
 * or directly use one of the other constructors, which initialize the reader themselves:
 * 
 * <pre>
 * this.setRecordReader(new GuaguaSequenceAsTextRecordReader(fileSplit));
 * </pre>
 * 
 * <p>
 * Only {@link #close()} is synchronized; the other methods are not, so an instance should not be shared between
 * threads without external synchronization.
 */
public class GuaguaSequenceAsTextRecordReader
        implements GuaguaRecordReader<GuaguaWritableAdapter<Text>, GuaguaWritableAdapter<Text>> {

    /** Delegate reader for the current split; {@code null} until initialized and again after {@link #close()}. */
    private SequenceFileAsTextRecordReader sequenceReader;

    /** Hadoop configuration handed to every delegate reader created by {@link #initialize(GuaguaFileSplit)}. */
    private final Configuration conf;

    /** Key holder, created lazily on the first {@link #nextKeyValue()} call and reused for every record. */
    private GuaguaWritableAdapter<Text> key = null;

    /** Value holder, created lazily on the first {@link #nextKeyValue()} call and reused for every record. */
    private GuaguaWritableAdapter<Text> value = null;

    /**
     * Creates an uninitialized reader with a fresh {@link Configuration}. {@link #initialize(GuaguaFileSplit)} must
     * be called before records can be read.
     */
    public GuaguaSequenceAsTextRecordReader() {
        this.conf = new Configuration();
    }

    /**
     * Creates a reader with a fresh {@link Configuration} and initializes it on the given split.
     * 
     * @param split
     *            the file split to read
     * @throws IOException
     *             if the underlying sequence file reader cannot be opened
     */
    public GuaguaSequenceAsTextRecordReader(GuaguaFileSplit split) throws IOException {
        this(new Configuration(), split);
    }

    /**
     * Creates a reader with the given configuration and initializes it on the given split.
     * 
     * @param conf
     *            the Hadoop configuration used to open the sequence file
     * @param split
     *            the file split to read
     * @throws IOException
     *             if the underlying sequence file reader cannot be opened
     */
    public GuaguaSequenceAsTextRecordReader(Configuration conf, GuaguaFileSplit split) throws IOException {
        this.conf = conf;
        initialize(split);
    }

    /**
     * Return the progress within the input split.
     * 
     * @return the progress reported by the underlying reader, or 0.0 if this reader is not initialized (or has been
     *         closed)
     * @throws IOException
     *             if the underlying reader fails to report its progress
     */
    public float getProgress() throws IOException {
        SequenceFileAsTextRecordReader reader = this.sequenceReader;
        // Nothing has been read yet when there is no open reader, so report "no progress" instead of failing.
        return reader == null ? 0.0f : reader.getProgress();
    }

    /**
     * Opens the given split for reading. If this reader was already initialized, the previously opened underlying
     * reader is closed first so that its file handle is not leaked.
     * 
     * @param split
     *            the file split to read
     * @throws IOException
     *             if closing the previous reader or opening the new one fails
     */
    @Override
    public void initialize(GuaguaFileSplit split) throws IOException {
        // Build the split description first: if 'split' is invalid we fail before touching the current reader.
        FileSplit fileSplit = new FileSplit(new Path(split.getPath()), split.getOffset(), split.getLength(),
                (String[]) null);
        releaseReader();
        this.sequenceReader = new SequenceFileAsTextRecordReader(conf, fileSplit);
    }

    /**
     * Advances to the next record, loading it into the reused key and value holders.
     * 
     * @return the result of the underlying reader's {@code next} call
     * @throws IOException
     *             if the underlying reader fails
     * @throws IllegalStateException
     *             if the reader has not been initialized or has already been closed
     */
    @Override
    public boolean nextKeyValue() throws IOException {
        SequenceFileAsTextRecordReader reader = this.sequenceReader;
        if (reader == null) {
            throw new IllegalStateException(
                    "Record reader is not initialized or already closed; call initialize(GuaguaFileSplit) first.");
        }
        if (key == null) {
            key = new GuaguaWritableAdapter<Text>(new Text());
        }
        if (value == null) {
            value = new GuaguaWritableAdapter<Text>(new Text());
        }
        return reader.next(key.getWritable(), value.getWritable());
    }

    /**
     * @return the key holder filled by the last {@link #nextKeyValue()} call, or {@code null} if
     *         {@link #nextKeyValue()} has never been called
     */
    @Override
    public GuaguaWritableAdapter<Text> getCurrentKey() {
        return key;
    }

    /**
     * @return the value holder filled by the last {@link #nextKeyValue()} call, or {@code null} if
     *         {@link #nextKeyValue()} has never been called
     */
    @Override
    public GuaguaWritableAdapter<Text> getCurrentValue() {
        return value;
    }

    /**
     * Closes the underlying reader if one is open. Calling this method more than once is harmless because the
     * reference to the closed reader is dropped.
     * 
     * @throws IOException
     *             if closing the underlying reader fails
     */
    @Override
    public synchronized void close() throws IOException {
        releaseReader();
    }

    /**
     * Closes and forgets the current underlying reader, if any. The field is cleared before closing so that a failing
     * close does not leave a half-closed reader behind.
     */
    private void releaseReader() throws IOException {
        SequenceFileAsTextRecordReader current = this.sequenceReader;
        this.sequenceReader = null;
        if (current != null) {
            current.close();
        }
    }

}