dima.kmeansseq.SequenceFile.java Source code

Introduction

Here is the source code for dima.kmeansseq.SequenceFile.java.

Source

package dima.kmeansseq;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.*;
import java.util.*;
import java.rmi.server.UID;
import java.security.MessageDigest;
import org.apache.commons.logging.*;
import org.apache.hadoop.fs.*;
//import org.apache.hadoop.fs.s3.VersionMismatchException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.VersionMismatchException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableName;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.NativeCodeLoader;
import org.apache.hadoop.util.MergeSort;
import org.apache.hadoop.util.PriorityQueue;

/**
 * <code>SequenceFile</code>s are flat files consisting of binary key/value
 * pairs.
 * 
 * <p>
 * <code>SequenceFile</code> provides {@link Writer}, {@link Reader} and
 * {@link Sorter} classes for writing, reading and sorting respectively.
 * </p>
 * 
 * There are three <code>SequenceFile</code> <code>Writer</code>s based on the
 * {@link CompressionType} used to compress key/value pairs:
 * <ol>
 * <li>
 * <code>Writer</code> : Uncompressed records.</li>
 * <li>
 * <code>RecordCompressWriter</code> : Record-compressed files, only compress
 * values.</li>
 * <li>
 * <code>BlockCompressWriter</code> : Block-compressed files, both keys &amp; values
 * are collected in 'blocks' separately and compressed. The size of the 'block'
 * is configurable.
 * </ol>
 * 
 * <p>
 * The actual compression algorithm used to compress key and/or values can be
 * specified by using the appropriate {@link CompressionCodec}.
 * </p>
 * 
 * <p>
 * The recommended way is to use the static <tt>createWriter</tt> methods
 * provided by the <code>SequenceFile</code> to choose the preferred format.
 * </p>
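 * 
 * <p>
 * For example, a minimal writing sketch (illustrative only; the
 * <code>fs</code>, <code>conf</code> and <code>file</code> objects are assumed
 * to be supplied by the caller):
 * </p>
 * 
 * <pre>
 * SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
 *         IntWritable.class, Text.class, CompressionType.BLOCK);
 * try {
 *     writer.append(new IntWritable(1), new Text("first record"));
 *     writer.append(new IntWritable(2), new Text("second record"));
 * } finally {
 *     writer.close();
 * }
 * </pre>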
 * 
 * <p>
 * The {@link Reader} acts as the bridge and can read any of the above
 * <code>SequenceFile</code> formats.
 * </p>
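 * 
 * <p>
 * Reading the file back might look like the sketch below (again assuming
 * <code>fs</code>, <code>conf</code> and <code>file</code> exist, and relying
 * on the standard <code>Reader.next(key, value)</code> method; the reusable
 * key/value instances must match the classes recorded in the file header):
 * </p>
 * 
 * <pre>
 * SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
 * try {
 *     IntWritable key = new IntWritable();
 *     Text value = new Text();
 *     while (reader.next(key, value)) {
 *         // process the key/value pair
 *     }
 * } finally {
 *     reader.close();
 * }
 * </pre>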
 * 
 * <h4 id="Formats">SequenceFile Formats</h4>
 * 
 * <p>
 * Essentially there are 3 different formats for <code>SequenceFile</code>s
 * depending on the <code>CompressionType</code> specified. All of them share a
 * <a href="#Header">common header</a> described below.
 * 
 * <h5 id="Header">SequenceFile Header</h5>
 * <ul>
 * <li>
 * version - 3 bytes of magic header <b>SEQ</b>, followed by 1 byte of actual
 * version number (e.g. SEQ4 or SEQ6)</li>
 * <li>
 * keyClassName - key class</li>
 * <li>
 * valueClassName - value class</li>
 * <li>
 * compression - A boolean which specifies if compression is turned on for
 * keys/values in this file.</li>
 * <li>
 * blockCompression - A boolean which specifies if block-compression is turned
 * on for keys/values in this file.</li>
 * <li>
 * compression codec - <code>CompressionCodec</code> class which is used for
 * compression of keys and/or values (if compression is enabled).</li>
 * <li>
 * metadata - {@link Metadata} for this file.</li>
 * <li>
 * sync - A sync marker to denote end of the header.</li>
 * </ul>
 * 
 * <h5 id="#UncompressedFormat">Uncompressed SequenceFile Format</h5>
 * <ul>
 * <li>
 * <a href="#Header">Header</a></li>
 * <li>
 * Record
 * <ul>
 * <li>Record length</li>
 * <li>Key length</li>
 * <li>Key</li>
 * <li>Value</li>
 * </ul>
 * </li>
 * <li>
 * A sync-marker every few <code>100</code> bytes or so.</li>
 * </ul>
 * 
 * <h5 id="#RecordCompressedFormat">Record-Compressed SequenceFile Format</h5>
 * <ul>
 * <li>
 * <a href="#Header">Header</a></li>
 * <li>
 * Record
 * <ul>
 * <li>Record length</li>
 * <li>Key length</li>
 * <li>Key</li>
 * <li><i>Compressed</i> Value</li>
 * </ul>
 * </li>
 * <li>
 * A sync-marker every few <code>100</code> bytes or so.</li>
 * </ul>
 * 
 * <h5 id="#BlockCompressedFormat">Block-Compressed SequenceFile Format</h5>
 * <ul>
 * <li>
 * <a href="#Header">Header</a></li>
 * <li>
 * Record <i>Block</i>
 * <ul>
 * <li>Compressed key-lengths block-size</li>
 * <li>Compressed key-lengths block</li>
 * <li>Compressed keys block-size</li>
 * <li>Compressed keys block</li>
 * <li>Compressed value-lengths block-size</li>
 * <li>Compressed value-lengths block</li>
 * <li>Compressed values block-size</li>
 * <li>Compressed values block</li>
 * </ul>
 * </li>
 * <li>
 * A sync-marker every few <code>100</code> bytes or so.</li>
 * </ul>
 * 
 * <p>
 * The compressed blocks of key lengths and value lengths consist of the actual
 * lengths of individual keys/values encoded in ZeroCompressedInteger format.
 * </p>
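 * 
 * <p>
 * Inside this class those per-record lengths are written with
 * {@link WritableUtils#writeVInt}; the following is a simplified sketch of
 * what the block writer does when buffering a raw key (see
 * <code>appendRaw</code> below):
 * </p>
 * 
 * <pre>
 * WritableUtils.writeVInt(keyLenBuffer, keyLength); // zero-compressed length
 * keyBuffer.write(keyData, keyOffset, keyLength);   // the key bytes themselves
 * </pre>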
 * 
 * @see CompressionCodec
 */
public class SequenceFile {
    private static final Log LOG = LogFactory.getLog(SequenceFile.class);

    private SequenceFile() {
    } // no public ctor

    private static final byte BLOCK_COMPRESS_VERSION = (byte) 4;
    private static final byte CUSTOM_COMPRESS_VERSION = (byte) 5;
    private static final byte VERSION_WITH_METADATA = (byte) 6;
    private static byte[] VERSION = new byte[] { (byte) 'S', (byte) 'E', (byte) 'Q', VERSION_WITH_METADATA };

    private static final int SYNC_ESCAPE = -1; // "length" of sync entries
    private static final int SYNC_HASH_SIZE = 16; // number of bytes in hash
    private static final int SYNC_SIZE = 4 + SYNC_HASH_SIZE; // escape + hash

    /** The number of bytes between sync points. */
    public static final int SYNC_INTERVAL = 100 * SYNC_SIZE;

    /**
     * The compression type used to compress key/value pairs in the
     * {@link SequenceFile}.
     * 
     * @see SequenceFile.Writer
     */
    public static enum CompressionType {
        /** Do not compress records. */
        NONE,
        /** Compress values only, each separately. */
        RECORD,
        /** Compress sequences of records together in blocks. */
        BLOCK
    }

    /**
     * Get the compression type for the reduce outputs
     * 
     * @param job
     *            the job config to look in
     * @return the kind of compression to use
     * @deprecated Use
     *             {@link org.apache.hadoop.mapred.SequenceFileOutputFormat#getOutputCompressionType(org.apache.hadoop.mapred.JobConf)}
     *             to get {@link CompressionType} for job-outputs.
     */
    @Deprecated
    static public CompressionType getCompressionType(Configuration job) {
        String name = job.get("io.seqfile.compression.type");
        return name == null ? CompressionType.RECORD : CompressionType.valueOf(name);
    }

    /**
     * Set the compression type for sequence files.
     * 
     * @param job
     *            the configuration to modify
     * @param val
     *            the new compression type (none, block, record)
     * @deprecated Use one of the many SequenceFile.createWriter methods to
     *             specify the {@link CompressionType} while creating the
     *             {@link SequenceFile}, or
     *             {@link org.apache.hadoop.mapred.SequenceFileOutputFormat#setOutputCompressionType(org.apache.hadoop.mapred.JobConf, org.apache.hadoop.io.SequenceFile.CompressionType)}
     *             to specify the {@link CompressionType} for job-outputs.
     */
    @Deprecated
    static public void setCompressionType(Configuration job, CompressionType val) {
        job.set("io.seqfile.compression.type", val.toString());
    }

    /**
     * Construct the preferred type of SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param name
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass)
            throws IOException {
        return createWriter(fs, conf, name, keyClass, valClass, getCompressionType(conf));
    }

    /**
     * Construct the preferred type of SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param name
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compressionType
     *            The compression type.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
            CompressionType compressionType) throws IOException {
        return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, new DefaultCodec(), null,
                new Metadata());
    }

    /**
     * Construct the preferred type of SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param name
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compressionType
     *            The compression type.
     * @param progress
     *            The Progressable object to track progress.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
            CompressionType compressionType, Progressable progress) throws IOException {
        return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, new DefaultCodec(), progress,
                new Metadata());
    }

    /**
     * Construct the preferred type of SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param name
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compressionType
     *            The compression type.
     * @param codec
     *            The compression codec.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
            CompressionType compressionType, CompressionCodec codec) throws IOException {
        return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, codec, null, new Metadata());
    }

    /**
     * Construct the preferred type of SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param name
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compressionType
     *            The compression type.
     * @param codec
     *            The compression codec.
     * @param progress
     *            The Progressable object to track progress.
     * @param metadata
     *            The metadata of the file.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
            CompressionType compressionType, CompressionCodec codec, Progressable progress, Metadata metadata)
            throws IOException {
        return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, codec, progress, metadata);
    }

    /**
     * Construct the preferred type of SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param name
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param bufferSize
     *            buffer size for the underlying output stream.
     * @param replication
     *            replication factor for the file.
     * @param blockSize
     *            block size for the file.
     * @param compressionType
     *            The compression type.
     * @param codec
     *            The compression codec.
     * @param progress
     *            The Progressable object to track progress.
     * @param metadata
     *            The metadata of the file.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
            int bufferSize, short replication, long blockSize, CompressionType compressionType,
            CompressionCodec codec, Progressable progress, Metadata metadata) throws IOException {
        if ((codec instanceof GzipCodec) && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(conf)) {
            throw new IllegalArgumentException(
                    "SequenceFile doesn't work with " + "GzipCodec without native-hadoop code!");
        }

        Writer writer = null;

        if (compressionType == CompressionType.NONE) {
            writer = new Writer(fs, conf, name, keyClass, valClass, bufferSize, replication, blockSize, progress,
                    metadata);
        } else if (compressionType == CompressionType.RECORD) {
            writer = new RecordCompressWriter(fs, conf, name, keyClass, valClass, bufferSize, replication,
                    blockSize, codec, progress, metadata);
        } else if (compressionType == CompressionType.BLOCK) {
            writer = new BlockCompressWriter(fs, conf, name, keyClass, valClass, bufferSize, replication, blockSize,
                    codec, progress, metadata);
        }

        return writer;
    }

    /**
     * Construct the preferred type of SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param name
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compressionType
     *            The compression type.
     * @param codec
     *            The compression codec.
     * @param progress
     *            The Progressable object to track progress.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
            CompressionType compressionType, CompressionCodec codec, Progressable progress) throws IOException {
        Writer writer = createWriter(fs, conf, name, keyClass, valClass, compressionType, codec, progress,
                new Metadata());
        return writer;
    }

    /**
     * Construct the preferred type of 'raw' SequenceFile Writer.
     * 
     * @param conf
     *            The configuration.
     * @param out
     *            The stream on top of which the writer is to be constructed.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compress
     *            Compress data?
     * @param blockCompress
     *            Compress blocks?
     * @param codec
     *            The compression codec.
     * @param metadata
     *            The metadata of the file.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    private static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
            boolean compress, boolean blockCompress, CompressionCodec codec, Metadata metadata) throws IOException {
        if (codec != null && (codec instanceof GzipCodec) && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(conf)) {
            throw new IllegalArgumentException(
                    "SequenceFile doesn't work with " + "GzipCodec without native-hadoop code!");
        }

        Writer writer = null;

        if (!compress) {
            writer = new Writer(conf, out, keyClass, valClass, metadata);
        } else if (compress && !blockCompress) {
            writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
        } else {
            writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
        }

        return writer;
    }

    /**
     * Construct the preferred type of 'raw' SequenceFile Writer.
     * 
     * @param fs
     *            The configured filesystem.
     * @param conf
     *            The configuration.
     * @param file
     *            The name of the file.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compress
     *            Compress data?
     * @param blockCompress
     *            Compress blocks?
     * @param codec
     *            The compression codec.
     * @param progress
     *            The Progressable object to track progress.
     * @param metadata
     *            The metadata of the file.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    private static Writer createWriter(FileSystem fs, Configuration conf, Path file, Class keyClass, Class valClass,
            boolean compress, boolean blockCompress, CompressionCodec codec, Progressable progress,
            Metadata metadata) throws IOException {
        if (codec != null && (codec instanceof GzipCodec) && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(conf)) {
            throw new IllegalArgumentException(
                    "SequenceFile doesn't work with " + "GzipCodec without native-hadoop code!");
        }

        Writer writer = null;

        if (!compress) {
            writer = new Writer(fs, conf, file, keyClass, valClass, progress, metadata);
        } else if (compress && !blockCompress) {
            writer = new RecordCompressWriter(fs, conf, file, keyClass, valClass, codec, progress, metadata);
        } else {
            writer = new BlockCompressWriter(fs, conf, file, keyClass, valClass, codec, progress, metadata);
        }

        return writer;
    }

    /**
     * Construct the preferred type of 'raw' SequenceFile Writer.
     * 
     * @param conf
     *            The configuration.
     * @param out
     *            The stream on top of which the writer is to be constructed.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compressionType
     *            The compression type.
     * @param codec
     *            The compression codec.
     * @param metadata
     *            The metadata of the file.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
            CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException {
        if ((codec instanceof GzipCodec) && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(conf)) {
            throw new IllegalArgumentException(
                    "SequenceFile doesn't work with " + "GzipCodec without native-hadoop code!");
        }

        Writer writer = null;

        if (compressionType == CompressionType.NONE) {
            writer = new Writer(conf, out, keyClass, valClass, metadata);
        } else if (compressionType == CompressionType.RECORD) {
            writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
        } else if (compressionType == CompressionType.BLOCK) {
            writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
        }

        return writer;
    }

    /**
     * Construct the preferred type of 'raw' SequenceFile Writer.
     * 
     * @param conf
     *            The configuration.
     * @param out
     *            The stream on top of which the writer is to be constructed.
     * @param keyClass
     *            The 'key' type.
     * @param valClass
     *            The 'value' type.
     * @param compressionType
     *            The compression type.
     * @param codec
     *            The compression codec.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
            CompressionType compressionType, CompressionCodec codec) throws IOException {
        Writer writer = createWriter(conf, out, keyClass, valClass, compressionType, codec, new Metadata());
        return writer;
    }

    /** The interface to 'raw' values of SequenceFiles. */
    public static interface ValueBytes {

        /**
         * Writes the uncompressed bytes to the outStream.
         * 
         * @param outStream
         *            : Stream to write uncompressed bytes into.
         * @throws IOException
         */
        public void writeUncompressedBytes(DataOutputStream outStream) throws IOException;

        /**
         * Write compressed bytes to outStream. Note that it will NOT compress
         * the bytes if they are not already compressed.
         * 
         * @param outStream
         *            : Stream to write compressed bytes into.
         */
        public void writeCompressedBytes(DataOutputStream outStream) throws IllegalArgumentException, IOException;

        /**
         * Size of stored data.
         */
        public int getSize();
    }

    private static class UncompressedBytes implements ValueBytes {
        private int dataSize;
        private byte[] data;

        private UncompressedBytes() {
            data = null;
            dataSize = 0;
        }

        private void reset(DataInputStream in, int length) throws IOException {
            data = new byte[length];
            dataSize = -1;

            in.readFully(data);
            dataSize = data.length;
        }

        public int getSize() {
            return dataSize;
        }

        public void writeUncompressedBytes(DataOutputStream outStream) throws IOException {
            outStream.write(data, 0, dataSize);
        }

        public void writeCompressedBytes(DataOutputStream outStream) throws IllegalArgumentException, IOException {
            throw new IllegalArgumentException("UncompressedBytes cannot be compressed!");
        }

    } // UncompressedBytes

    private static class CompressedBytes implements ValueBytes {
        private int dataSize;
        private byte[] data;
        DataInputBuffer rawData = null;
        CompressionCodec codec = null;
        CompressionInputStream decompressedStream = null;

        private CompressedBytes(CompressionCodec codec) {
            data = null;
            dataSize = 0;
            this.codec = codec;
        }

        private void reset(DataInputStream in, int length) throws IOException {
            data = new byte[length];
            dataSize = -1;

            in.readFully(data);
            dataSize = data.length;
        }

        public int getSize() {
            return dataSize;
        }

        public void writeUncompressedBytes(DataOutputStream outStream) throws IOException {
            if (decompressedStream == null) {
                rawData = new DataInputBuffer();
                decompressedStream = codec.createInputStream(rawData);
            } else {
                decompressedStream.resetState();
            }
            rawData.reset(data, 0, dataSize);

            byte[] buffer = new byte[8192];
            int bytesRead = 0;
            while ((bytesRead = decompressedStream.read(buffer, 0, 8192)) != -1) {
                outStream.write(buffer, 0, bytesRead);
            }
        }

        public void writeCompressedBytes(DataOutputStream outStream) throws IllegalArgumentException, IOException {
            outStream.write(data, 0, dataSize);
        }

    } // CompressedBytes

    /**
     * The class encapsulating the metadata of a file. The metadata of a
     * file is a list of attribute name/value pairs of Text type.
     * 
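     * <p>
     * A small usage sketch (illustrative only; <code>fs</code>,
     * <code>conf</code> and <code>file</code> are assumed to be supplied by the
     * caller, and the attribute names are made up for the example):
     * </p>
     * 
     * <pre>
     * SequenceFile.Metadata metadata = new SequenceFile.Metadata();
     * metadata.set(new Text("created-by"), new Text("kmeansseq"));
     * SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
     *         IntWritable.class, Text.class, CompressionType.RECORD,
     *         new DefaultCodec(), null, metadata);
     * </pre>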
     */
    public static class Metadata implements Writable {

        private TreeMap<Text, Text> theMetadata;

        public Metadata() {
            this(new TreeMap<Text, Text>());
        }

        public Metadata(TreeMap<Text, Text> arg) {
            if (arg == null) {
                this.theMetadata = new TreeMap<Text, Text>();
            } else {
                this.theMetadata = arg;
            }
        }

        public Text get(Text name) {
            return this.theMetadata.get(name);
        }

        public void set(Text name, Text value) {
            this.theMetadata.put(name, value);
        }

        public TreeMap<Text, Text> getMetadata() {
            return new TreeMap<Text, Text>(this.theMetadata);
        }

        public void write(DataOutput out) throws IOException {
            out.writeInt(this.theMetadata.size());
            Iterator<Map.Entry<Text, Text>> iter = this.theMetadata.entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry<Text, Text> en = iter.next();
                en.getKey().write(out);
                en.getValue().write(out);
            }
        }

        public void readFields(DataInput in) throws IOException {
            int sz = in.readInt();
            if (sz < 0)
                throw new IOException("Invalid size: " + sz + " for file metadata object");
            this.theMetadata = new TreeMap<Text, Text>();
            for (int i = 0; i < sz; i++) {
                Text key = new Text();
                Text val = new Text();
                key.readFields(in);
                val.readFields(in);
                this.theMetadata.put(key, val);
            }
        }

        public boolean equals(Metadata other) {
            if (other == null)
                return false;
            if (this.theMetadata.size() != other.theMetadata.size()) {
                return false;
            }
            Iterator<Map.Entry<Text, Text>> iter1 = this.theMetadata.entrySet().iterator();
            Iterator<Map.Entry<Text, Text>> iter2 = other.theMetadata.entrySet().iterator();
            while (iter1.hasNext() && iter2.hasNext()) {
                Map.Entry<Text, Text> en1 = iter1.next();
                Map.Entry<Text, Text> en2 = iter2.next();
                if (!en1.getKey().equals(en2.getKey())) {
                    return false;
                }
                if (!en1.getValue().equals(en2.getValue())) {
                    return false;
                }
            }
            if (iter1.hasNext() || iter2.hasNext()) {
                return false;
            }
            return true;
        }

        public int hashCode() {
            assert false : "hashCode not designed";
            return 42; // any arbitrary constant will do
        }

        public String toString() {
            StringBuffer sb = new StringBuffer();
            sb.append("size: ").append(this.theMetadata.size()).append("\n");
            Iterator<Map.Entry<Text, Text>> iter = this.theMetadata.entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry<Text, Text> en = iter.next();
                sb.append("\t").append(en.getKey().toString()).append("\t").append(en.getValue().toString());
                sb.append("\n");
            }
            return sb.toString();
        }
    }

    /** Write key/value pairs to a sequence-format file. */
    public static class Writer implements java.io.Closeable {
        Configuration conf;
        FSDataOutputStream out;
        boolean ownOutputStream = true;
        DataOutputBuffer buffer = new DataOutputBuffer();

        Class keyClass;
        Class valClass;

        private boolean compress;
        CompressionCodec codec = null;
        CompressionOutputStream deflateFilter = null;
        DataOutputStream deflateOut = null;
        Metadata metadata = null;
        Compressor compressor = null;

        protected Serializer keySerializer;
        protected Serializer uncompressedValSerializer;
        protected Serializer compressedValSerializer;

        // Insert a globally unique 16-byte value every few entries, so that one
        // can seek into the middle of a file and then synchronize with record
        // starts and ends by scanning for this value.
        long lastSyncPos; // position of last sync
        byte[] sync; // 16 random bytes
        {
            try {
                MessageDigest digester = MessageDigest.getInstance("MD5");
                long time = System.currentTimeMillis();
                digester.update((new UID() + "@" + time).getBytes());
                sync = digester.digest();
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        /** Implicit constructor: needed for the period of transition! */
        Writer() {
        }

        /** Create the named file. */
        public Writer(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass)
                throws IOException {
            this(fs, conf, name, keyClass, valClass, null, new Metadata());
        }

        /** Create the named file with write-progress reporter. */
        public Writer(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                Progressable progress, Metadata metadata) throws IOException {
            this(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                    fs.getDefaultReplication(), fs.getDefaultBlockSize(), progress, metadata);
        }

        /** Create the named file with write-progress reporter. */
        public Writer(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass, int bufferSize,
                short replication, long blockSize, Progressable progress, Metadata metadata) throws IOException {
            init(name, conf, fs.create(name, true, bufferSize, replication, blockSize, progress), keyClass,
                    valClass, false, null, metadata);
            initializeFileHeader();
            writeFileHeader();
            finalizeFileHeader();
        }

        /** Write to an arbitrary stream using a specified buffer size. */
        private Writer(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
                Metadata metadata) throws IOException {
            this.ownOutputStream = false;
            init(null, conf, out, keyClass, valClass, false, null, metadata);

            initializeFileHeader();
            writeFileHeader();
            finalizeFileHeader();
        }

        /** Write the initial part of file header. */
        void initializeFileHeader() throws IOException {
            out.write(VERSION);
        }

        /** Write the final part of file header. */
        void finalizeFileHeader() throws IOException {
            out.write(sync); // write the sync bytes
            out.flush(); // flush header
        }

        boolean isCompressed() {
            return compress;
        }

        boolean isBlockCompressed() {
            return false;
        }

        /** Write and flush the file header. */
        void writeFileHeader() throws IOException {
            Text.writeString(out, keyClass.getName());
            Text.writeString(out, valClass.getName());

            out.writeBoolean(this.isCompressed());
            out.writeBoolean(this.isBlockCompressed());

            if (this.isCompressed()) {
                Text.writeString(out, (codec.getClass()).getName());
            }
            this.metadata.write(out);
        }

        /** Initialize. */
        @SuppressWarnings("unchecked")
        void init(Path name, Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
                boolean compress, CompressionCodec codec, Metadata metadata) throws IOException {
            this.conf = conf;
            this.out = out;
            this.keyClass = keyClass;
            this.valClass = valClass;
            this.compress = compress;
            this.codec = codec;
            this.metadata = metadata;
            SerializationFactory serializationFactory = new SerializationFactory(conf);
            this.keySerializer = serializationFactory.getSerializer(keyClass);
            this.keySerializer.open(buffer);
            this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
            this.uncompressedValSerializer.open(buffer);
            if (this.codec != null) {
                ReflectionUtils.setConf(this.codec, this.conf);
                this.compressor = CodecPool.getCompressor(this.codec);
                this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
                this.deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
                this.compressedValSerializer = serializationFactory.getSerializer(valClass);
                this.compressedValSerializer.open(deflateOut);
            }
        }

        /** Returns the class of keys in this file. */
        public Class getKeyClass() {
            return keyClass;
        }

        /** Returns the class of values in this file. */
        public Class getValueClass() {
            return valClass;
        }

        /** Returns the compression codec of data in this file. */
        public CompressionCodec getCompressionCodec() {
            return codec;
        }

        /** create a sync point */
        public void sync() throws IOException {
            if (sync != null && lastSyncPos != out.getPos()) {
                out.writeInt(SYNC_ESCAPE); // mark the start of the sync
                out.write(sync); // write sync
                lastSyncPos = out.getPos(); // update lastSyncPos
            }
        }

        /** flush all currently written data to the file system */
        public void syncFs() throws IOException {
            if (out != null) {
                out.sync(); // flush contents to file system
            }
        }

        /** Returns the configuration of this file. */
        Configuration getConf() {
            return conf;
        }

        /** Close the file. */
        public synchronized void close() throws IOException {
            keySerializer.close();
            uncompressedValSerializer.close();
            if (compressedValSerializer != null) {
                compressedValSerializer.close();
            }

            CodecPool.returnCompressor(compressor);
            compressor = null;

            if (out != null) {

                // Close the underlying stream iff we own it...
                if (ownOutputStream) {
                    out.close();
                } else {
                    out.flush();
                }
                out = null;
            }
        }

        synchronized void checkAndWriteSync() throws IOException {
            if (sync != null && out.getPos() >= lastSyncPos + SYNC_INTERVAL) {
                // time to emit sync
                sync();
            }
        }

        /** Append a key/value pair. */
        public synchronized void append(Writable key, Writable val) throws IOException {
            append((Object) key, (Object) val);
        }

        /** Append a key/value pair. */
        @SuppressWarnings("unchecked")
        public synchronized void append(Object key, Object val) throws IOException {
            if (key.getClass() != keyClass)
                throw new IOException("wrong key class: " + key.getClass().getName() + " is not " + keyClass);
            if (val.getClass() != valClass)
                throw new IOException("wrong value class: " + val.getClass().getName() + " is not " + valClass);

            buffer.reset();

            // Append the 'key'
            keySerializer.serialize(key);
            int keyLength = buffer.getLength();
            if (keyLength < 0)
                throw new IOException("negative length keys not allowed: " + key);

            // Append the 'value'
            if (compress) {
                deflateFilter.resetState();
                compressedValSerializer.serialize(val);
                deflateOut.flush();
                deflateFilter.finish();
            } else {
                uncompressedValSerializer.serialize(val);
            }

            // Write the record out
            checkAndWriteSync(); // sync
            out.writeInt(buffer.getLength()); // total record length
            out.writeInt(keyLength); // key portion length
            out.write(buffer.getData(), 0, buffer.getLength()); // data
        }

        public synchronized void appendRaw(byte[] keyData, int keyOffset, int keyLength, ValueBytes val)
                throws IOException {
            if (keyLength < 0)
                throw new IOException("negative length keys not allowed: " + keyLength);

            int valLength = val.getSize();

            checkAndWriteSync();

            out.writeInt(keyLength + valLength); // total record length
            out.writeInt(keyLength); // key portion length
            out.write(keyData, keyOffset, keyLength); // key
            val.writeUncompressedBytes(out); // value
        }

        /**
         * Returns the current length of the output file.
         * 
         * <p>
         * This always returns a synchronized position. In other words,
         * immediately after calling {@link SequenceFile.Reader#seek(long)} with
         * a position returned by this method,
         * {@link SequenceFile.Reader#next(Writable)} may be called. However,
         * the key may be earlier in the file than the key last written when this method
         * was called (e.g., with block-compression, it may be the first key in
         * the block that was being written when this method was called).
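         * 
         * <p>
         * A short sketch of the intended use (illustrative only; the writer and
         * reader are assumed to refer to the same file):
         * </p>
         * 
         * <pre>
         * long pos = writer.getLength(); // a safe position to seek to later
         * // ... later, with a Reader opened on the same file:
         * reader.seek(pos);
         * reader.next(key); // resumes at a record boundary
         * </pre>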
         */
        public synchronized long getLength() throws IOException {
            return out.getPos();
        }

    } // class Writer

    /** Write key/compressed-value pairs to a sequence-format file. */
    static class RecordCompressWriter extends Writer {

        /** Create the named file. */
        public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                CompressionCodec codec) throws IOException {
            this(conf, fs.create(name), keyClass, valClass, codec, new Metadata());
        }

        /** Create the named file with write-progress reporter. */
        public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                CompressionCodec codec, Progressable progress, Metadata metadata) throws IOException {
            this(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                    fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, progress, metadata);
        }

        /** Create the named file with write-progress reporter. */
        public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                int bufferSize, short replication, long blockSize, CompressionCodec codec, Progressable progress,
                Metadata metadata) throws IOException {
            super.init(name, conf, fs.create(name, true, bufferSize, replication, blockSize, progress), keyClass,
                    valClass, true, codec, metadata);

            initializeFileHeader();
            writeFileHeader();
            finalizeFileHeader();
        }

        /** Create the named file with write-progress reporter. */
        public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                CompressionCodec codec, Progressable progress) throws IOException {
            this(fs, conf, name, keyClass, valClass, codec, progress, new Metadata());
        }

        /** Write to an arbitrary stream using a specified buffer size. */
        private RecordCompressWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
                CompressionCodec codec, Metadata metadata) throws IOException {
            this.ownOutputStream = false;
            super.init(null, conf, out, keyClass, valClass, true, codec, metadata);

            initializeFileHeader();
            writeFileHeader();
            finalizeFileHeader();

        }

        boolean isCompressed() {
            return true;
        }

        boolean isBlockCompressed() {
            return false;
        }

        /** Append a key/value pair. */
        @SuppressWarnings("unchecked")
        public synchronized void append(Object key, Object val) throws IOException {
            if (key.getClass() != keyClass)
                throw new IOException("wrong key class: " + key.getClass().getName() + " is not " + keyClass);
            if (val.getClass() != valClass)
                throw new IOException("wrong value class: " + val.getClass().getName() + " is not " + valClass);

            buffer.reset();

            // Append the 'key'
            keySerializer.serialize(key);
            int keyLength = buffer.getLength();
            if (keyLength < 0)
                throw new IOException("negative length keys not allowed: " + key);

            // Compress 'value' and append it
            deflateFilter.resetState();
            compressedValSerializer.serialize(val);
            deflateOut.flush();
            deflateFilter.finish();

            // Write the record out
            checkAndWriteSync(); // sync
            out.writeInt(buffer.getLength()); // total record length
            out.writeInt(keyLength); // key portion length
            out.write(buffer.getData(), 0, buffer.getLength()); // data
        }

        /** Append a key/value pair. */
        public synchronized void appendRaw(byte[] keyData, int keyOffset, int keyLength, ValueBytes val)
                throws IOException {

            if (keyLength < 0)
                throw new IOException("negative length keys not allowed: " + keyLength);

            int valLength = val.getSize();

            checkAndWriteSync(); // sync
            out.writeInt(keyLength + valLength); // total record length
            out.writeInt(keyLength); // key portion length
            out.write(keyData, keyOffset, keyLength); // 'key' data
            val.writeCompressedBytes(out); // 'value' data
        }

    } // RecordCompressWriter

    /** Write compressed key/value blocks to a sequence-format file. */
    static class BlockCompressWriter extends Writer {

        private int noBufferedRecords = 0;

        private DataOutputBuffer keyLenBuffer = new DataOutputBuffer();
        private DataOutputBuffer keyBuffer = new DataOutputBuffer();

        private DataOutputBuffer valLenBuffer = new DataOutputBuffer();
        private DataOutputBuffer valBuffer = new DataOutputBuffer();

        private int compressionBlockSize;

        /** Create the named file. */
        public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                CompressionCodec codec) throws IOException {
            this(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                    fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, null, new Metadata());
        }

        /** Create the named file with write-progress reporter. */
        public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                CompressionCodec codec, Progressable progress, Metadata metadata) throws IOException {
            this(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
                    fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, progress, metadata);
        }

        /** Create the named file with write-progress reporter. */
        public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                int bufferSize, short replication, long blockSize, CompressionCodec codec, Progressable progress,
                Metadata metadata) throws IOException {
            super.init(name, conf, fs.create(name, true, bufferSize, replication, blockSize, progress), keyClass,
                    valClass, true, codec, metadata);
            init(conf.getInt("io.seqfile.compress.blocksize", 1000000));

            initializeFileHeader();
            writeFileHeader();
            finalizeFileHeader();
        }

        /** Create the named file with write-progress reporter. */
        public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
                CompressionCodec codec, Progressable progress) throws IOException {
            this(fs, conf, name, keyClass, valClass, codec, progress, new Metadata());
        }

        /** Write to an arbitrary stream using a specified buffer size. */
        private BlockCompressWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
                CompressionCodec codec, Metadata metadata) throws IOException {
            this.ownOutputStream = false;
            super.init(null, conf, out, keyClass, valClass, true, codec, metadata);
            init(1000000);

            initializeFileHeader();
            writeFileHeader();
            finalizeFileHeader();
        }

        boolean isCompressed() {
            return true;
        }

        boolean isBlockCompressed() {
            return true;
        }

        /** Initialize */
        void init(int compressionBlockSize) throws IOException {
            this.compressionBlockSize = compressionBlockSize;
            keySerializer.close();
            keySerializer.open(keyBuffer);
            uncompressedValSerializer.close();
            uncompressedValSerializer.open(valBuffer);
        }

        /** Workhorse to check and write out compressed data/lengths */
        private synchronized void writeBuffer(DataOutputBuffer uncompressedDataBuffer) throws IOException {
            deflateFilter.resetState();
            buffer.reset();
            deflateOut.write(uncompressedDataBuffer.getData(), 0, uncompressedDataBuffer.getLength());
            deflateOut.flush();
            deflateFilter.finish();

            WritableUtils.writeVInt(out, buffer.getLength());
            out.write(buffer.getData(), 0, buffer.getLength());
        }

        /** Compress and flush contents to dfs */
        public synchronized void sync() throws IOException {
            if (noBufferedRecords > 0) {
                super.sync();

                // No. of records
                WritableUtils.writeVInt(out, noBufferedRecords);

                // Write 'keys' and lengths
                writeBuffer(keyLenBuffer);
                writeBuffer(keyBuffer);

                // Write 'values' and lengths
                writeBuffer(valLenBuffer);
                writeBuffer(valBuffer);

                // Flush the file-stream
                out.flush();

                // Reset internal states
                keyLenBuffer.reset();
                keyBuffer.reset();
                valLenBuffer.reset();
                valBuffer.reset();
                noBufferedRecords = 0;
            }

        }

        /** Close the file. */
        public synchronized void close() throws IOException {
            if (out != null) {
                sync();
            }
            super.close();
        }

        /** Append a key/value pair. */
        @SuppressWarnings("unchecked")
        public synchronized void append(Object key, Object val) throws IOException {
            if (key.getClass() != keyClass)
                throw new IOException("wrong key class: " + key + " is not " + keyClass);
            if (val.getClass() != valClass)
                throw new IOException("wrong value class: " + val + " is not " + valClass);

            // Save key/value into respective buffers
            int oldKeyLength = keyBuffer.getLength();
            keySerializer.serialize(key);
            int keyLength = keyBuffer.getLength() - oldKeyLength;
            if (keyLength < 0)
                throw new IOException("negative length keys not allowed: " + key);
            WritableUtils.writeVInt(keyLenBuffer, keyLength);

            int oldValLength = valBuffer.getLength();
            uncompressedValSerializer.serialize(val);
            int valLength = valBuffer.getLength() - oldValLength;
            WritableUtils.writeVInt(valLenBuffer, valLength);

            // Added another key/value pair
            ++noBufferedRecords;

            // Compress and flush?
            int currentBlockSize = keyBuffer.getLength() + valBuffer.getLength();
            if (currentBlockSize >= compressionBlockSize) {
                sync();
            }
        }

        /** Append a key/value pair. */
        public synchronized void appendRaw(byte[] keyData, int keyOffset, int keyLength, ValueBytes val)
                throws IOException {

            if (keyLength < 0)
                throw new IOException("negative length keys not allowed");

            int valLength = val.getSize();

            // Save key/value data in relevant buffers
            WritableUtils.writeVInt(keyLenBuffer, keyLength);
            keyBuffer.write(keyData, keyOffset, keyLength);
            WritableUtils.writeVInt(valLenBuffer, valLength);
            val.writeUncompressedBytes(valBuffer);

            // Added another key/value pair
            ++noBufferedRecords;

            // Compress and flush?
            int currentBlockSize = keyBuffer.getLength() + valBuffer.getLength();
            if (currentBlockSize >= compressionBlockSize) {
                sync();
            }
        }

    } // BlockCompressWriter

    /** Reads key/value pairs from a sequence-format file. */
    public static class Reader implements java.io.Closeable {
        private Path file;
        private FSDataInputStream in;
        private DataOutputBuffer outBuf = new DataOutputBuffer();

        private byte version;

        private String keyClassName;
        private String valClassName;
        private Class keyClass;
        private Class valClass;

        private CompressionCodec codec = null;
        private Metadata metadata = null;

        private byte[] sync = new byte[SYNC_HASH_SIZE];
        private byte[] syncCheck = new byte[SYNC_HASH_SIZE];
        private boolean syncSeen;

        private long end;
        private int keyLength;
        private int recordLength;

        private boolean decompress;
        private boolean blockCompressed;

        private Configuration conf;

        private int noBufferedRecords = 0;
        private boolean lazyDecompress = true;
        private boolean valuesDecompressed = true;

        private int noBufferedKeys = 0;
        private int noBufferedValues = 0;

        private DataInputBuffer keyLenBuffer = null;
        private CompressionInputStream keyLenInFilter = null;
        private DataInputStream keyLenIn = null;
        private Decompressor keyLenDecompressor = null;
        private DataInputBuffer keyBuffer = null;
        private CompressionInputStream keyInFilter = null;
        private DataInputStream keyIn = null;
        private Decompressor keyDecompressor = null;

        private DataInputBuffer valLenBuffer = null;
        private CompressionInputStream valLenInFilter = null;
        private DataInputStream valLenIn = null;
        private Decompressor valLenDecompressor = null;
        private DataInputBuffer valBuffer = null;
        private CompressionInputStream valInFilter = null;
        private DataInputStream valIn = null;
        private Decompressor valDecompressor = null;

        private Deserializer keyDeserializer;
        private Deserializer valDeserializer;

        /** Open the named file. */
        public Reader(FileSystem fs, Path file, Configuration conf) throws IOException {
            this(fs, file, conf.getInt("io.file.buffer.size", 4096), conf, false);
        }

        private Reader(FileSystem fs, Path file, int bufferSize, Configuration conf, boolean tempReader)
                throws IOException {
            this(fs, file, bufferSize, 0, fs.getLength(file), conf, tempReader);
        }

        private Reader(FileSystem fs, Path file, int bufferSize, long start, long length, Configuration conf,
                boolean tempReader) throws IOException {
            this.file = file;
            this.in = openFile(fs, file, bufferSize, length);
            this.conf = conf;
            seek(start);
            this.end = in.getPos() + length;
            init(tempReader);
        }

        /**
         * Override this method to specialize the type of
         * {@link FSDataInputStream} returned.
         */
        protected FSDataInputStream openFile(FileSystem fs, Path file, int bufferSize, long length)
                throws IOException {
            return fs.open(file, bufferSize);
        }

        /**
         * Initialize the {@link Reader}
         * 
         * @param tempReader
         *            <code>true</code> if we are constructing a temporary
         *            reader (see {@link SequenceFile.Sorter#cloneFileAttributes}),
         *            and hence do not initialize every component;
         *            <code>false</code> otherwise.
         * @throws IOException
         */
        private void init(boolean tempReader) throws IOException {
            byte[] versionBlock = new byte[VERSION.length];
            in.readFully(versionBlock);

            if ((versionBlock[0] != VERSION[0]) || (versionBlock[1] != VERSION[1])
                    || (versionBlock[2] != VERSION[2]))
                throw new IOException(file + " not a SequenceFile");

            // Set 'version'
            version = versionBlock[3];
            if (version > VERSION[3])
                throw new VersionMismatchException(VERSION[3], version);

            if (version < BLOCK_COMPRESS_VERSION) {
                UTF8 className = new UTF8();

                className.readFields(in);
                keyClassName = className.toString(); // key class name

                className.readFields(in);
                valClassName = className.toString(); // val class name
            } else {
                keyClassName = Text.readString(in);
                valClassName = Text.readString(in);
            }

            if (version > 2) { // if version > 2
                this.decompress = in.readBoolean(); // is compressed?
            } else {
                decompress = false;
            }

            if (version >= BLOCK_COMPRESS_VERSION) { // if version >= 4
                this.blockCompressed = in.readBoolean(); // is block-compressed?
            } else {
                blockCompressed = false;
            }

            // if version >= 5
            // setup the compression codec
            if (decompress) {
                if (version >= CUSTOM_COMPRESS_VERSION) {
                    String codecClassname = Text.readString(in);
                    try {
                        Class<? extends CompressionCodec> codecClass = conf.getClassByName(codecClassname)
                                .asSubclass(CompressionCodec.class);
                        this.codec = ReflectionUtils.newInstance(codecClass, conf);
                    } catch (ClassNotFoundException cnfe) {
                        throw new IllegalArgumentException("Unknown codec: " + codecClassname, cnfe);
                    }
                } else {
                    codec = new DefaultCodec();
                    ((Configurable) codec).setConf(conf);
                }
            }

            this.metadata = new Metadata();
            if (version >= VERSION_WITH_METADATA) { // if version >= 6
                this.metadata.readFields(in);
            }

            if (version > 1) { // if version > 1
                in.readFully(sync); // read sync bytes
            }

            // Initialize... *not* if we are constructing a temporary Reader
            if (!tempReader) {
                valBuffer = new DataInputBuffer();
                if (decompress) {
                    valDecompressor = CodecPool.getDecompressor(codec);
                    valInFilter = codec.createInputStream(valBuffer, valDecompressor);
                    valIn = new DataInputStream(valInFilter);
                } else {
                    valIn = valBuffer;
                }

                if (blockCompressed) {
                    keyLenBuffer = new DataInputBuffer();
                    keyBuffer = new DataInputBuffer();
                    valLenBuffer = new DataInputBuffer();

                    keyLenDecompressor = CodecPool.getDecompressor(codec);
                    keyLenInFilter = codec.createInputStream(keyLenBuffer, keyLenDecompressor);
                    keyLenIn = new DataInputStream(keyLenInFilter);

                    keyDecompressor = CodecPool.getDecompressor(codec);
                    keyInFilter = codec.createInputStream(keyBuffer, keyDecompressor);
                    keyIn = new DataInputStream(keyInFilter);

                    valLenDecompressor = CodecPool.getDecompressor(codec);
                    valLenInFilter = codec.createInputStream(valLenBuffer, valLenDecompressor);
                    valLenIn = new DataInputStream(valLenInFilter);
                }

                SerializationFactory serializationFactory = new SerializationFactory(conf);
                this.keyDeserializer = getDeserializer(serializationFactory, getKeyClass());
                if (!blockCompressed) {
                    this.keyDeserializer.open(valBuffer);
                } else {
                    this.keyDeserializer.open(keyIn);
                }
                this.valDeserializer = getDeserializer(serializationFactory, getValueClass());
                this.valDeserializer.open(valIn);
            }
        }

        @SuppressWarnings("unchecked")
        private Deserializer getDeserializer(SerializationFactory sf, Class c) {
            return sf.getDeserializer(c);
        }

        /** Close the file. */
        public synchronized void close() throws IOException {
            // Return the decompressors to the pool
            CodecPool.returnDecompressor(keyLenDecompressor);
            CodecPool.returnDecompressor(keyDecompressor);
            CodecPool.returnDecompressor(valLenDecompressor);
            CodecPool.returnDecompressor(valDecompressor);
            keyLenDecompressor = keyDecompressor = null;
            valLenDecompressor = valDecompressor = null;

            if (keyDeserializer != null) {
                keyDeserializer.close();
            }
            if (valDeserializer != null) {
                valDeserializer.close();
            }

            // Close the input-stream
            in.close();
        }

        /** Returns the name of the key class. */
        public String getKeyClassName() {
            return keyClassName;
        }

        /** Returns the class of keys in this file. */
        public synchronized Class<?> getKeyClass() {
            if (null == keyClass) {
                try {
                    keyClass = WritableName.getClass(getKeyClassName(), conf);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return keyClass;
        }

        /** Returns the name of the value class. */
        public String getValueClassName() {
            return valClassName;
        }

        /** Returns the class of values in this file. */
        public synchronized Class<?> getValueClass() {
            if (null == valClass) {
                try {
                    valClass = WritableName.getClass(getValueClassName(), conf);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return valClass;
        }

        /** Returns true if values are compressed. */
        public boolean isCompressed() {
            return decompress;
        }

        /** Returns true if records are block-compressed. */
        public boolean isBlockCompressed() {
            return blockCompressed;
        }

        /** Returns the compression codec of data in this file. */
        public CompressionCodec getCompressionCodec() {
            return codec;
        }

        /** Returns the metadata object of the file */
        public Metadata getMetadata() {
            return this.metadata;
        }

        /** Returns the configuration used for this file. */
        Configuration getConf() {
            return conf;
        }

        /** Read a compressed buffer */
        private synchronized void readBuffer(DataInputBuffer buffer, CompressionInputStream filter)
                throws IOException {
            // Read data into a temporary buffer
            DataOutputBuffer dataBuffer = new DataOutputBuffer();

            try {
                // Read the length of the compressed buffer, then its contents
                int dataBufferLength = WritableUtils.readVInt(in);
                dataBuffer.write(in, dataBufferLength);

                // Set up 'buffer' connected to the input-stream
                buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength());
            } finally {
                dataBuffer.close();
            }

            // Reset the codec
            filter.resetState();
        }

        /** Read the next 'compressed' block */
        private synchronized void readBlock() throws IOException {
            // Check if we need to throw away a whole block of
            // 'values' due to 'lazy decompression'
            if (lazyDecompress && !valuesDecompressed) {
                in.seek(WritableUtils.readVInt(in) + in.getPos());
                in.seek(WritableUtils.readVInt(in) + in.getPos());
            }

            // Reset internal states
            noBufferedKeys = 0;
            noBufferedValues = 0;
            noBufferedRecords = 0;
            valuesDecompressed = false;

            // Process sync
            if (sync != null) {
                in.readInt();
                in.readFully(syncCheck); // read syncCheck
                if (!Arrays.equals(sync, syncCheck)) // check it
                    throw new IOException("File is corrupt!");
            }
            syncSeen = true;

            // Read number of records in this block
            noBufferedRecords = WritableUtils.readVInt(in);

            // Read key lengths and keys
            readBuffer(keyLenBuffer, keyLenInFilter);
            readBuffer(keyBuffer, keyInFilter);
            noBufferedKeys = noBufferedRecords;

            // Read value lengths and values
            if (!lazyDecompress) {
                readBuffer(valLenBuffer, valLenInFilter);
                readBuffer(valBuffer, valInFilter);
                noBufferedValues = noBufferedRecords;
                valuesDecompressed = true;
            }
        }

        /**
         * Position valLenIn/valIn to the 'value' corresponding to the 'current'
         * key
         */
        private synchronized void seekToCurrentValue() throws IOException {
            if (!blockCompressed) {
                if (decompress) {
                    valInFilter.resetState();
                }
                valBuffer.reset();
            } else {
                // Check if this is the first value in the 'block' to be read
                if (lazyDecompress && !valuesDecompressed) {
                    // Read the value lengths and values
                    readBuffer(valLenBuffer, valLenInFilter);
                    readBuffer(valBuffer, valInFilter);
                    noBufferedValues = noBufferedRecords;
                    valuesDecompressed = true;
                }

                // Calculate the no. of bytes to skip
                // Note: 'current' key has already been read!
                int skipValBytes = 0;
                int currentKey = noBufferedKeys + 1;
                for (int i = noBufferedValues; i > currentKey; --i) {
                    skipValBytes += WritableUtils.readVInt(valLenIn);
                    --noBufferedValues;
                }

                // Skip to the 'val' corresponding to 'current' key
                if (skipValBytes > 0) {
                    if (valIn.skipBytes(skipValBytes) != skipValBytes) {
                        throw new IOException("Failed to seek to " + currentKey + "(th) value!");
                    }
                }
            }
        }

        /**
         * Get the 'value' corresponding to the last read 'key'.
         * 
         * @param val
         *            : The 'value' to be read.
         * @throws IOException
         */
        public synchronized void getCurrentValue(Writable val) throws IOException {
            if (val instanceof Configurable) {
                ((Configurable) val).setConf(this.conf);
            }

            // Position stream to 'current' value
            seekToCurrentValue();

            if (!blockCompressed) {
                val.readFields(valIn);

                if (valIn.read() > 0) {
                    LOG.info("available bytes: " + valIn.available());
                    throw new IOException(val + " read " + (valBuffer.getPosition() - keyLength)
                            + " bytes, should read " + (valBuffer.getLength() - keyLength));
                }
            } else {
                // Get the value
                int valLength = WritableUtils.readVInt(valLenIn);
                val.readFields(valIn);

                // Read another compressed 'value'
                --noBufferedValues;

                // Sanity check
                if (valLength < 0) {
                    LOG.debug(val + " is a zero-length value");
                }
            }

        }

        /**
         * Get the 'value' corresponding to the last read 'key'.
         * 
         * @param val
         *            : The 'value' to be read.
         * @throws IOException
         */
        public synchronized Object getCurrentValue(Object val) throws IOException {
            if (val instanceof Configurable) {
                ((Configurable) val).setConf(this.conf);
            }

            // Position stream to 'current' value
            seekToCurrentValue();

            if (!blockCompressed) {
                val = deserializeValue(val);

                if (valIn.read() > 0) {
                    LOG.info("available bytes: " + valIn.available());
                    throw new IOException(val + " read " + (valBuffer.getPosition() - keyLength)
                            + " bytes, should read " + (valBuffer.getLength() - keyLength));
                }
            } else {
                // Get the value
                int valLength = WritableUtils.readVInt(valLenIn);
                val = deserializeValue(val);

                // Read another compressed 'value'
                --noBufferedValues;

                // Sanity check
                if (valLength < 0) {
                    LOG.debug(val + " is a zero-length value");
                }
            }
            return val;

        }

        @SuppressWarnings("unchecked")
        private Object deserializeValue(Object val) throws IOException {
            return valDeserializer.deserialize(val);
        }

        /**
         * Read the next key in the file into <code>key</code>, skipping its
         * value. Returns true if another entry exists, and false at end of
         * file.
         */
        public synchronized boolean next(Writable key) throws IOException {
            if (key.getClass() != getKeyClass())
                throw new IOException("wrong key class: " + key.getClass().getName() + " is not " + keyClass);

            if (!blockCompressed) {
                outBuf.reset();

                keyLength = next(outBuf);
                if (keyLength < 0)
                    return false;

                valBuffer.reset(outBuf.getData(), outBuf.getLength());

                key.readFields(valBuffer);
                valBuffer.mark(0);
                if (valBuffer.getPosition() != keyLength)
                    throw new IOException(
                            key + " read " + valBuffer.getPosition() + " bytes, should read " + keyLength);
            } else {
                // Reset syncSeen
                syncSeen = false;

                if (noBufferedKeys == 0) {
                    try {
                        readBlock();
                    } catch (EOFException eof) {
                        return false;
                    }
                }

                int keyLength = WritableUtils.readVInt(keyLenIn);

                // Sanity check
                if (keyLength < 0) {
                    return false;
                }

                // Read another compressed 'key'
                key.readFields(keyIn);
                --noBufferedKeys;
            }

            return true;
        }

        /**
         * Read the next key/value pair in the file into <code>key</code> and
         * <code>val</code>. Returns true if such a pair exists and false when
         * at end of file
         */
        public synchronized boolean next(Writable key, Writable val) throws IOException {
            if (val.getClass() != getValueClass())
                throw new IOException("wrong value class: " + val + " is not " + valClass);

            boolean more = next(key);

            if (more) {
                getCurrentValue(val);
            }

            return more;
        }

        /**
         * Read and return the next record length, potentially skipping over a
         * sync block.
         * 
         * @return the length of the next record or -1 if there is no next
         *         record
         * @throws IOException
         */
        private synchronized int readRecordLength() throws IOException {
            if (in.getPos() >= end) {
                return -1;
            }
            int length = in.readInt();
            if (version > 1 && sync != null && length == SYNC_ESCAPE) { // process a sync entry
                in.readFully(syncCheck); // read syncCheck
                if (!Arrays.equals(sync, syncCheck)) // check it
                    throw new IOException("File is corrupt!");
                syncSeen = true;
                if (in.getPos() >= end) {
                    return -1;
                }
                length = in.readInt(); // re-read length
            } else {
                syncSeen = false;
            }

            return length;
        }

        /**
         * Read the next key/value pair in the file into <code>buffer</code>.
         * Returns the length of the key read, or -1 if at end of file. The
         * length of the value may be computed by calling buffer.getLength()
         * before and after calls to this method.
         * 
         * @deprecated Call
         *             {@link #nextRaw(DataOutputBuffer,SequenceFile.ValueBytes)}
         *             instead.
         */
        public synchronized int next(DataOutputBuffer buffer) throws IOException {
            // Unsupported for block-compressed sequence files
            if (blockCompressed) {
                throw new IOException("Unsupported call for block-compressed"
                        + " SequenceFiles - use SequenceFile.Reader.next(DataOutputStream, ValueBytes)");
            }
            try {
                int length = readRecordLength();
                if (length == -1) {
                    return -1;
                }
                int keyLength = in.readInt();
                buffer.write(in, length);
                return keyLength;
            } catch (ChecksumException e) { // checksum failure
                handleChecksumException(e);
                return next(buffer);
            }
        }

        public ValueBytes createValueBytes() {
            ValueBytes val = null;
            if (!decompress || blockCompressed) {
                val = new UncompressedBytes();
            } else {
                val = new CompressedBytes(codec);
            }
            return val;
        }

        /**
         * Read 'raw' records.
         * 
         * @param key
         *            - The buffer into which the key is read
         * @param val
         *            - The 'raw' value
         * @return Returns the total record length or -1 for end of file
         * @throws IOException
         */
        public synchronized int nextRaw(DataOutputBuffer key, ValueBytes val) throws IOException {
            if (!blockCompressed) {
                int length = readRecordLength();
                if (length == -1) {
                    return -1;
                }
                int keyLength = in.readInt();
                int valLength = length - keyLength;
                key.write(in, keyLength);
                if (decompress) {
                    CompressedBytes value = (CompressedBytes) val;
                    value.reset(in, valLength);
                } else {
                    UncompressedBytes value = (UncompressedBytes) val;
                    value.reset(in, valLength);
                }

                return length;
            } else {
                // Reset syncSeen
                syncSeen = false;

                // Read 'key'
                if (noBufferedKeys == 0) {
                    if (in.getPos() >= end)
                        return -1;

                    try {
                        readBlock();
                    } catch (EOFException eof) {
                        return -1;
                    }
                }
                int keyLength = WritableUtils.readVInt(keyLenIn);
                if (keyLength < 0) {
                    throw new IOException("zero length key found!");
                }
                key.write(keyIn, keyLength);
                --noBufferedKeys;

                // Read raw 'value'
                seekToCurrentValue();
                int valLength = WritableUtils.readVInt(valLenIn);
                UncompressedBytes rawValue = (UncompressedBytes) val;
                rawValue.reset(valIn, valLength);
                --noBufferedValues;

                return (keyLength + valLength);

            }

        }
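
        // Illustrative sketch of the raw read path (no key/value
        // deserialization); 'reader' is assumed to be an open Reader as in the
        // earlier example:
        //
        //   DataOutputBuffer rawKey = new DataOutputBuffer();
        //   ValueBytes rawVal = reader.createValueBytes();
        //   while (reader.nextRaw(rawKey, rawVal) != -1) {
        //       // rawKey now holds the serialized key bytes; rawVal holds the
        //       // (possibly still compressed) value bytes for pass-through copying
        //       rawKey.reset();                      // reuse the buffer for the next record
        //   }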

        /**
         * Read 'raw' keys.
         * 
         * @param key
         *            - The buffer into which the key is read
         * @return Returns the key length or -1 for end of file
         * @throws IOException
         */
        public int nextRawKey(DataOutputBuffer key) throws IOException {
            if (!blockCompressed) {
                recordLength = readRecordLength();
                if (recordLength == -1) {
                    return -1;
                }
                keyLength = in.readInt();
                key.write(in, keyLength);
                return keyLength;
            } else {
                // Reset syncSeen
                syncSeen = false;

                // Read 'key'
                if (noBufferedKeys == 0) {
                    if (in.getPos() >= end)
                        return -1;

                    try {
                        readBlock();
                    } catch (EOFException eof) {
                        return -1;
                    }
                }
                int keyLength = WritableUtils.readVInt(keyLenIn);
                if (keyLength < 0) {
                    throw new IOException("zero length key found!");
                }
                key.write(keyIn, keyLength);
                --noBufferedKeys;

                return keyLength;
            }

        }

        /**
         * Read the next key in the file, skipping its value. Return null at end
         * of file.
         */
        public synchronized Object next(Object key) throws IOException {
            if (key != null && key.getClass() != getKeyClass()) {
                throw new IOException("wrong key class: " + key.getClass().getName() + " is not " + keyClass);
            }

            if (!blockCompressed) {
                outBuf.reset();

                keyLength = next(outBuf);
                if (keyLength < 0)
                    return null;

                valBuffer.reset(outBuf.getData(), outBuf.getLength());

                key = deserializeKey(key);
                valBuffer.mark(0);
                if (valBuffer.getPosition() != keyLength)
                    throw new IOException(
                            key + " read " + valBuffer.getPosition() + " bytes, should read " + keyLength);
            } else {
                // Reset syncSeen
                syncSeen = false;

                if (noBufferedKeys == 0) {
                    try {
                        readBlock();
                    } catch (EOFException eof) {
                        return null;
                    }
                }

                int keyLength = WritableUtils.readVInt(keyLenIn);

                // Sanity check
                if (keyLength < 0) {
                    return null;
                }

                // Read another compressed 'key'
                key = deserializeKey(key);
                --noBufferedKeys;
            }

            return key;
        }

        @SuppressWarnings("unchecked")
        private Object deserializeKey(Object key) throws IOException {
            return keyDeserializer.deserialize(key);
        }

        /**
         * Read 'raw' values.
         * 
         * @param val
         *            - The 'raw' value
         * @return Returns the value length
         * @throws IOException
         */
        public synchronized int nextRawValue(ValueBytes val) throws IOException {

            // Position stream to current value
            seekToCurrentValue();

            if (!blockCompressed) {
                int valLength = recordLength - keyLength;
                if (decompress) {
                    CompressedBytes value = (CompressedBytes) val;
                    value.reset(in, valLength);
                } else {
                    UncompressedBytes value = (UncompressedBytes) val;
                    value.reset(in, valLength);
                }

                return valLength;
            } else {
                int valLength = WritableUtils.readVInt(valLenIn);
                UncompressedBytes rawValue = (UncompressedBytes) val;
                rawValue.reset(valIn, valLength);
                --noBufferedValues;
                return valLength;
            }

        }

        private void handleChecksumException(ChecksumException e) throws IOException {
            if (this.conf.getBoolean("io.skip.checksum.errors", false)) {
                LOG.warn("Bad checksum at " + getPosition() + ". Skipping entries.");
                sync(getPosition() + this.conf.getInt("io.bytes.per.checksum", 512));
            } else {
                throw e;
            }
        }
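
        // The behaviour above is driven by configuration. A caller that prefers
        // to skip over corrupt ranges rather than fail could set the flag
        // before opening the reader (illustrative, not a recommendation):
        //
        //   conf.setBoolean("io.skip.checksum.errors", true);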

        /**
         * Set the current byte position in the input file.
         * 
         * <p>
         * The position passed must be a position returned by
         * {@link SequenceFile.Writer#getLength()} when writing this file. To
         * seek to an arbitrary position, use
         * {@link SequenceFile.Reader#sync(long)}.
         */
        public synchronized void seek(long position) throws IOException {
            in.seek(position);
            if (blockCompressed) { // trigger block read
                noBufferedKeys = 0;
                valuesDecompressed = true;
            }
        }

        /** Seek to the next sync mark past a given position. */
        public synchronized void sync(long position) throws IOException {
            if (position + SYNC_SIZE >= end) {
                seek(end);
                return;
            }

            try {
                seek(position + 4); // skip escape
                in.readFully(syncCheck);
                int syncLen = sync.length;
                for (int i = 0; in.getPos() < end; i++) {
                    int j = 0;
                    for (; j < syncLen; j++) {
                        if (sync[j] != syncCheck[(i + j) % syncLen])
                            break;
                    }
                    if (j == syncLen) {
                        in.seek(in.getPos() - SYNC_SIZE); // position before sync
                        return;
                    }
                    syncCheck[i % syncLen] = in.readByte();
                }
            } catch (ChecksumException e) { // checksum failure
                handleChecksumException(e);
            }
        }
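
        // Illustrative sketch (assumed usage, not part of this class): reading
        // only a byte range [start, start + len) of a large file by aligning to
        // sync marks; 'start' and 'len' are hypothetical split boundaries:
        //
        //   if (start > 0) {
        //       reader.sync(start);                  // position at the first sync mark after 'start'
        //   }
        //   Text key = new Text();
        //   IntWritable value = new IntWritable();
        //   while (reader.next(key, value)) {
        //       // stop once a sync mark at or beyond the split end has been crossed
        //       if (reader.getPosition() >= start + len && reader.syncSeen()) {
        //           break;
        //       }
        //   }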

        /** Returns true iff the previous call to next passed a sync mark. */
        public boolean syncSeen() {
            return syncSeen;
        }

        /** Return the current byte position in the input file. */
        public synchronized long getPosition() throws IOException {
            return in.getPos();
        }

        /** Returns the name of the file. */
        public String toString() {
            return file.toString();
        }

    }

    /**
     * Sorts key/value pairs in a sequence-format file.
     * 
     * <p>
     * For best performance, applications should make sure that the
     * {@link Writable#readFields(DataInput)} implementation of their keys is
     * very efficient. In particular, it should avoid allocating memory.
     */
    public static class Sorter {

        private RawComparator comparator;

        private MergeSort mergeSort; // the implementation of merge sort

        private Path[] inFiles; // when merging or sorting

        private Path outFile;

        private int memory; // bytes
        private int factor; // merged per pass

        private FileSystem fs = null;

        private Class keyClass;
        private Class valClass;

        private Configuration conf;

        private Progressable progressable = null;

        /** Sort and merge files containing the named classes. */
        public Sorter(FileSystem fs, Class<? extends WritableComparable> keyClass, Class valClass,
                Configuration conf) {
            this(fs, WritableComparator.get(keyClass), keyClass, valClass, conf);
        }

        /** Sort and merge using an arbitrary {@link RawComparator}. */
        public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, Class valClass, Configuration conf) {
            this.fs = fs;
            this.comparator = comparator;
            this.keyClass = keyClass;
            this.valClass = valClass;
            this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024;
            this.factor = conf.getInt("io.sort.factor", 100);
            this.conf = conf;
        }
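
        // The buffer size and merge fan-in above come from the configuration;
        // an illustrative way to tune them before constructing a Sorter (the
        // values are assumptions, not recommendations):
        //
        //   conf.setInt("io.sort.mb", 200);      // ~200 MB of in-memory sort buffer
        //   conf.setInt("io.sort.factor", 50);   // merge up to 50 streams per pass
        //   SequenceFile.Sorter sorter =
        //           new SequenceFile.Sorter(fs, Text.class, IntWritable.class, conf);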

        /** Set the number of streams to merge at once. */
        public void setFactor(int factor) {
            this.factor = factor;
        }

        /** Get the number of streams to merge at once. */
        public int getFactor() {
            return factor;
        }

        /** Set the total amount of buffer memory, in bytes. */
        public void setMemory(int memory) {
            this.memory = memory;
        }

        /** Get the total amount of buffer memory, in bytes. */
        public int getMemory() {
            return memory;
        }

        /** Set the progressable object in order to report progress. */
        public void setProgressable(Progressable progressable) {
            this.progressable = progressable;
        }

        /**
         * Perform a file sort from a set of input files into an output file.
         * 
         * @param inFiles
         *            the files to be sorted
         * @param outFile
         *            the sorted output file
         * @param deleteInput
         *            should the input files be deleted as they are read?
         */
        public void sort(Path[] inFiles, Path outFile, boolean deleteInput) throws IOException {
            if (fs.exists(outFile)) {
                throw new IOException("already exists: " + outFile);
            }

            this.inFiles = inFiles;
            this.outFile = outFile;

            int segments = sortPass(deleteInput);
            if (segments > 1) {
                mergePass(outFile.getParent());
            }
        }
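
        // Illustrative usage (hypothetical paths): sorting a couple of sequence
        // files into a single ordered output with the method above, keeping the
        // inputs:
        //
        //   Path[] inputs = { new Path("part-00000"), new Path("part-00001") };
        //   sorter.sort(inputs, new Path("sorted-output"), false);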

        /**
         * Perform a file sort from a set of input files and return an iterator.
         * 
         * @param inFiles
         *            the files to be sorted
         * @param tempDir
         *            the directory where temp files are created during sort
         * @param deleteInput
         *            should the input files be deleted as they are read?
         * @return the RawKeyValueIterator over the sorted output
         */
        public RawKeyValueIterator sortAndIterate(Path[] inFiles, Path tempDir, boolean deleteInput)
                throws IOException {
            Path outFile = new Path(tempDir + Path.SEPARATOR + "all.2");
            if (fs.exists(outFile)) {
                throw new IOException("already exists: " + outFile);
            }
            this.inFiles = inFiles;
            // outFile will basically be used as a prefix for temp files in the
            // cases where sort outputs multiple sorted segments. For the
            // single-segment case, outFile itself will contain the sorted data
            // for that segment.
            this.outFile = outFile;

            int segments = sortPass(deleteInput);
            if (segments > 1)
                return merge(outFile.suffix(".0"), outFile.suffix(".0.index"), tempDir);
            else if (segments == 1)
                return merge(new Path[] { outFile }, true, tempDir);
            else
                return null;
        }

        /**
         * The backwards compatible interface to sort.
         * 
         * @param inFile
         *            the input file to sort
         * @param outFile
         *            the sorted output file
         */
        public void sort(Path inFile, Path outFile) throws IOException {
            sort(new Path[] { inFile }, outFile, false);
        }

        private int sortPass(boolean deleteInput) throws IOException {
            LOG.debug("running sort pass");
            SortPass sortPass = new SortPass(); // make the SortPass
            sortPass.setProgressable(progressable);
            mergeSort = new MergeSort(sortPass.new SeqFileComparator());
            try {
                return sortPass.run(deleteInput); // run it
            } finally {
                sortPass.close(); // close it
            }
        }

        private class SortPass {
            private int memoryLimit = memory / 4;
            private int recordLimit = 1000000;

            private DataOutputBuffer rawKeys = new DataOutputBuffer();
            private byte[] rawBuffer;

            private int[] keyOffsets = new int[1024];
            private int[] pointers = new int[keyOffsets.length];
            private int[] pointersCopy = new int[keyOffsets.length];
            private int[] keyLengths = new int[keyOffsets.length];
            private ValueBytes[] rawValues = new ValueBytes[keyOffsets.length];

            private ArrayList segmentLengths = new ArrayList();

            private Reader in = null;
            private FSDataOutputStream out = null;
            private FSDataOutputStream indexOut = null;
            private Path outName;

            private Progressable progressable = null;

            public int run(boolean deleteInput) throws IOException {
                int segments = 0;
                int currentFile = 0;
                boolean atEof = (currentFile >= inFiles.length);
                boolean isCompressed = false;
                boolean isBlockCompressed = false;
                CompressionCodec codec = null;
                segmentLengths.clear();
                if (atEof) {
                    return 0;
                }

                // Initialize
                in = new Reader(fs, inFiles[currentFile], conf);
                isCompressed = in.isCompressed();
                isBlockCompressed = in.isBlockCompressed();
                codec = in.getCompressionCodec();

                for (int i = 0; i < rawValues.length; ++i) {
                    rawValues[i] = null;
                }

                while (!atEof) {
                    int count = 0;
                    int bytesProcessed = 0;
                    rawKeys.reset();
                    while (!atEof && bytesProcessed < memoryLimit && count < recordLimit) {

                        // Read a record into buffer
                        // Note: Attempt to re-use 'rawValue' as far as possible
                        int keyOffset = rawKeys.getLength();
                        ValueBytes rawValue = (count == keyOffsets.length || rawValues[count] == null)
                                ? in.createValueBytes()
                                : rawValues[count];
                        int recordLength = in.nextRaw(rawKeys, rawValue);
                        if (recordLength == -1) {
                            in.close();
                            if (deleteInput) {
                                fs.delete(inFiles[currentFile], true);
                            }
                            currentFile += 1;
                            atEof = currentFile >= inFiles.length;
                            if (!atEof) {
                                in = new Reader(fs, inFiles[currentFile], conf);
                            } else {
                                in = null;
                            }
                            continue;
                        }

                        int keyLength = rawKeys.getLength() - keyOffset;

                        if (count == keyOffsets.length)
                            grow();

                        keyOffsets[count] = keyOffset; // update pointers
                        pointers[count] = count;
                        keyLengths[count] = keyLength;
                        rawValues[count] = rawValue;

                        bytesProcessed += recordLength;
                        count++;
                    }

                    // buffer is full -- sort & flush it
                    LOG.debug("flushing segment " + segments);
                    rawBuffer = rawKeys.getData();
                    sort(count);
                    // indicate we're making progress
                    if (progressable != null) {
                        progressable.progress();
                    }
                    flush(count, bytesProcessed, isCompressed, isBlockCompressed, codec, segments == 0 && atEof);
                    segments++;
                }
                return segments;
            }

            public void close() throws IOException {
                if (in != null) {
                    in.close();
                }
                if (out != null) {
                    out.close();
                }
                if (indexOut != null) {
                    indexOut.close();
                }
            }

            private void grow() {
                int newLength = keyOffsets.length * 3 / 2;
                keyOffsets = grow(keyOffsets, newLength);
                pointers = grow(pointers, newLength);
                pointersCopy = new int[newLength];
                keyLengths = grow(keyLengths, newLength);
                rawValues = grow(rawValues, newLength);
            }

            private int[] grow(int[] old, int newLength) {
                int[] result = new int[newLength];
                System.arraycopy(old, 0, result, 0, old.length);
                return result;
            }

            private ValueBytes[] grow(ValueBytes[] old, int newLength) {
                ValueBytes[] result = new ValueBytes[newLength];
                System.arraycopy(old, 0, result, 0, old.length);
                for (int i = old.length; i < newLength; ++i) {
                    result[i] = null;
                }
                return result;
            }

            private void flush(int count, int bytesProcessed, boolean isCompressed, boolean isBlockCompressed,
                    CompressionCodec codec, boolean done) throws IOException {
                if (out == null) {
                    outName = done ? outFile : outFile.suffix(".0");
                    out = fs.create(outName);
                    if (!done) {
                        indexOut = fs.create(outName.suffix(".index"));
                    }
                }

                long segmentStart = out.getPos();
                Writer writer = createWriter(conf, out, keyClass, valClass, isCompressed, isBlockCompressed, codec,
                        new Metadata());

                if (!done) {
                    writer.sync = null; // disable sync on temp files
                }

                for (int i = 0; i < count; i++) { // write in sorted order
                    int p = pointers[i];
                    writer.appendRaw(rawBuffer, keyOffsets[p], keyLengths[p], rawValues[p]);
                }
                writer.close();

                if (!done) {
                    // Save the segment length
                    WritableUtils.writeVLong(indexOut, segmentStart);
                    WritableUtils.writeVLong(indexOut, (out.getPos() - segmentStart));
                    indexOut.flush();
                }
            }

            private void sort(int count) {
                System.arraycopy(pointers, 0, pointersCopy, 0, count);
                mergeSort.mergeSort(pointersCopy, pointers, 0, count);
            }

            class SeqFileComparator implements Comparator<IntWritable> {
                public int compare(IntWritable I, IntWritable J) {
                    return comparator.compare(rawBuffer, keyOffsets[I.get()], keyLengths[I.get()], rawBuffer,
                            keyOffsets[J.get()], keyLengths[J.get()]);
                }
            }

            /** set the progressable object in order to report progress */
            public void setProgressable(Progressable progressable) {
                this.progressable = progressable;
            }

        } // SequenceFile.Sorter.SortPass

        /** The interface to iterate over raw keys/values of SequenceFiles. */
        public static interface RawKeyValueIterator {
            /**
             * Gets the current raw key
             * 
             * @return DataOutputBuffer
             * @throws IOException
             */
            DataOutputBuffer getKey() throws IOException;

            /**
             * Gets the current raw value
             * 
             * @return ValueBytes
             * @throws IOException
             */
            ValueBytes getValue() throws IOException;

            /**
             * Sets up the current key and value (for getKey and getValue)
             * 
             * @return true if there exists a key/value, false otherwise
             * @throws IOException
             */
            boolean next() throws IOException;

            /**
             * closes the iterator so that the underlying streams can be closed
             * 
             * @throws IOException
             */
            void close() throws IOException;

            /**
             * Gets the Progress object; this has a float (0.0 - 1.0) indicating
             * the bytes processed by the iterator so far
             */
            Progress getProgress();
        }
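
        // Illustrative sketch of how such an iterator is typically consumed
        // (writeFile below follows the same pattern); 'sorter', 'inputs' and
        // 'tmpDir' are assumed from the earlier examples:
        //
        //   RawKeyValueIterator iter = sorter.merge(inputs, false, tmpDir);
        //   while (iter.next()) {
        //       DataOutputBuffer rawKey = iter.getKey();
        //       ValueBytes rawVal = iter.getValue();
        //       // rawKey/rawVal are only valid until the next call to next()
        //   }
        //   iter.close();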

        /**
         * Merges the list of segments of type <code>SegmentDescriptor</code>
         * 
         * @param segments
         *            the list of SegmentDescriptors
         * @param tmpDir
         *            the directory to write temporary files into
         * @return RawKeyValueIterator
         * @throws IOException
         */
        public RawKeyValueIterator merge(List<SegmentDescriptor> segments, Path tmpDir) throws IOException {
            // pass in object to report progress, if present
            MergeQueue mQueue = new MergeQueue(segments, tmpDir, progressable);
            return mQueue.merge();
        }

        /**
         * Merges the contents of files passed in Path[] using a max factor
         * value that is already set
         * 
         * @param inNames
         *            the array of path names
         * @param deleteInputs
         *            true if the input files should be deleted when unnecessary
         * @param tmpDir
         *            the directory to write temporary files into
         * @return RawKeyValueIterator
         * @throws IOException
         */
        public RawKeyValueIterator merge(Path[] inNames, boolean deleteInputs, Path tmpDir) throws IOException {
            return merge(inNames, deleteInputs, (inNames.length < factor) ? inNames.length : factor, tmpDir);
        }

        /**
         * Merges the contents of files passed in Path[]
         * 
         * @param inNames
         *            the array of path names
         * @param deleteInputs
         *            true if the input files should be deleted when unnecessary
         * @param factor
         *            the factor that will be used as the maximum merge fan-in
         * @param tmpDir
         *            the directory to write temporary files into
         * @return RawKeyValueIterator
         * @throws IOException
         */
        public RawKeyValueIterator merge(Path[] inNames, boolean deleteInputs, int factor, Path tmpDir)
                throws IOException {
            // get the segments from inNames
            ArrayList<SegmentDescriptor> a = new ArrayList<SegmentDescriptor>();
            for (int i = 0; i < inNames.length; i++) {
                SegmentDescriptor s = new SegmentDescriptor(0, fs.getLength(inNames[i]), inNames[i]);
                s.preserveInput(!deleteInputs);
                s.doSync();
                a.add(s);
            }
            this.factor = factor;
            MergeQueue mQueue = new MergeQueue(a, tmpDir, progressable);
            return mQueue.merge();
        }

        /**
         * Merges the contents of files passed in Path[]
         * 
         * @param inNames
         *            the array of path names
         * @param tempDir
         *            the directory for creating temp files during merge
         * @param deleteInputs
         *            true if the input files should be deleted when unnecessary
         * @return RawKeyValueIterator
         * @throws IOException
         */
        public RawKeyValueIterator merge(Path[] inNames, Path tempDir, boolean deleteInputs) throws IOException {
            // outFile will basically be used as prefix for temp files for the
            // intermediate merge outputs
            this.outFile = new Path(tempDir + Path.SEPARATOR + "merged");
            // get the segments from inNames
            ArrayList<SegmentDescriptor> a = new ArrayList<SegmentDescriptor>();
            for (int i = 0; i < inNames.length; i++) {
                SegmentDescriptor s = new SegmentDescriptor(0, fs.getLength(inNames[i]), inNames[i]);
                s.preserveInput(!deleteInputs);
                s.doSync();
                a.add(s);
            }
            factor = (inNames.length < factor) ? inNames.length : factor;
            // pass in object to report progress, if present
            MergeQueue mQueue = new MergeQueue(a, tempDir, progressable);
            return mQueue.merge();
        }

        /**
         * Clones the attributes (like compression) of the input file and
         * creates a corresponding Writer
         * 
         * @param inputFile
         *            the path of the input file whose attributes should be
         *            cloned
         * @param outputFile
         *            the path of the output file
         * @param prog
         *            the Progressable to report status during the file write
         * @return Writer
         * @throws IOException
         */
        public Writer cloneFileAttributes(Path inputFile, Path outputFile, Progressable prog) throws IOException {
            FileSystem srcFileSys = inputFile.getFileSystem(conf);
            Reader reader = new Reader(srcFileSys, inputFile, 4096, conf, true);
            boolean compress = reader.isCompressed();
            boolean blockCompress = reader.isBlockCompressed();
            CompressionCodec codec = reader.getCompressionCodec();
            reader.close();

            Writer writer = createWriter(outputFile.getFileSystem(conf), conf, outputFile, keyClass, valClass,
                    compress, blockCompress, codec, prog, new Metadata());
            return writer;
        }

        /**
         * Writes records from RawKeyValueIterator into a file represented by
         * the passed writer
         * 
         * @param records
         *            the RawKeyValueIterator
         * @param writer
         *            the Writer created earlier
         * @throws IOException
         */
        public void writeFile(RawKeyValueIterator records, Writer writer) throws IOException {
            while (records.next()) {
                writer.appendRaw(records.getKey().getData(), 0, records.getKey().getLength(), records.getValue());
            }
            writer.sync();
        }

        /**
         * Merge the provided files.
         * 
         * @param inFiles
         *            the array of input path names
         * @param outFile
         *            the final output file
         * @throws IOException
         */
        public void merge(Path[] inFiles, Path outFile) throws IOException {
            if (fs.exists(outFile)) {
                throw new IOException("already exists: " + outFile);
            }
            RawKeyValueIterator r = merge(inFiles, false, outFile.getParent());
            Writer writer = cloneFileAttributes(inFiles[0], outFile, null);

            writeFile(r, writer);

            writer.close();
        }
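
        // Illustrative usage (hypothetical paths): merging already-sorted
        // segment files into a single output with the method above:
        //
        //   Path[] sortedSegments = { new Path("seg-00000"), new Path("seg-00001") };
        //   sorter.merge(sortedSegments, new Path("merged-output"));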

        /** sort calls this to generate the final merged output */
        private int mergePass(Path tmpDir) throws IOException {
            LOG.debug("running merge pass");
            Writer writer = cloneFileAttributes(outFile.suffix(".0"), outFile, null);
            RawKeyValueIterator r = merge(outFile.suffix(".0"), outFile.suffix(".0.index"), tmpDir);
            writeFile(r, writer);

            writer.close();
            return 0;
        }

        /**
         * Used by mergePass to merge the output of the sort
         * 
         * @param inName
         *            the name of the input file containing sorted segments
         * @param indexIn
         *            the offsets of the sorted segments
         * @param tmpDir
         *            the relative directory to store intermediate results in
         * @return RawKeyValueIterator
         * @throws IOException
         */
        private RawKeyValueIterator merge(Path inName, Path indexIn, Path tmpDir) throws IOException {
            // Get the segments from indexIn. We create a SegmentContainer so
            // that we can track the segments belonging to inName and delete
            // inName as soon as all the contained segments have been looked at
            // during the merge process and are no longer needed.
            SegmentContainer container = new SegmentContainer(inName, indexIn);
            MergeQueue mQueue = new MergeQueue(container.getSegmentList(), tmpDir, progressable);
            return mQueue.merge();
        }

        /** This class implements the core of the merge logic */
        private class MergeQueue extends PriorityQueue implements RawKeyValueIterator {
            private boolean compress;
            private boolean blockCompress;
            private DataOutputBuffer rawKey = new DataOutputBuffer();
            private ValueBytes rawValue;
            private long totalBytesProcessed;
            private float progPerByte;
            private Progress mergeProgress = new Progress();
            private Path tmpDir;
            private Progressable progress = null; // handle to the progress reporting object
            private SegmentDescriptor minSegment;

            // a TreeMap used to store the segments sorted by size (segment
            // offset and segment path name are used to break ties between
            // segments of the same size)
            private Map<SegmentDescriptor, Void> sortedSegmentSizes = new TreeMap<SegmentDescriptor, Void>();

            @SuppressWarnings("unchecked")
            public void put(SegmentDescriptor stream) throws IOException {
                if (size() == 0) {
                    compress = stream.in.isCompressed();
                    blockCompress = stream.in.isBlockCompressed();
                } else if (compress != stream.in.isCompressed() || blockCompress != stream.in.isBlockCompressed()) {
                    throw new IOException("All merged files must be compressed or not.");
                }
                super.put(stream);
            }

            /**
             * A queue of file segments to merge
             * 
             * @param segments
             *            the file segments to merge
             * @param tmpDir
             *            a relative local directory to save intermediate files
             *            in
             * @param progress
             *            the reference to the Progressable object
             */
            public MergeQueue(List<SegmentDescriptor> segments, Path tmpDir, Progressable progress) {
                int size = segments.size();
                for (int i = 0; i < size; i++) {
                    sortedSegmentSizes.put(segments.get(i), null);
                }
                this.tmpDir = tmpDir;
                this.progress = progress;
            }

            protected boolean lessThan(Object a, Object b) {
                // indicate we're making progress
                if (progress != null) {
                    progress.progress();
                }
                SegmentDescriptor msa = (SegmentDescriptor) a;
                SegmentDescriptor msb = (SegmentDescriptor) b;
                return comparator.compare(msa.getKey().getData(), 0, msa.getKey().getLength(),
                        msb.getKey().getData(), 0, msb.getKey().getLength()) < 0;
            }

            public void close() throws IOException {
                SegmentDescriptor ms; // close inputs
                while ((ms = (SegmentDescriptor) pop()) != null) {
                    ms.cleanup();
                }
                minSegment = null;
            }

            public DataOutputBuffer getKey() throws IOException {
                return rawKey;
            }

            public ValueBytes getValue() throws IOException {
                return rawValue;
            }

            public boolean next() throws IOException {
                if (size() == 0)
                    return false;
                if (minSegment != null) {
                    // minSegment is non-null for all invocations of next except
                    // the first one. For the first invocation the priority
                    // queue is ready for use, but for subsequent invocations we
                    // first adjust the queue.
                    adjustPriorityQueue(minSegment);
                    if (size() == 0) {
                        minSegment = null;
                        return false;
                    }
                }
                minSegment = (SegmentDescriptor) top();
                long startPos = minSegment.in.getPosition(); // current position in stream
                // save the raw key reference
                rawKey = minSegment.getKey();
                // load the raw value. Re-use the existing rawValue buffer
                if (rawValue == null) {
                    rawValue = minSegment.in.createValueBytes();
                }
                minSegment.nextRawValue(rawValue);
                long endPos = minSegment.in.getPosition(); // end position after reading value
                updateProgress(endPos - startPos);
                return true;
            }

            public Progress getProgress() {
                return mergeProgress;
            }

            private void adjustPriorityQueue(SegmentDescriptor ms) throws IOException {
                long startPos = ms.in.getPosition(); // current position in stream
                boolean hasNext = ms.nextRawKey();
                long endPos = ms.in.getPosition(); // end position after reading key
                updateProgress(endPos - startPos);
                if (hasNext) {
                    adjustTop();
                } else {
                    pop();
                    ms.cleanup();
                }
            }

            private void updateProgress(long bytesProcessed) {
                totalBytesProcessed += bytesProcessed;
                if (progPerByte > 0) {
                    mergeProgress.set(totalBytesProcessed * progPerByte);
                }
            }

            /**
             * Performs a single-level merge; it is called multiple times,
             * depending on the merge factor and the number of segments.
             * 
             * @return a RawKeyValueIterator over the merged key/value stream
             * @throws IOException
             */
            public RawKeyValueIterator merge() throws IOException {
                // create the MergeStreams from the sorted map built in the
                // constructor and dump the final output to a file
                int numSegments = sortedSegmentSizes.size();
                int origFactor = factor;
                int passNo = 1;
                LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
                do {
                    // get the factor for this pass of merge
                    factor = getPassFactor(passNo, numSegments);
                    List<SegmentDescriptor> segmentsToMerge = new ArrayList<SegmentDescriptor>();
                    int segmentsConsidered = 0;
                    int numSegmentsToConsider = factor;
                    while (true) {
                        // extract the smallest 'factor' number of segment
                        // pointers from the TreeMap; call cleanup on the empty
                        // segments (those with no key/value data)
                        SegmentDescriptor[] mStream = getSegmentDescriptors(numSegmentsToConsider);
                        for (int i = 0; i < mStream.length; i++) {
                            if (mStream[i].nextRawKey()) {
                                segmentsToMerge.add(mStream[i]);
                                segmentsConsidered++;
                                // account for the bytes read by nextRawKey()
                                updateProgress(mStream[i].in.getPosition());
                            } else {
                                mStream[i].cleanup();
                                numSegments--; // exclude this segment from the merge
                            }
                        }
                        // if we have the desired number of segments
                        // or looked at all available segments, we break
                        if (segmentsConsidered == factor || sortedSegmentSizes.size() == 0) {
                            break;
                        }

                        numSegmentsToConsider = factor - segmentsConsidered;
                    }
                    // feed the streams to the priority queue
                    initialize(segmentsToMerge.size());
                    clear();
                    for (int i = 0; i < segmentsToMerge.size(); i++) {
                        put(segmentsToMerge.get(i));
                    }
                    // if few enough segments remain, just return this
                    // iterator; otherwise do another single-level merge
                    if (numSegments <= factor) {
                        // calculate the total length of the remaining segments;
                        // required for computing the merge progress
                        long totalBytes = 0;
                        for (int i = 0; i < segmentsToMerge.size(); i++) {
                            totalBytes += segmentsToMerge.get(i).segmentLength;
                        }
                        if (totalBytes != 0) // being paranoid
                            progPerByte = 1.0f / (float) totalBytes;
                        // reset factor to what it originally was
                        factor = origFactor;
                        return this;
                    } else {
                        // spread the creation of temp files across multiple
                        // disks, if available, subject to the space constraints
                        long approxOutputSize = 0;
                        for (SegmentDescriptor s : segmentsToMerge) {
                            approxOutputSize += s.segmentLength
                                    + ChecksumFileSystem.getApproxChkSumLength(s.segmentLength);
                        }
                        Path tmpFilename = new Path(tmpDir, "intermediate").suffix("." + passNo);

                        Path outputFile = lDirAlloc.getLocalPathForWrite(tmpFilename.toString(), approxOutputSize,
                                conf);
                        LOG.debug("writing intermediate results to " + outputFile);
                        Writer writer = cloneFileAttributes(
                                fs.makeQualified(segmentsToMerge.get(0).segmentPathName),
                                fs.makeQualified(outputFile), null);
                        writer.sync = null; // disable sync for temp files
                        writeFile(this, writer);
                        writer.close();

                        // we finished one single-level merge; now clean up the
                        // priority queue
                        this.close();

                        SegmentDescriptor tempSegment = new SegmentDescriptor(0, fs.getLength(outputFile),
                                outputFile);
                        // put the segment back in the TreeMap
                        sortedSegmentSizes.put(tempSegment, null);
                        numSegments = sortedSegmentSizes.size();
                        passNo++;
                    }
                    // only the first-pass merge factor is special, so reset
                    // the factor to its original value
                    factor = origFactor;
                } while (true);
            }
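
            // Illustrative note (added commentary): getPassFactor() below
            // mirrors the HADOOP-591 heuristic: on the first pass, merge only
            // enough segments that every later pass can merge exactly 'factor'
            // segments. For example, with 13 segments and factor = 10, the
            // first pass merges (13 - 1) % (10 - 1) + 1 = 4 segments, leaving
            // 13 - 4 + 1 = 10 segments for a final full-factor pass.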

            // Hadoop-591
            public int getPassFactor(int passNo, int numSegments) {
                if (passNo > 1 || numSegments <= factor || factor == 1)
                    return factor;
                int mod = (numSegments - 1) % (factor - 1);
                if (mod == 0)
                    return factor;
                return mod + 1;
            }

            /**
             * Return (& remove) the requested number of segment descriptors
             * from the sorted map.
             */
            public SegmentDescriptor[] getSegmentDescriptors(int numDescriptors) {
                if (numDescriptors > sortedSegmentSizes.size())
                    numDescriptors = sortedSegmentSizes.size();
                SegmentDescriptor[] segmentDescriptors = new SegmentDescriptor[numDescriptors];
                Iterator iter = sortedSegmentSizes.keySet().iterator();
                int i = 0;
                while (i < numDescriptors) {
                    segmentDescriptors[i++] = (SegmentDescriptor) iter.next();
                    iter.remove();
                }
                return segmentDescriptors;
            }
        } // SequenceFile.Sorter.MergeQueue

        /**
         * This class defines a merge segment. This class can be subclassed to
         * provide a customized cleanup method implementation. In this
         * implementation, cleanup closes the file handle and deletes the file
         */
        public class SegmentDescriptor implements Comparable {

            long segmentOffset; // the start of the segment in the file
            long segmentLength; // the length of the segment
            Path segmentPathName; // the path of the file containing the segment
            boolean ignoreSync = true; // set to true for temp files
            private Reader in = null;
            private DataOutputBuffer rawKey = null; // holds the current key
            private boolean preserveInput = false; // if true, input segment files are kept (not deleted) on cleanup

            /**
             * Constructs a segment
             * 
             * @param segmentOffset
             *            the offset of the segment in the file
             * @param segmentLength
             *            the length of the segment
             * @param segmentPathName
             *            the path name of the file containing the segment
             */
            public SegmentDescriptor(long segmentOffset, long segmentLength, Path segmentPathName) {
                this.segmentOffset = segmentOffset;
                this.segmentLength = segmentLength;
                this.segmentPathName = segmentPathName;
            }

            /** Enable sync-marker checks when reading this segment */
            public void doSync() {
                ignoreSync = false;
            }

            /** Sets whether the input files should be preserved (not deleted) when no longer needed */
            public void preserveInput(boolean preserve) {
                preserveInput = preserve;
            }

            public boolean shouldPreserveInput() {
                return preserveInput;
            }

            public int compareTo(Object o) {
                SegmentDescriptor that = (SegmentDescriptor) o;
                if (this.segmentLength != that.segmentLength) {
                    return (this.segmentLength < that.segmentLength ? -1 : 1);
                }
                if (this.segmentOffset != that.segmentOffset) {
                    return (this.segmentOffset < that.segmentOffset ? -1 : 1);
                }
                return (this.segmentPathName.toString()).compareTo(that.segmentPathName.toString());
            }

            public boolean equals(Object o) {
                if (!(o instanceof SegmentDescriptor)) {
                    return false;
                }
                SegmentDescriptor that = (SegmentDescriptor) o;
                if (this.segmentLength == that.segmentLength && this.segmentOffset == that.segmentOffset
                        && this.segmentPathName.toString().equals(that.segmentPathName.toString())) {
                    return true;
                }
                return false;
            }
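
            // Illustrative note (added commentary): hashCode() below hashes
            // only segmentOffset. That is still consistent with equals() above
            // (equal segments necessarily share an offset and thus a hash),
            // but distinct segments that happen to share an offset will
            // collide.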

            public int hashCode() {
                return 37 * 17 + (int) (segmentOffset ^ (segmentOffset >>> 32));
            }

            /**
             * Fills up the rawKey object with the key returned by the Reader
             * 
             * @return true if there is a key returned; false, otherwise
             * @throws IOException
             */
            public boolean nextRawKey() throws IOException {
                if (in == null) {
                    int bufferSize = conf.getInt("io.file.buffer.size", 4096);
                    if (fs.getUri().getScheme().startsWith("ramfs")) {
                        bufferSize = conf.getInt("io.bytes.per.checksum", 512);
                    }
                    Reader reader = new Reader(fs, segmentPathName, bufferSize, segmentOffset, segmentLength, conf,
                            false);

                    // sometimes we ignore syncs especially for temp merge files
                    if (ignoreSync)
                        reader.sync = null;

                    if (reader.getKeyClass() != keyClass)
                        throw new IOException("wrong key class: " + reader.getKeyClass() + " is not " + keyClass);
                    if (reader.getValueClass() != valClass)
                        throw new IOException(
                                "wrong value class: " + reader.getValueClass() + " is not " + valClass);
                    this.in = reader;
                    rawKey = new DataOutputBuffer();
                }
                rawKey.reset();
                int keyLength = in.nextRawKey(rawKey);
                return (keyLength >= 0);
            }

            /**
             * Fills up the passed rawValue with the value corresponding to the
             * key read earlier
             * 
             * @param rawValue the ValueBytes buffer to fill
             * @return the length of the value
             * @throws IOException
             */
            public int nextRawValue(ValueBytes rawValue) throws IOException {
                int valLength = in.nextRawValue(rawValue);
                return valLength;
            }

            /** Returns the stored rawKey */
            public DataOutputBuffer getKey() {
                return rawKey;
            }

            /** closes the underlying reader */
            private void close() throws IOException {
                this.in.close();
                this.in = null;
            }

            /**
             * The default cleanup. Subclasses can override this with a custom
             * cleanup
             */
            public void cleanup() throws IOException {
                close();
                if (!preserveInput) {
                    fs.delete(segmentPathName, true);
                }
            }
        } // SequenceFile.Sorter.SegmentDescriptor
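
        /**
         * Illustrative sketch only (added commentary, not part of the original
         * source): the SegmentDescriptor javadoc above notes that cleanup()
         * can be overridden. A hypothetical subclass could, for example, log
         * the segment path before delegating to the default cleanup.
         */
        private class LoggingSegmentDescriptor extends SegmentDescriptor {

            public LoggingSegmentDescriptor(long segmentOffset, long segmentLength, Path segmentPathName) {
                super(segmentOffset, segmentLength, segmentPathName);
            }

            public void cleanup() throws IOException {
                // log before the default cleanup closes the reader and
                // (unless preserveInput was requested) deletes the file
                LOG.debug("cleaning up segment " + segmentPathName);
                super.cleanup();
            }
        } // illustrative LoggingSegmentDescriptor (hypothetical)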

        /**
         * This class describes a segment that shares its backing file with
         * other segments; the shared file is tracked by a parent
         * SegmentContainer
         */
        private class LinkedSegmentsDescriptor extends SegmentDescriptor {

            SegmentContainer parentContainer = null;

            /**
             * Constructs a segment
             * 
             * @param segmentOffset
             *            the offset of the segment in the file
             * @param segmentLength
             *            the length of the segment
             * @param segmentPathName
             *            the path name of the file containing the segment
             * @param parent
             *            the parent SegmentContainer that holds the segment
             */
            public LinkedSegmentsDescriptor(long segmentOffset, long segmentLength, Path segmentPathName,
                    SegmentContainer parent) {
                super(segmentOffset, segmentLength, segmentPathName);
                this.parentContainer = parent;
            }

            /**
             * Closes the reader and defers deletion of the shared backing file
             * to the parent container
             */
            public void cleanup() throws IOException {
                super.close();
                if (super.shouldPreserveInput())
                    return;
                parentContainer.cleanup();
            }
        } // SequenceFile.Sorter.LinkedSegmentsDescriptor

        /**
         * The class that defines a container for segments to be merged.
         * Primarily needed so that the shared temp file can be deleted as soon
         * as all the segments it contains have been consumed
         */
        private class SegmentContainer {
            private int numSegmentsCleanedUp = 0; // track the number of segment cleanups
            private int numSegmentsContained; // # of segments contained
            private Path inName; // input file from where segments are created

            // the list of segments read from the file
            private ArrayList<SegmentDescriptor> segments = new ArrayList<SegmentDescriptor>();

            /**
             * This constructor primarily serves the sort routine that generates
             * a single output file with an associated index file
             */
            public SegmentContainer(Path inName, Path indexIn) throws IOException {
                // get the segments from indexIn
                FSDataInputStream fsIndexIn = fs.open(indexIn);
                long end = fs.getLength(indexIn);
                while (fsIndexIn.getPos() < end) {
                    long segmentOffset = WritableUtils.readVLong(fsIndexIn);
                    long segmentLength = WritableUtils.readVLong(fsIndexIn);
                    Path segmentName = inName;
                    segments.add(new LinkedSegmentsDescriptor(segmentOffset, segmentLength, segmentName, this));
                }
                fsIndexIn.close();
                fs.delete(indexIn, true);
                numSegmentsContained = segments.size();
                this.inName = inName;
            }
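
            // Illustrative note (added commentary): the index file read above
            // is assumed to be a flat sequence of (offset, length) VLong
            // pairs, one pair per segment. For example, an index for three
            // segments laid out back-to-back in the data file might contain
            // the pairs (0, 100), (100, 250) and (350, 75).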

            public List<SegmentDescriptor> getSegmentList() {
                return segments;
            }

            public void cleanup() throws IOException {
                numSegmentsCleanedUp++;
                if (numSegmentsCleanedUp == numSegmentsContained) {
                    fs.delete(inName, true);
                }
            }
        } // SequenceFile.Sorter.SegmentContainer

    } // SequenceFile.Sorter

} // SequenceFile
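
// Illustrative usage sketch (added commentary, not part of the original
// source). Assuming this copy preserves the upstream Hadoop Sorter API
// (the Sorter(FileSystem, Class, Class, Configuration) constructor and the
// merge(Path[], Path) overload), merging pre-sorted sequence files might
// look like:
//
//     Configuration conf = new Configuration();
//     FileSystem fs = FileSystem.getLocal(conf);
//     SequenceFile.Sorter sorter =
//             new SequenceFile.Sorter(fs, Text.class, IntWritable.class, conf);
//     sorter.merge(new Path[] { new Path("part-0"), new Path("part-1") },
//             new Path("merged.seq"));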