org.apache.lucene.store.DataOutput.java Source code

Introduction

Here is the source code for org.apache.lucene.store.DataOutput.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.store;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;

/**
 * Abstract base class for performing write operations of Lucene's low-level
 * data types.
 *
 * <p>Subclasses only have to implement {@link #writeByte(byte)} and
 * {@link #writeBytes(byte[], int, int)}; every other method in this class is
 * expressed in terms of those two primitives.
 *
 * <p>{@code DataOutput} may only be used from one thread, because it is not
 * thread safe (it keeps internal state like file position, and
 * {@link #copyBytes(DataInput, long)} reuses a per-instance scratch buffer).
 */
public abstract class DataOutput {

    /** Writes a single byte.
     * <p>
     * The most primitive data type is an eight-bit byte. Files are
     * accessed as sequences of bytes. All other data types are defined
     * as sequences of bytes, so file formats are byte-order independent.
     *
     * @param b the byte to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readByte()
     */
    public abstract void writeByte(byte b) throws IOException;

    /** Writes the first {@code length} bytes of an array.
     * <p>
     * Equivalent to {@code writeBytes(b, 0, length)}.
     *
     * @param b the bytes to write
     * @param length the number of bytes to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readBytes(byte[],int,int)
     */
    public void writeBytes(byte[] b, int length) throws IOException {
        writeBytes(b, 0, length);
    }

    /** Writes a slice of an array of bytes.
     * @param b the bytes to write
     * @param offset the offset in the byte array
     * @param length the number of bytes to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readBytes(byte[],int,int)
     */
    public abstract void writeBytes(byte[] b, int offset, int length) throws IOException;

    /** Writes an int as four bytes.
     * <p>
     * The 32 bits of the value are written as four bytes, high-order byte first.
     *
     * @param i the value to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readInt()
     */
    public void writeInt(int i) throws IOException {
        writeByte((byte) (i >> 24));
        writeByte((byte) (i >> 16));
        writeByte((byte) (i >> 8));
        writeByte((byte) i);
    }

    /** Writes a short as two bytes, high-order byte first.
     * @param i the value to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readShort()
     */
    public void writeShort(short i) throws IOException {
        writeByte((byte) (i >> 8));
        writeByte((byte) i);
    }

    /** Writes an int in a variable-length format.  Writes between one and
     * five bytes.  Smaller values take fewer bytes.  Negative numbers are
     * supported, but should be avoided (they always take five bytes).
     * <p>VByte is a variable-length format for positive integers, defined such that the
     * high-order bit of each byte indicates whether more bytes remain to be read. The
     * low-order seven bits are appended as increasingly more significant bits in the
     * resulting integer value. Thus values from zero to 127 may be stored in a single
     * byte, values from 128 to 16,383 may be stored in two bytes, and so on.</p>
     * <p>VByte Encoding Example</p>
     * <table cellspacing="0" cellpadding="2" border="0" summary="variable length encoding examples">
     * <tr valign="top">
     *   <th align="left">Value</th>
     *   <th align="left">Byte 1</th>
     *   <th align="left">Byte 2</th>
     *   <th align="left">Byte 3</th>
     * </tr>
     * <tr valign="bottom">
     *   <td>0</td>
     *   <td><code>00000000</code></td>
     *   <td></td>
     *   <td></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>1</td>
     *   <td><code>00000001</code></td>
     *   <td></td>
     *   <td></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>2</td>
     *   <td><code>00000010</code></td>
     *   <td></td>
     *   <td></td>
     * </tr>
     * <tr>
     *   <td valign="top">...</td>
     *   <td valign="bottom"></td>
     *   <td valign="bottom"></td>
     *   <td valign="bottom"></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>127</td>
     *   <td><code>01111111</code></td>
     *   <td></td>
     *   <td></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>128</td>
     *   <td><code>10000000</code></td>
     *   <td><code>00000001</code></td>
     *   <td></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>129</td>
     *   <td><code>10000001</code></td>
     *   <td><code>00000001</code></td>
     *   <td></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>130</td>
     *   <td><code>10000010</code></td>
     *   <td><code>00000001</code></td>
     *   <td></td>
     * </tr>
     * <tr>
     *   <td valign="top">...</td>
     *   <td></td>
     *   <td></td>
     *   <td></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>16,383</td>
     *   <td><code>11111111</code></td>
     *   <td><code>01111111</code></td>
     *   <td></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>16,384</td>
     *   <td><code>10000000</code></td>
     *   <td><code>10000000</code></td>
     *   <td><code>00000001</code></td>
     * </tr>
     * <tr valign="bottom">
     *   <td>16,385</td>
     *   <td><code>10000001</code></td>
     *   <td><code>10000000</code></td>
     *   <td><code>00000001</code></td>
     * </tr>
     * <tr>
     *   <td valign="top">...</td>
     *   <td valign="bottom"></td>
     *   <td valign="bottom"></td>
     *   <td valign="bottom"></td>
     * </tr>
     * </table>
     * <p>This provides compression while still being efficient to decode.</p>
     *
     * @param i Smaller values take fewer bytes.  Negative numbers are
     * supported, but should be avoided.
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readVInt()
     */
    public final void writeVInt(int i) throws IOException {
        // Emit seven bits at a time, low-order group first; the high bit of each
        // byte flags that at least one more byte follows.
        while ((i & ~0x7F) != 0) {
            writeByte((byte) ((i & 0x7F) | 0x80));
            // Unsigned shift so that negative values terminate (after five bytes).
            i >>>= 7;
        }
        writeByte((byte) i);
    }

    /**
     * Write a {@link BitUtil#zigZagEncode(int) zig-zag}-encoded
     * {@link #writeVInt(int) variable-length} integer. This is typically useful
     * to write small signed ints and is equivalent to calling
     * <code>writeVInt(BitUtil.zigZagEncode(i))</code>.
     *
     * @param i the value to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readZInt()
     */
    public final void writeZInt(int i) throws IOException {
        writeVInt(BitUtil.zigZagEncode(i));
    }

    /** Writes a long as eight bytes.
     * <p>
     * The 64 bits of the value are written as eight bytes, high-order byte first
     * (the upper 32 bits are written with {@link #writeInt(int)}, then the lower 32 bits).
     *
     * @param i the value to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readLong()
     */
    public void writeLong(long i) throws IOException {
        writeInt((int) (i >> 32));
        writeInt((int) i);
    }

    /** Writes a long in a variable-length format.  Writes between one and nine
     * bytes.  Smaller values take fewer bytes.  Negative numbers are not
     * supported.
     * <p>
     * The format is described further in {@link DataOutput#writeVInt(int)}.
     *
     * @param i the non-negative value to write
     * @throws IllegalArgumentException if {@code i} is negative
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readVLong()
     */
    public final void writeVLong(long i) throws IOException {
        if (i < 0) {
            throw new IllegalArgumentException("cannot write negative vLong (got: " + i + ")");
        }
        writeSignedVLong(i);
    }

    /**
     * Writes a potentially negative vLong: same seven-bits-per-byte encoding as
     * {@link #writeVInt(int)}, without the sign check of {@link #writeVLong(long)}.
     * A value with the sign bit set takes ten bytes.
     */
    private void writeSignedVLong(long i) throws IOException {
        while ((i & ~0x7FL) != 0L) {
            writeByte((byte) ((i & 0x7FL) | 0x80L));
            // Unsigned shift so that values with the top bit set still terminate.
            i >>>= 7;
        }
        writeByte((byte) i);
    }

    /**
     * Write a {@link BitUtil#zigZagEncode(long) zig-zag}-encoded
     * {@link #writeVLong(long) variable-length} long. Writes between one and ten
     * bytes. This is typically useful to write small signed longs.
     *
     * @param i the value to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readZLong()
     */
    public final void writeZLong(long i) throws IOException {
        // The zig-zag encoded value may have its top bit set, so bypass the
        // non-negative check performed by writeVLong.
        writeSignedVLong(BitUtil.zigZagEncode(i));
    }

    /** Writes a string.
     * <p>
     * Writes strings as UTF-8 encoded bytes. First the length, in bytes, is
     * written as a {@link #writeVInt VInt}, followed by the bytes.
     *
     * @param s the string to write
     * @throws IOException If there is an I/O error writing to the underlying medium.
     * @see DataInput#readString()
     */
    public void writeString(String s) throws IOException {
        final BytesRef utf8Result = new BytesRef(s);
        writeVInt(utf8Result.length);
        writeBytes(utf8Result.bytes, utf8Result.offset, utf8Result.length);
    }

    /** Size, in bytes, of the scratch buffer used by {@link #copyBytes(DataInput, long)}. */
    private static final int COPY_BUFFER_SIZE = 16384;

    /** Scratch buffer for {@link #copyBytes(DataInput, long)}; allocated on first use and then reused. */
    private byte[] copyBuffer;

    /** Copy numBytes bytes from input to ourself.
     * <p>
     * The data is transferred in chunks of at most {@code COPY_BUFFER_SIZE} bytes
     * through a buffer that is kept on this instance between calls.
     *
     * @param input the source to read from
     * @param numBytes the number of bytes to copy; must not be negative
     *        (only checked when assertions are enabled)
     * @throws IOException If there is an I/O error reading from {@code input} or
     *         writing to the underlying medium.
     */
    public void copyBytes(DataInput input, long numBytes) throws IOException {
        assert numBytes >= 0 : "numBytes=" + numBytes;
        if (copyBuffer == null) {
            copyBuffer = new byte[COPY_BUFFER_SIZE];
        }
        long left = numBytes;
        while (left > 0) {
            // The chunk never exceeds the buffer size, so the narrowing cast is safe.
            final int toCopy = (int) Math.min(left, COPY_BUFFER_SIZE);
            input.readBytes(copyBuffer, 0, toCopy);
            writeBytes(copyBuffer, 0, toCopy);
            left -= toCopy;
        }
    }

    /**
     * Writes a String map.
     * <p>
     * First the size is written as an {@link #writeVInt(int) vInt},
     * followed by each key-value pair written as two consecutive
     * {@link #writeString(String) String}s.
     *
     * @param map Input map.
     * @throws NullPointerException if {@code map} is null.
     * @throws IOException If there is an I/O error writing to the underlying medium.
     */
    public void writeMapOfStrings(Map<String, String> map) throws IOException {
        writeVInt(map.size());
        for (Map.Entry<String, String> entry : map.entrySet()) {
            writeString(entry.getKey());
            writeString(entry.getValue());
        }
    }

    /**
     * Writes a String set.
     * <p>
     * First the size is written as an {@link #writeVInt(int) vInt},
     * followed by each value written as a
     * {@link #writeString(String) String}.
     *
     * @param set Input set.
     * @throws NullPointerException if {@code set} is null.
     * @throws IOException If there is an I/O error writing to the underlying medium.
     */
    public void writeSetOfStrings(Set<String> set) throws IOException {
        writeVInt(set.size());
        for (String value : set) {
            writeString(value);
        }
    }
}