org.apache.lucene.index.FieldsWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.index.FieldsWriter.java

Source

package org.apache.lucene.index;

/**
 * Copyright 2004 The Apache Software Foundation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

import java.io.IOException;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.IOUtils;

import com.alimama.mdrill.fdtBlockCompress.FdtCompressIndexInput;
import com.alimama.mdrill.fdtBlockCompress.FdtCompressIndexOutput;

public class FieldsWriter {
    // Logger for this class.
    // NOTE(review): no active statement in this file uses it.
    private static final Log LOG = LogFactory.getLog(FieldsWriter.class);

    // Per-field flag bits. writeField() ORs the applicable ones together and
    // writes the result as a single byte right after the field number.
    static final int FIELD_IS_TOKENIZED = 1 << 0;
    static final int FIELD_IS_BINARY = 1 << 1;

    /** @deprecated Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0 */
    @Deprecated
    static final int FIELD_IS_COMPRESSED = 1 << 2;

    // Numeric type codes live in the three bits starting at _NUMERIC_BIT_SHIFT
    // (bits 3..5 of the flags byte); FIELD_IS_NUMERIC_MASK selects those bits.
    private static final int _NUMERIC_BIT_SHIFT = 3;
    static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;

    // One code per NumericField data type handled by writeField().
    static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
    static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
    static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
    static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
    // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
    // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;

    // the next possible bits are: 1 << 6; 1 << 7

    // File format versions, oldest first.

    // Original format
    static final int FORMAT = 0;

    // Changed strings to UTF8
    static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;

    // Lucene 3.0: Removal of compressed fields
    static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;

    // Lucene 3.2: NumericFields are stored in binary format
    static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;

    // NOTE: if you introduce a new format, make it 1 higher
    // than the current one, and always change this if you
    // switch to a new format!
    static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;

    // Used by addDocument() to look up the FieldInfo for each stored field by name.
    private FieldInfos fieldInfos;

    // If null, the streams were supplied by the caller and close()/abort() leave
    // them alone; if non-null, this writer created the streams and manages them.
    private Directory directory;
    // Segment name used to build the two file names; null when streams were supplied.
    private String segment;
    // Receives the stored-field data of every document; replaceable via setFieldsStream().
    private IndexOutput fieldsStream;
    // Receives one long per document: the fieldsStream file pointer where that document starts.
    private IndexOutput indexStream;

    /**
     * Creates a writer that opens and owns the stored-fields data file and the
     * stored-fields index file of {@code segment} inside {@code directory}, and
     * writes the format headers to both.
     *
     * <p>When {@code FieldsWriterCompress.isFdtCompress()} is true and the
     * directory is not a {@link RAMDirectory}, the data file is wrapped in an
     * {@link FdtCompressIndexOutput}; otherwise it is written to directly.
     *
     * <p>If any step fails, {@link #abort()} is invoked so that both outputs
     * are closed and both files are deleted before the exception propagates.
     *
     * @param directory directory in which the two files are created
     * @param segment   segment name used to derive the file names
     * @param fn        field infos used later to resolve field names
     * @throws IOException if creating an output or writing a header fails
     */
    FieldsWriter(Directory directory, String segment, FieldInfos fn) throws IOException {
        this.directory = directory;
        this.segment = segment;
        fieldInfos = fn;

        boolean success = false;
        try {
            // Store each output in its field the moment it exists. abort() -> close()
            // only closes what the fields reference, so holding the data output in a
            // local would leak it if a later step (second createOutput, the compress
            // wrapper, a header write) threw.
            fieldsStream = directory
                    .createOutput(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_EXTENSION));
            indexStream = directory
                    .createOutput(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_INDEX_EXTENSION));

            if (FieldsWriterCompress.isFdtCompress() && !(directory instanceof RAMDirectory)) {
                indexStream.writeInt(FieldsWriterCompress.FORMAT_CURRENT);
                // If the wrapper constructor throws, fieldsStream still refers to the
                // raw output and is closed by abort().
                // NOTE(review): 1024 * 512 is presumably the compression block size in bytes - confirm.
                fieldsStream = new FdtCompressIndexOutput(fieldsStream, 1024 * 512);
                fieldsStream.writeInt(FieldsWriterCompress.FORMAT_CURRENT);
            } else {
                // NOTE(review): this branch writes two header ints to the data file
                // (FORMAT_CURRENT, then FieldsWriterCompress.FORMAT_CURRENT). Presumably
                // the matching reader expects both - confirm before changing.
                fieldsStream.writeInt(FORMAT_CURRENT);
                indexStream.writeInt(FORMAT_CURRENT);
                fieldsStream.writeInt(FieldsWriterCompress.FORMAT_CURRENT);
            }

            success = true;
        } finally {
            if (!success) {
                abort();
            }
        }
    }

    /**
     * Creates a writer on top of caller-supplied streams. No directory or
     * segment is recorded, so {@link #close()} and {@link #abort()} do nothing
     * for a writer built this way.
     *
     * @param fdx output that receives the per-document index entries
     * @param fdt output that receives the stored-field data
     * @param fn  field infos used to resolve field names
     */
    FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
        this.fieldInfos = fn;
        this.indexStream = fdx;
        this.fieldsStream = fdt;
        // A null directory marks the streams as externally managed.
        this.directory = null;
        this.segment = null;
    }

    /**
     * Replaces the output that receives stored-field data.
     *
     * @param stream the new fields output
     */
    void setFieldsStream(IndexOutput stream) {
        fieldsStream = stream;
    }

    /**
     * Appends one already-serialized document: records the current position of
     * the fields stream in the index stream, writes the stored-field count, and
     * then copies {@code buffer} into the fields stream. The buffer is assumed
     * to already be in the correct fields format.
     *
     * @param numStoredFields number of stored fields contained in {@code buffer}
     * @param buffer          pre-encoded field data for the document
     * @throws IOException if writing to either stream fails
     */
    void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException {
        indexStream.writeLong(fieldsStream.getFilePointer());
        fieldsStream.writeVInt(numStoredFields);
        buffer.writeTo(fieldsStream);
    }

    /**
     * Writes an empty document: an index entry pointing at the current fields
     * stream position, followed by a stored-field count of zero.
     *
     * @throws IOException if writing to either stream fails
     */
    void skipDocument() throws IOException {
        indexStream.writeLong(fieldsStream.getFilePointer());
        fieldsStream.writeVInt(0);
    }

    /**
     * Closes both streams when this writer manages them (directory is non-null)
     * and clears the stream references whether or not closing succeeded. Does
     * nothing when the streams were supplied by the caller.
     *
     * @throws IOException if closing either stream fails
     */
    void close() throws IOException {
        if (directory == null) {
            return;
        }
        try {
            IOUtils.close(fieldsStream, indexStream);
        } finally {
            indexStream = null;
            fieldsStream = null;
        }
    }

    /**
     * Best-effort cleanup for a writer that manages its own files: closes the
     * streams, then tries to delete the fields file and the fields index file.
     * Every {@link IOException} along the way is deliberately ignored so that
     * each remaining cleanup step still runs. Does nothing when the streams
     * were supplied by the caller.
     */
    void abort() {
        if (directory == null) {
            return;
        }

        try {
            close();
        } catch (IOException ignored) {
            // Best effort: still try to remove the files below.
        }

        final String[] extensions = { IndexFileNames.FIELDS_EXTENSION, IndexFileNames.FIELDS_INDEX_EXTENSION };
        for (String extension : extensions) {
            try {
                directory.deleteFile(IndexFileNames.segmentFileName(segment, extension));
            } catch (IOException ignored) {
                // Best effort: move on to the next file.
            }
        }
    }

    /**
     * Serializes a single field to the fields stream as: field number (VInt),
     * one flags byte, then the value. Binary fields are written as a VInt
     * length followed by the bytes; numeric fields are written with the
     * stream's type-specific numeric writer; everything else is written as a
     * string.
     *
     * @param fi    field info supplying the field number
     * @param field the field whose flags and value are written
     * @throws IOException if writing to the fields stream fails
     */
    final void writeField(FieldInfo fi, Fieldable field) throws IOException {
        fieldsStream.writeVInt(fi.number);

        // Assemble the flags byte.
        int flags = 0;
        if (field.isTokenized()) {
            flags |= FIELD_IS_TOKENIZED;
        }
        if (field.isBinary()) {
            flags |= FIELD_IS_BINARY;
        }
        if (field instanceof NumericField) {
            switch (((NumericField) field).getDataType()) {
            case INT:
                flags |= FIELD_IS_NUMERIC_INT;
                break;
            case LONG:
                flags |= FIELD_IS_NUMERIC_LONG;
                break;
            case FLOAT:
                flags |= FIELD_IS_NUMERIC_FLOAT;
                break;
            case DOUBLE:
                flags |= FIELD_IS_NUMERIC_DOUBLE;
                break;
            default:
                assert false : "Should never get here";
            }
        }
        fieldsStream.writeByte((byte) flags);

        // Binary takes precedence over numeric, which takes precedence over string.
        if (field.isBinary()) {
            final byte[] payload = field.getBinaryValue();
            final int payloadLength = field.getBinaryLength();
            final int payloadOffset = field.getBinaryOffset();

            fieldsStream.writeVInt(payloadLength);
            fieldsStream.writeBytes(payload, payloadOffset, payloadLength);
            return;
        }

        if (!(field instanceof NumericField)) {
            fieldsStream.writeString(field.stringValue());
            return;
        }

        final NumericField numeric = (NumericField) field;
        final Number value = numeric.getNumericValue();
        switch (numeric.getDataType()) {
        case INT:
            fieldsStream.writeVVInt(value.intValue());
            break;
        case LONG:
            fieldsStream.writeVVLong(value.longValue());
            break;
        case FLOAT:
            fieldsStream.writeVVVInt(Float.floatToIntBits(value.floatValue()));
            break;
        case DOUBLE:
            fieldsStream.writeVVVLong(Double.doubleToLongBits(value.doubleValue()));
            break;
        default:
            assert false : "Should never get here";
        }
    }

    /**
     * Bulk-copies {@code numDocs} already-encoded documents from {@code stream}
     * into the fields stream and writes one index entry per document.
     *
     * <p>Three paths are taken depending on the stream types:
     * <ul>
     * <li>source is an {@link FdtCompressIndexInput}: each document is copied
     * via {@code writeToPos} up to {@code lengthsend[i]};</li>
     * <li>destination is an {@link FdtCompressIndexOutput}: each document is
     * copied individually and its index entry is the destination's file
     * pointer at that moment;</li>
     * <li>otherwise: index entries are computed arithmetically from the
     * document lengths and all bytes are copied in a single call.</li>
     * </ul>
     * In the last two paths an end offset of {@code -1} means "up to the end
     * of {@code stream}".
     *
     * @param stream       source positioned at the first document to copy
     * @param lengthsstart start offset of each document in the source
     * @param lengthsend   end offset of each document in the source
     * @param numDocs      number of documents to copy
     * @throws IOException if reading or writing fails
     */
    final void addRawDocuments(final IndexInput stream, long[] lengthsstart, long[] lengthsend, int numDocs)
            throws IOException {
        if (stream instanceof FdtCompressIndexInput) {
            final FdtCompressIndexInput compressedSource = (FdtCompressIndexInput) stream;
            for (int doc = 0; doc < numDocs; doc++) {
                indexStream.writeLong(fieldsStream.getFilePointer());
                compressedSource.writeToPos(fieldsStream, lengthsend[doc]);
            }
            return;
        }

        if (fieldsStream instanceof FdtCompressIndexOutput) {
            for (int doc = 0; doc < numDocs; doc++) {
                indexStream.writeLong(fieldsStream.getFilePointer());
                final long docEnd = lengthsend[doc] == -1 ? stream.length() : lengthsend[doc];
                fieldsStream.copyBytes(stream, docEnd - lengthsstart[doc]);
            }
            return;
        }

        // Plain-to-plain copy: derive every index entry from the running byte
        // total, then move all document bytes with one copy.
        final long base = fieldsStream.getFilePointer();
        long totalBytes = 0;
        for (int doc = 0; doc < numDocs; doc++) {
            indexStream.writeLong(base + totalBytes);
            final long docEnd = lengthsend[doc] == -1 ? stream.length() : lengthsend[doc];
            totalBytes += docEnd - lengthsstart[doc];
        }
        fieldsStream.copyBytes(stream, totalBytes);
    }

    /**
     * Writes all stored fields of {@code doc}: an index entry pointing at the
     * current fields stream position, the number of stored fields, and then
     * each stored field in document order via
     * {@link #writeField(FieldInfo, Fieldable)}.
     *
     * @param doc the document whose stored fields are written
     * @throws IOException if writing to either stream fails
     */
    final void addDocument(Document doc) throws IOException {
        indexStream.writeLong(fieldsStream.getFilePointer());

        final List<Fieldable> docFields = doc.getFields();

        // The count precedes the fields, so the stored ones are tallied first.
        int numStored = 0;
        for (Fieldable candidate : docFields) {
            if (candidate.isStored()) {
                numStored++;
            }
        }
        fieldsStream.writeVInt(numStored);

        for (Fieldable candidate : docFields) {
            if (!candidate.isStored()) {
                continue;
            }
            writeField(fieldInfos.fieldInfo(candidate.name()), candidate);
        }
    }

}