org.apache.lucene.document.Field.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.document.Field.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.document;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;

/**
 * Expert: directly create a field for a document.  Most
 * users should use one of the sugar subclasses: 
 * <ul>
 *    <li>{@link TextField}: {@link Reader} or {@link String} indexed for full-text search
 *    <li>{@link StringField}: {@link String} indexed verbatim as a single token
 *    <li>{@link IntPoint}: {@code int} indexed for exact/range queries.
 *    <li>{@link LongPoint}: {@code long} indexed for exact/range queries.
 *    <li>{@link FloatPoint}: {@code float} indexed for exact/range queries.
 *    <li>{@link DoublePoint}: {@code double} indexed for exact/range queries.
 *    <li>{@link SortedDocValuesField}: {@code byte[]} indexed column-wise for sorting/faceting
 *    <li>{@link SortedSetDocValuesField}: {@code SortedSet<byte[]>} indexed column-wise for sorting/faceting
 *    <li>{@link NumericDocValuesField}: {@code long} indexed column-wise for sorting/faceting
 *    <li>{@link SortedNumericDocValuesField}: {@code SortedSet<long>} indexed column-wise for sorting/faceting
 *    <li>{@link StoredField}: Stored-only value for retrieving in summary results
 * </ul>
 *
 * <p> A field is a section of a Document. Each field has three
 * parts: name, type and value. Values may be text
 * (String, Reader or pre-analyzed TokenStream), binary
 * (byte[]), or numeric (a Number).  Fields are optionally stored in the
 * index, so that they may be returned with hits on the document.
 *
 * <p>
 * NOTE: the field type is an {@link IndexableFieldType}.  Making changes
 * to the state of the IndexableFieldType will impact any
 * Field it is used in.  It is strongly recommended that no
 * changes be made after Field instantiation.
 */
public class Field implements IndexableField {

    /**
     * Field's type
     */
    protected final IndexableFieldType type;

    /**
     * Field's name
     */
    protected final String name;

    /** Field's value */
    protected Object fieldsData;

    /** Pre-analyzed tokenStream for indexed fields; this is
     * separate from fieldsData because you are allowed to
     * have both; e.g. a field may have a String value while
     * you customize how it is tokenized. */
    protected TokenStream tokenStream;

    /**
     * Expert: creates a field that has a name and a type but no value yet.
     * Intended only for custom Field subclasses.
     *
     * @param name field name
     * @param type field type
     * @throws IllegalArgumentException if either the name or type
     *         is null.
     */
    protected Field(String name, IndexableFieldType type) {
        // The name is validated before the type, so a null name is reported first.
        if (name == null) {
            throw new IllegalArgumentException("name must not be null");
        } else if (type == null) {
            throw new IllegalArgumentException("type must not be null");
        }
        this.name = name;
        this.type = type;
    }

    /**
     * Create field with Reader value.
     * @param name field name
     * @param reader reader value
     * @param type field type
     * @throws IllegalArgumentException if either the name or type
     *         is null, or if the field's type is stored(), or
     *         if tokenized() is false.
     * @throws NullPointerException if the reader is null
     */
    public Field(String name, Reader reader, IndexableFieldType type) {
        if (name == null) {
            throw new IllegalArgumentException("name must not be null");
        }
        if (type == null) {
            throw new IllegalArgumentException("type must not be null");
        }
        if (reader == null) {
            throw new NullPointerException("reader must not be null");
        }
        if (type.stored()) {
            throw new IllegalArgumentException("fields with a Reader value cannot be stored");
        }
        if (type.indexOptions() != IndexOptions.NONE && !type.tokenized()) {
            throw new IllegalArgumentException("non-tokenized fields must use String values");
        }

        this.name = name;
        this.fieldsData = reader;
        this.type = type;
    }

    /**
     * Create field with TokenStream value.
     *
     * @param name field name
     * @param tokenStream TokenStream value
     * @param type field type
     * @throws IllegalArgumentException if either the name or type
     *         is null, or if the field's type is stored(), or
     *         if tokenized() is false, or if the type is not indexed.
     * @throws NullPointerException if the tokenStream is null
     */
    public Field(String name, TokenStream tokenStream, IndexableFieldType type) {
        if (name == null) {
            throw new IllegalArgumentException("name must not be null");
        }
        if (tokenStream == null) {
            throw new NullPointerException("tokenStream must not be null");
        }
        // Reject a null type explicitly; without this check the dereference below
        // would fail with a NullPointerException instead of the documented
        // IllegalArgumentException.
        if (type == null) {
            throw new IllegalArgumentException("type must not be null");
        }
        if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {
            throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized");
        }
        if (type.stored()) {
            throw new IllegalArgumentException("TokenStream fields cannot be stored");
        }

        this.name = name;
        // A TokenStream-only field carries no separate value.
        this.fieldsData = null;
        this.tokenStream = tokenStream;
        this.type = type;
    }

    /**
     * Create field with binary value.
     *
     * <p>NOTE: the provided byte[] is not copied so be sure
     * not to change it until you're done with this field.
     *
     * @param name field name
     * @param value byte array pointing to binary content (not copied)
     * @param type field type
     * @throws IllegalArgumentException if the field name, value or type
     *         is null.
     */
    public Field(String name, byte[] value, IndexableFieldType type) {
        // A null array is forwarded (with a placeholder length) so that the delegate
        // constructor rejects it with IllegalArgumentException, rather than this
        // line failing with a NullPointerException on value.length.
        this(name, value, 0, value == null ? 0 : value.length, type);
    }

    /**
     * Create field with binary value taken from a slice of a byte array.
     *
     * <p>NOTE: the provided byte[] is not copied so be sure
     * not to change it until you're done with this field.
     *
     * @param name field name
     * @param value byte array pointing to binary content (not copied)
     * @param offset starting position of the byte array
     * @param length valid length of the byte array
     * @param type field type
     * @throws IllegalArgumentException if the field name, value or type
     *         is null.
     */
    public Field(String name, byte[] value, int offset, int length, IndexableFieldType type) {
        // A null array becomes a null BytesRef, which the delegate constructor rejects.
        this(name, value == null ? null : new BytesRef(value, offset, length), type);
    }

    /**
     * Create field with binary value.
     *
     * <p>NOTE: the provided BytesRef is not copied so be sure
     * not to change it until you're done with this field.
     *
     * @param name field name
     * @param bytes BytesRef pointing to binary content (not copied)
     * @param type field type
     * @throws IllegalArgumentException if the field name, bytes or type
     *         is null.
     */
    public Field(String name, BytesRef bytes, IndexableFieldType type) {
        // Arguments are validated in the order name, bytes, type.
        if (name == null) {
            throw new IllegalArgumentException("name must not be null");
        } else if (bytes == null) {
            throw new IllegalArgumentException("bytes must not be null");
        } else if (type == null) {
            throw new IllegalArgumentException("type must not be null");
        }
        this.name = name;
        this.type = type;
        this.fieldsData = bytes;
    }

    // TODO: allow direct construction of int, long, float, double value too..?

    /**
     * Create field with String value.
     * @param name field name
     * @param value string value
     * @param type field type
     * @throws IllegalArgumentException if either the name, value or type
     *         is null, or if the field's type is neither indexed() nor stored(), 
     *         or if indexed() is false but storeTermVectors() is true.
     */
    public Field(String name, CharSequence value, IndexableFieldType type) {
        if (name == null) {
            throw new IllegalArgumentException("name must not be null");
        }
        if (value == null) {
            throw new IllegalArgumentException("value must not be null");
        }
        if (type == null) {
            throw new IllegalArgumentException("type must not be null");
        }
        if (!type.stored() && type.indexOptions() == IndexOptions.NONE) {
            throw new IllegalArgumentException(
                    "it doesn't make sense to have a field that " + "is neither indexed nor stored");
        }
        this.name = name;
        this.fieldsData = value;
        this.type = type;
    }

    /**
     * The value of the field as a String, or null. A non-null result is produced
     * when the field holds a character sequence or a {@link Number}, in which case
     * the value's {@code toString()} is returned; for any other kind of value
     * (for example a Reader or binary value) this returns null.
     */
    @Override
    public String stringValue() {
        final boolean textual = fieldsData instanceof CharSequence || fieldsData instanceof Number;
        return textual ? fieldsData.toString() : null;
    }

    /**
     * The value of the field as a CharSequence. A character-sequence value is
     * returned as-is (no copy); otherwise the result of {@link #stringValue()}
     * is returned, which may be null.
     */
    @Override
    public CharSequence getCharSequenceValue() {
        if (fieldsData instanceof CharSequence) {
            return (CharSequence) fieldsData;
        }
        return stringValue();
    }

    /**
     * The value of the field as a Reader, or null if the field's value is
     * not a Reader (for example a String or binary value).
     */
    @Override
    public Reader readerValue() {
        if (fieldsData instanceof Reader) {
            return (Reader) fieldsData;
        }
        return null;
    }

    /**
     * The pre-analyzed TokenStream assigned to this field, or null if none was set.
     * When null, the Reader value or String value is analyzed to produce the
     * indexed tokens.
     */
    public TokenStream tokenStreamValue() {
        return this.tokenStream;
    }

    /**
     * <p>
     * Expert: change the value of this field. This can be used during indexing to
     * re-use a single Field instance to improve indexing speed by avoiding GC
     * cost of new'ing and reclaiming Field instances. Typically a single
     * {@link Document} instance is re-used as well. This helps most on small
     * documents.
     * </p>
     *
     * <p>
     * Each Field instance should only be used once within a single
     * {@link Document} instance. See <a
     * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed"
     * >ImproveIndexingSpeed</a> for details.
     * </p>
     *
     * @param value the new String value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a String (including when the field has no value), or if
     *         {@code value} is null
     */
    public void setStringValue(String value) {
        if (!(fieldsData instanceof String)) {
            // fieldsData may be null (e.g. a TokenStream-only field); guard the
            // getClass() call so the misuse is reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to String");
        }
        if (value == null) {
            throw new IllegalArgumentException("value must not be null");
        }
        fieldsData = value;
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * @param value the new Reader value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a Reader (including when the field has no value)
     */
    public void setReaderValue(Reader value) {
        if (!(fieldsData instanceof Reader)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to Reader");
        }
        fieldsData = value;
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * <p>The array is wrapped in a new {@link BytesRef} and handed to
     * {@link #setBytesValue(BytesRef)}, which performs the remaining checks.
     *
     * @param value the new binary value
     * @throws IllegalArgumentException if {@code value} is null, or if
     *         {@link #setBytesValue(BytesRef)} rejects the change
     */
    public void setBytesValue(byte[] value) {
        // Fail with the same exception type the BytesRef overload uses for null,
        // rather than letting the wrapping below fail on a null array.
        if (value == null) {
            throw new IllegalArgumentException("value must not be null");
        }
        setBytesValue(new BytesRef(value));
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * <p>NOTE: the provided BytesRef is not copied so be sure
     * not to change it until you're done with this field.
     *
     * @param value the new binary value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a BytesRef (including when the field has no value), if the
     *         field's type is indexed, or if {@code value} is null
     */
    public void setBytesValue(BytesRef value) {
        if (!(fieldsData instanceof BytesRef)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to BytesRef");
        }
        if (type.indexOptions() != IndexOptions.NONE) {
            throw new IllegalArgumentException("cannot set a BytesRef value on an indexed field");
        }
        if (value == null) {
            throw new IllegalArgumentException("value must not be null");
        }
        fieldsData = value;
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * @param value the new byte value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a Byte (including when the field has no value)
     */
    public void setByteValue(byte value) {
        if (!(fieldsData instanceof Byte)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to Byte");
        }
        fieldsData = Byte.valueOf(value);
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * @param value the new short value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a Short (including when the field has no value)
     */
    public void setShortValue(short value) {
        if (!(fieldsData instanceof Short)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to Short");
        }
        fieldsData = Short.valueOf(value);
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * @param value the new int value
     * @throws IllegalArgumentException if the current value of this field is not
     *         an Integer (including when the field has no value)
     */
    public void setIntValue(int value) {
        if (!(fieldsData instanceof Integer)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to Integer");
        }
        fieldsData = Integer.valueOf(value);
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * @param value the new long value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a Long (including when the field has no value)
     */
    public void setLongValue(long value) {
        if (!(fieldsData instanceof Long)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to Long");
        }
        fieldsData = Long.valueOf(value);
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * @param value the new float value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a Float (including when the field has no value)
     */
    public void setFloatValue(float value) {
        if (!(fieldsData instanceof Float)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to Float");
        }
        fieldsData = Float.valueOf(value);
    }

    /**
     * Expert: change the value of this field. See
     * {@link #setStringValue(String)}.
     *
     * @param value the new double value
     * @throws IllegalArgumentException if the current value of this field is not
     *         a Double (including when the field has no value)
     */
    public void setDoubleValue(double value) {
        if (!(fieldsData instanceof Double)) {
            // fieldsData may be null; guard the getClass() call so the misuse is
            // reported instead of throwing NPE.
            final String currentType = fieldsData == null ? "null" : fieldsData.getClass().getSimpleName();
            throw new IllegalArgumentException(
                    "cannot change value type from " + currentType + " to Double");
        }
        fieldsData = Double.valueOf(value);
    }

    /**
     * Expert: sets the token stream to be used for indexing. The field's type
     * must already be indexed and tokenized; this method does not change the
     * type. May be combined with stored values from stringValue() or
     * binaryValue().
     *
     * @param tokenStream the token stream to use when indexing this field
     * @throws IllegalArgumentException if the field's type is not indexed or
     *         is not tokenized
     */
    public void setTokenStream(TokenStream tokenStream) {
        final boolean indexed = type.indexOptions() != IndexOptions.NONE;
        if (!(indexed && type.tokenized())) {
            throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized");
        }
        this.tokenStream = tokenStream;
    }

    /** Returns the name of this field, as given at construction time. */
    @Override
    public String name() {
        return this.name;
    }

    /** Returns the field's value as a {@link Number}, or null if the value is not numeric. */
    @Override
    public Number numericValue() {
        return fieldsData instanceof Number ? (Number) fieldsData : null;
    }

    /**
     * Returns the field's value as a {@link BytesRef}, or null if the value is not
     * binary. The stored instance itself is returned, not a copy.
     */
    @Override
    public BytesRef binaryValue() {
        return fieldsData instanceof BytesRef ? (BytesRef) fieldsData : null;
    }

    /**
     * Prints a Field for human consumption, in the form
     * {@code type<name:value>}; the value part is left empty when the
     * field has no value.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder()
                .append(type.toString())
                .append('<')
                .append(name)
                .append(':');
        if (fieldsData != null) {
            sb.append(fieldsData);
        }
        return sb.append('>').toString();
    }

    /** Returns the {@link IndexableFieldType} for this field. */
    @Override
    public IndexableFieldType fieldType() {
        return this.type;
    }

    /**
     * Produces the TokenStream used to index this field.
     *
     * <ul>
     *   <li>Not indexed: returns null.
     *   <li>Indexed but not tokenized: returns a single-token stream over the
     *       String value, or else over the binary value; {@code reuse} is
     *       recycled when it is already of the matching private stream type.
     *   <li>Tokenized: returns the explicitly set TokenStream if any, otherwise
     *       asks {@code analyzer} to analyze the Reader value, or else the
     *       String value.
     * </ul>
     *
     * @param analyzer analyzer used for tokenized Reader/String values
     * @param reuse a previously returned stream that may be recycled, or null
     * @return the stream to index, or null if the field is not indexed
     * @throws IllegalArgumentException if no usable value is present for the
     *         field's type
     */
    @Override
    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
        if (fieldType().indexOptions() == IndexOptions.NONE) {
            // Not indexed
            return null;
        }

        if (fieldType().tokenized()) {
            // Precedence: explicit TokenStream, then Reader, then String.
            if (tokenStream != null) {
                return tokenStream;
            }
            if (readerValue() != null) {
                return analyzer.tokenStream(name(), readerValue());
            }
            if (stringValue() != null) {
                return analyzer.tokenStream(name(), stringValue());
            }
            throw new IllegalArgumentException(
                    "Field must have either TokenStream, String, Reader or Number value; got " + this);
        }

        // Indexed but not tokenized: the whole value becomes a single token.
        if (stringValue() != null) {
            // Only allocate a new stream when the caller's one cannot be recycled;
            // it is heavy to instantiate (attributes, ...).
            final StringTokenStream single = reuse instanceof StringTokenStream
                    ? (StringTokenStream) reuse
                    : new StringTokenStream();
            single.setValue(stringValue());
            return single;
        }
        if (binaryValue() != null) {
            final BinaryTokenStream single = reuse instanceof BinaryTokenStream
                    ? (BinaryTokenStream) reuse
                    : new BinaryTokenStream();
            single.setValue(binaryValue());
            return single;
        }
        throw new IllegalArgumentException("Non-Tokenized Fields must have a String value");
    }

    /**
     * TokenStream that emits one token whose bytes are the configured
     * {@link BytesRef}. It reports no tokens until {@link #reset()} has been
     * called, and only one token per reset.
     */
    private static final class BinaryTokenStream extends TokenStream {
        private final BytesTermAttribute termBytes = addAttribute(BytesTermAttribute.class);
        /** True once the single token has been emitted; starts true so reset() is required. */
        private boolean exhausted = true;
        private BytesRef bytes;

        /** Creates a new TokenStream that returns a BytesRef as single token.
         * <p>Warning: Does not initialize the value, you must call
         * {@link #setValue(BytesRef)} afterwards!
         */
        BinaryTokenStream() {
        }

        /** Sets the binary value to emit; the reference is kept, not copied. */
        public void setValue(BytesRef value) {
            this.bytes = value;
        }

        @Override
        public boolean incrementToken() {
            if (!exhausted) {
                clearAttributes();
                termBytes.setBytesRef(bytes);
                exhausted = true;
                return true;
            }
            return false;
        }

        @Override
        public void reset() {
            exhausted = false;
        }

        @Override
        public void close() {
            // Drop the reference to the value.
            bytes = null;
        }
    }

    /**
     * TokenStream that emits the configured String as one token spanning
     * offsets {@code 0..length}. It reports no tokens until {@link #reset()}
     * has been called, and only one token per reset.
     */
    private static final class StringTokenStream extends TokenStream {
        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
        /** True once the single token has been emitted; starts true so reset() is required. */
        private boolean exhausted = true;
        private String text;

        /** Creates a new TokenStream that returns a String as single token.
         * <p>Warning: Does not initialize the value, you must call
         * {@link #setValue(String)} afterwards!
         */
        StringTokenStream() {
        }

        /** Sets the string value. */
        void setValue(String value) {
            this.text = value;
        }

        @Override
        public boolean incrementToken() {
            if (!exhausted) {
                clearAttributes();
                termAtt.append(text);
                offsetAtt.setOffset(0, text.length());
                exhausted = true;
                return true;
            }
            return false;
        }

        @Override
        public void end() throws IOException {
            super.end();
            // The final offset is the end of the single token, for both start and end.
            final int endOffset = text.length();
            offsetAtt.setOffset(endOffset, endOffset);
        }

        @Override
        public void reset() {
            exhausted = false;
        }

        @Override
        public void close() {
            // Drop the reference to the value.
            text = null;
        }
    }

    /** Specifies whether and how a field should be stored. */
    public enum Store {

        /** Store the original field value in the index. This is useful for short texts
         * like a document's title which should be displayed with the results. The
         * value is stored in its original form, i.e. no analyzer is used before it is
         * stored.
         */
        YES,

        /** Do not store the field value in the index. */
        NO
    }
}