org.carrot2.source.lucene.SimpleFieldMapper.java Source code

Java tutorial

Introduction

Here is the source code for org.carrot2.source.lucene.SimpleFieldMapper.java

Source

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisaw Osiski.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.source.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.carrot2.core.attribute.*;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * A simple {@link IFieldMapper} with one-to-one mapping between the default title, url
 * and summary fields.
 */
@Bindable
public class SimpleFieldMapper implements IFieldMapper {
    /** {@link Group} name. */
    final static String INDEX_FIELD_MAPPING = "Index field mapping";
    /** {@link Group} name. */
    final static String HIGHLIGHTER = "Highlighter";

    /**
     * Document title field name.
     */
    @Input
    @Attribute
    @Init
    @Processing
    @Internal(configuration = true)
    @Label("Document title field")
    @Level(AttributeLevel.BASIC)
    @Group(INDEX_FIELD_MAPPING)
    public String titleField;

    /**
     * Document content field name.
     */
    @Input
    @Attribute
    @Init
    @Processing
    @Internal(configuration = true)
    @Label("Document content field")
    @Level(AttributeLevel.BASIC)
    @Group(INDEX_FIELD_MAPPING)
    public String contentField;

    /**
     * Document URL field name.
     */
    @Input
    @Attribute
    @Init
    @Processing
    @Internal(configuration = true)
    @Label("Document URL field")
    @Level(AttributeLevel.BASIC)
    @Group(INDEX_FIELD_MAPPING)
    public String urlField;

    /**
     * Index search field names. If not specified, title and content fields are used.
     */
    @Input
    @Attribute
    @Init
    @Processing
    @Internal(configuration = true)
    @Label("Search fields")
    @Level(AttributeLevel.MEDIUM)
    @Group(INDEX_FIELD_MAPPING)
    public List<String> searchFields;

    /**
     * Snippet formatter for the highlighter. Highlighter is not used if <code>null</code>.
     */
    @Input
    @Attribute
    @Init
    @Processing
    @ImplementingClasses(classes = { PlainTextFormatter.class, SimpleHTMLFormatter.class }, strict = false)
    @Label("Formatter")
    @Level(AttributeLevel.ADVANCED)
    @Group(HIGHLIGHTER)
    public Formatter formatter = new PlainTextFormatter();

    /**
     * Number of context fragments for the highlighter.
     */
    @Input
    @Attribute
    @Init
    @Processing
    @IntRange(min = 1)
    @Label("Context fragments")
    @Level(AttributeLevel.ADVANCED)
    @Group(HIGHLIGHTER)
    public int contextFragments = 3;

    /**
     * A string used to join context fragments when highlighting.
     */
    @Input
    @Attribute
    @Init
    @Processing
    @Label("Join string")
    @Level(AttributeLevel.ADVANCED)
    @Group(HIGHLIGHTER)
    public String fragmentJoin = "...";

    /**
     * Last initialized highlighter.
     */
    private Highlighter highlighter;

    /**
     * Last received {@link Query} object in
     * {@link #map(Query, Analyzer, Document, org.carrot2.core.Document)}.
     */
    private Query query;

    /*
     * 
     */
    public String[] getSearchFields() {
        if (searchFields == null || searchFields.size() == 0) {
            ArrayList<String> fields = new ArrayList<String>();
            if (!StringUtils.isEmpty(titleField)) {
                fields.add(titleField);
            }

            if (!StringUtils.isEmpty(contentField)) {
                fields.add(contentField);
            }

            return fields.toArray(new String[fields.size()]);
        }
        return searchFields.toArray(new String[searchFields.size()]);
    }

    /*
     * 
     */
    public void map(Query luceneQuery, Analyzer analyzer, Document luceneDoc, org.carrot2.core.Document doc) {
        if (luceneQuery != query) {
            this.query = luceneQuery;
            resetHighlighter();
        }

        /*
         * Map title and url
         */
        String value = fieldValue(titleField, luceneDoc);
        if (value != null) {
            doc.setField(org.carrot2.core.Document.TITLE, value);
        }

        value = fieldValue(urlField, luceneDoc);
        if (value != null) {
            doc.setField(org.carrot2.core.Document.CONTENT_URL, value);
        }

        /*
         * Map content field.
         */
        value = fieldValue(contentField, luceneDoc);
        if (value != null) {
            try {
                final String summary;
                if (this.highlighter != null) {
                    final String[] fragments = highlighter.getBestFragments(analyzer, contentField, value,
                            contextFragments);

                    if (fragments.length > 0) {
                        summary = StringUtils.join(fragments, fragmentJoin);
                    } else {
                        summary = value;
                    }
                } else {
                    summary = value;
                }
                doc.setField(org.carrot2.core.Document.SUMMARY, summary);
            } catch (IOException e) {
                throw ExceptionUtils.wrapAsRuntimeException(e);
            } catch (InvalidTokenOffsetsException e) {
                throw ExceptionUtils.wrapAsRuntimeException(e);
            }
        }
    }

    /*
     * 
     */
    private String fieldValue(String fieldName, Document doc) {
        if (StringUtils.isEmpty(fieldName)) {
            return null;
        }

        StringBuilder builder = null;
        for (IndexableField field : doc.getFields()) {
            if (field.name().equals(fieldName)) {
                if (builder == null)
                    builder = new StringBuilder();
                if (builder.length() > 0)
                    builder.append(" ");

                builder.append(field.stringValue());
            }
        }

        return builder == null ? null : builder.toString();
    }

    /*
     * 
     */
    private void resetHighlighter() {
        if (formatter != null) {
            this.highlighter = new Highlighter(formatter, new QueryScorer(query));
            this.highlighter.setEncoder(new DefaultEncoder());
        } else {
            this.highlighter = null;
        }
    }
}