org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.oak.query.fulltext;

import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.query.Query;
import org.apache.jackrabbit.oak.query.QueryImpl;
import org.apache.jackrabbit.oak.query.ast.AndImpl;
import org.apache.jackrabbit.oak.query.ast.ConstraintImpl;
import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
import org.apache.jackrabbit.oak.query.ast.LiteralImpl;
import org.apache.jackrabbit.oak.query.ast.OrImpl;
import org.apache.jackrabbit.oak.spi.query.PropertyValues;

import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;

/**
 * This class can extract excerpts from node.
 */
public class SimpleExcerptProvider {

    public static final String REP_EXCERPT_FN = "rep:excerpt(.)";
    public static final String EXCERPT_END = "</span></div>";
    public static final String EXCERPT_BEGIN = "<div><span>";

    private static int maxFragmentSize = 150;

    public static String getExcerpt(String path, String columnName, Query query, boolean highlight) {
        if (path == null) {
            return null;
        }
        Tree t = query.getTree(path);
        if (t == null || !t.exists()) {
            return null;
        }
        columnName = extractExcerptProperty(columnName);
        if (columnName != null && columnName.contains("/")) {
            for (String p : PathUtils.elements(PathUtils.getParentPath(columnName))) {
                if (t.hasChild(p)) {
                    t = t.getChild(p);
                } else {
                    return null;
                }
            }
            columnName = PathUtils.getName(columnName);
        }

        StringBuilder text = new StringBuilder();
        String separator = "";
        for (PropertyState p : t.getProperties()) {
            if (p.getType().tag() == Type.STRING.tag()
                    && (columnName == null || columnName.equalsIgnoreCase(p.getName()))) {
                text.append(separator);
                separator = " ";
                for (String v : p.getValue(Type.STRINGS)) {
                    text.append(v);
                }
            }
        }
        Set<String> searchToken = extractFulltext(query);
        if (highlight && searchToken != null) {
            return highlight(text, searchToken);
        }
        return noHighlight(text);
    }

    private static String extractExcerptProperty(String column) {
        // most frequent case first
        if (REP_EXCERPT_FN.equalsIgnoreCase(column)) {
            return null;
        }
        return column.substring(column.indexOf("(") + 1, column.indexOf(")"));
    }

    private static Set<String> extractFulltext(Query q) {
        // TODO instanceof should not be used
        if (q instanceof QueryImpl) {
            return extractFulltext(((QueryImpl) q).getConstraint());
        }
        return ImmutableSet.of();
    }

    private static Set<String> extractFulltext(ConstraintImpl c) {
        Set<String> tokens = new HashSet<String>();
        // TODO instanceof should not be used,
        // as it will break without us noticing if we extend the AST
        if (c instanceof FullTextSearchImpl) {
            FullTextSearchImpl f = (FullTextSearchImpl) c;
            if (f.getFullTextSearchExpression() instanceof LiteralImpl) {
                LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression();
                tokens.add(l.getLiteralValue().getValue(Type.STRING));
            }
        }
        if (c instanceof AndImpl) {
            for (ConstraintImpl constraint : ((AndImpl) c).getConstraints()) {
                tokens.addAll(extractFulltext(constraint));
            }
        }
        if (c instanceof OrImpl) {
            for (ConstraintImpl constraint : ((OrImpl) c).getConstraints()) {
                tokens.addAll(extractFulltext(constraint));
            }
        }
        return tokens;
    }

    private static Set<String> tokenize(Set<String> in) {
        Set<String> tokens = new HashSet<String>();
        for (String s : in) {
            tokens.addAll(tokenize(s));
        }
        return tokens;
    }

    private static Set<String> tokenize(String in) {
        Set<String> out = new HashSet<String>();
        StringBuilder token = new StringBuilder();
        boolean quote = false;
        for (int i = 0; i < in.length();) {
            final int c = in.codePointAt(i);
            int length = Character.charCount(c);
            switch (c) {
            case ' ':
                if (quote) {
                    token.append(' ');
                } else if (token.length() > 0) {
                    out.add(token.toString());
                    token = new StringBuilder();
                }
                break;
            case '"':
            case '\'':
                if (quote) {
                    quote = false;
                    if (token.length() > 0) {
                        out.add(token.toString());
                        token = new StringBuilder();
                    }
                } else {
                    quote = true;
                }
                break;
            default:
                token.append(new String(Character.toChars(c)));
            }
            i += length;
        }
        if (token.length() > 0) {
            out.add(token.toString());
        }
        return out;
    }

    private static String noHighlight(StringBuilder text) {
        if (text.length() > maxFragmentSize) {
            int lastSpace = text.lastIndexOf(" ", maxFragmentSize);
            if (lastSpace != -1) {
                text.setLength(lastSpace);
            } else {
                text.setLength(maxFragmentSize);
            }
            text.append(" ...");
        }
        StringBuilder excerpt = new StringBuilder("<div><span>");
        excerpt.append(encodeIllegalXMLCharacters(text.toString()));
        excerpt.append("</span></div>");
        return excerpt.toString();
    }

    static String highlight(StringBuilder text, Set<String> searchToken) {
        Set<String> tokens = tokenize(searchToken);
        String escaped = encodeIllegalXMLCharacters(text.toString());
        BitSet highlight = new BitSet();
        for (String token : tokens) {
            highlight(escaped, highlight, token);
        }
        StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
        boolean strong = false;
        for (int i = 0; i < escaped.length(); i++) {
            if (highlight.get(i) && !strong) {
                strong = true;
                excerpt.append("<strong>");
            } else if (!highlight.get(i) && strong) {
                strong = false;
                excerpt.append("</strong>");
            }
            excerpt.append(escaped.charAt(i));
        }
        if (strong) {
            excerpt.append("</strong>");
        }
        excerpt.append(EXCERPT_END);
        return excerpt.toString();
    }

    private static void highlight(String text, BitSet highlightBits, String token) {
        boolean isLike = false;
        if (token.endsWith("*")) {
            if (token.length() == 1) {
                // don't highlight the '*' character itself
                return;
            }
            token = token.substring(0, token.length() - 1);
            isLike = true;
        }
        int index = 0;
        while (index < text.length()) {
            index = text.indexOf(token, index);
            if (index < 0) {
                break;
            }
            int endIndex = index + token.length();
            if (isLike) {
                int nextSpace = text.indexOf(" ", endIndex);
                if (nextSpace != -1) {
                    endIndex = nextSpace;
                } else {
                    endIndex = text.length();
                }
            }
            while (index < endIndex) {
                highlightBits.set(index++);
            }
        }
    }

    public static PropertyValue getExcerpt(PropertyValue value) {
        Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings();
        StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
        for (String v : listSplitter.splitToList(value.toString())) {
            excerpt.append(v);
        }
        excerpt.append(EXCERPT_END);
        return PropertyValues.newString(excerpt.toString());
    }
}