com.edgenius.wiki.search.service.AbstractSearchService.java Source code

Introduction

Here is the source code for com.edgenius.wiki.search.service.AbstractSearchService.java
Source

/* 
 * =============================================================
 * Copyright (C) 2007-2011 Edgenius (http://www.edgenius.com)
 * =============================================================
 * License Information: http://www.edgenius.com/licensing/edgenius/2.0/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2.0
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * http://www.gnu.org/licenses/gpl.txt
 *  
 * ****************************************************************
 */
package com.edgenius.wiki.search.service;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;

import com.edgenius.core.SecurityValues.OPERATIONS;
import com.edgenius.core.model.User;
import com.edgenius.core.service.UserReadingService;
import com.edgenius.wiki.gwt.client.model.WidgetModel;
import com.edgenius.wiki.gwt.client.server.utils.SharedConstants;
import com.edgenius.wiki.gwt.client.server.utils.StringUtil;
import com.edgenius.wiki.search.lucene.IndexSearcherSupport;
import com.edgenius.wiki.search.lucene.LuceneConfig;
import com.edgenius.wiki.search.lucene.SearcherCallback;
import com.edgenius.wiki.security.service.SecurityService;

/**
 * @author Dapeng.Ni
 */
public abstract class AbstractSearchService extends IndexSearcherSupport {
    protected static final int FRAGMENT_LEN = 200;

    protected final Logger log = LoggerFactory.getLogger(this.getClass());

    protected SecurityService securityService;
    protected UserReadingService userReadingService;

    protected SearchResult emptyResult(final String keyword, final int currPageNumber) {

        SearchResult rs = new SearchResult();
        rs.setCurrentPage(currPageNumber);
        rs.setTotalPage(0);
        rs.setTimeSecond(0);
        rs.setKeyword(keyword);
        return rs;
    }

    protected SearchResult commonSearch(final String keyword, final int currPageNumber, final int returnCount,
            final User user, final String... advance) throws SearchException {

        try {
            return (SearchResult) this.search(new SearcherCallback() {
                public Object doWithSearcher(IndexSearcher searcher) throws SearchException {
                    try {
                        if (StringUtils.trimToEmpty(keyword).length() > 0) {
                            Query[] queries = createQuery(keyword, advance);
                            Sort sort = createSort(advance);
                            // Don't use Lucene default filter function to filter out no reading permission results 
                            // it is too slow - it will retrieve all documents in index - whatever it is matched or not
                            //Filter filter = new SecurityFilter(user);
                            TopDocs hits;
                            if (sort == null) {
                                hits = searcher.search(queries[0], LuceneConfig.MAX_RETURN);
                            } else {
                                hits = searcher.search(queries[0], LuceneConfig.MAX_RETURN, sort);
                            }

                            SearchResult rs = getResult(searcher, hits, keyword, currPageNumber, returnCount, user,
                                    queries[1]);
                            return rs;
                        } else {
                            return emptyResult(keyword, currPageNumber);
                        }
                    } catch (Exception e) {
                        throw new SearchException(e);
                    }
                }

            });
        } catch (Exception e) {
            log.error("Search failed ", e);
            throw new SearchException(e);
        }
    }

    // ********************************************************************
    // private class
    // ********************************************************************
    private Sort createSort(String[] advance) {
        if (advance != null) {
            //parse all possible type
            for (String str : advance) {
                if (str.length() < 2)
                    continue;
                if (str.charAt(0) == SearchService.ADV_GROUP_BY) {
                    if (NumberUtils
                            .toInt(Character.valueOf(str.charAt(1)).toString()) == SearchService.GROUP_SPACE) {
                        return new Sort(new SortField(FieldName.UNSEARCH_SPACE_UNIXNAME, SortField.STRING));
                    } else if (NumberUtils
                            .toInt(Character.valueOf(str.charAt(1)).toString()) == SearchService.GROUP_TYPE) {
                        return new Sort(new SortField(FieldName.DOC_TYPE, SortField.INT));
                    }
                }
            }
        }
        return null;
    }

    /**
     * 
     * @param keyword
     * @param advance
     * @return 2 elements: first is keyword query with advance query criteria. Last is highlight query.
     * @throws ParseException
     */
    private Query[] createQuery(String keyword, String... advance) throws ParseException {

        //default value
        int advKeyword = 0;
        String space = null;
        int srcType = 0;
        long from = 0, to = 0;
        //group by space - need filter out some special type
        boolean groupBySpace = false;
        if (advance != null) {
            //parse all possible type
            for (String str : advance) {
                if (str.length() < 2)
                    continue;

                if (str.charAt(0) == SearchService.ADV_KEYWORD_TYPE)
                    advKeyword = NumberUtils.toInt(str.substring(1));

                if (str.charAt(0) == SearchService.ADV_SPACE)
                    space = str.substring(1);

                if (str.charAt(0) == SearchService.ADV_SOURCE_TYPES)
                    srcType = NumberUtils.toInt(str.substring(1));

                if (str.charAt(0) == SearchService.ADV_DATE_SCOPE) {
                    String fromStr = str.substring(1);
                    int sep = fromStr.indexOf(":");
                    if (sep != -1) {
                        from = NumberUtils.toLong(fromStr.substring(0, sep));
                        to = NumberUtils.toLong(fromStr.substring(sep + 1));
                        if (from != 0 && to == 0) {
                            //from: (to is missed)
                            to = new Date().getTime();
                        }
                    } else {
                        from = NumberUtils.toLong(fromStr);
                        to = new Date().getTime();
                    }
                }

                if (str.charAt(0) == SearchService.ADV_GROUP_BY && NumberUtils
                        .toInt(Character.valueOf(str.charAt(1)).toString()) == SearchService.GROUP_SPACE) {
                    groupBySpace = true;
                }

            }
        }

        keyword = QueryParser.escape(StringUtils.trimToEmpty(keyword));
        QueryParser parser = new QueryParser(LuceneConfig.VERSION, FieldName.CONTENT,
                searcherFactory.getAnalyzer());

        if (advKeyword == SearchService.KEYWORD_EXACT)
            keyword = "\"" + keyword + "\"";
        else if (advKeyword == SearchService.KEYWORD_ALL) {
            String[] words = keyword.split(" ");
            StringBuffer buf = new StringBuffer();
            for (String word : words) {
                buf.append("+").append(word).append(" ");
            }
            keyword = buf.toString().trim();
        }

        //highlight query on return result, could be null
        Query hlQuery = null;
        if (keyword.length() > 0) {
            //Lucene throw exception if keyword is empty
            hlQuery = parser.parse(keyword);
        }
        //date scope
        if (from != 0 && to != 0) {
            keyword += " [" + from + " TO " + to + "]";
        }
        Query keyQuery = parser.parse(keyword);

        Query spaceQuery = null;

        //space scope
        if (!StringUtils.isBlank(space)) {
            //as here is not necessary to limit search type: as only page, comment, pageTag, attachment 
            //has UNSEARCH_SPACE_UNIXNAME field, so the search type is already restrict these four types
            spaceQuery = new TermQuery(new Term(FieldName.UNSEARCH_SPACE_UNIXNAME, space.trim().toLowerCase()));
        }

        //type filter
        List<Query> typeQueries = null;
        if (srcType != 0) {
            typeQueries = new ArrayList<Query>();
            TermQuery tq;
            if ((srcType & SearchService.INDEX_PAGE) > 0) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_PAGE + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_SPACE) > 0 && !groupBySpace) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_SPACE + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_COMMENT) > 0) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_COMMENT + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_ROLE) > 0 && !groupBySpace) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_ROLE + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_USER) > 0 && !groupBySpace) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_USER + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_TAGONPAGE) > 0) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_PAGE_TAG + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_TAGONSPACE) > 0 && !groupBySpace) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_SPACE_TAG + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_ATTACHMENT) > 0) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_ATTACHMENT + ""));
                typeQueries.add(tq);
            }
            if ((srcType & SearchService.INDEX_WIDGET) > 0 && !groupBySpace) {
                tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_WIDGET + ""));
                typeQueries.add(tq);
            }
        }

        BooleanQuery query = new BooleanQuery();
        boolean queryIsEmpty = true;
        if (keyQuery != null) {
            queryIsEmpty = false;
            query.add(keyQuery, Occur.MUST);
        }
        if (spaceQuery != null) {
            queryIsEmpty = false;
            query.add(spaceQuery, Occur.MUST);
        }
        if (groupBySpace && (typeQueries == null || typeQueries.size() == 0)) {
            //users don't choose any special type, but sort by space, so it need give limitation on search type
            //so far - page, comment, attachment, tag on page are for Space type
            typeQueries = new ArrayList<Query>();
            TermQuery tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_PAGE + ""));
            typeQueries.add(tq);
            tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_COMMENT + ""));
            typeQueries.add(tq);
            tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_COMMENT + ""));
            typeQueries.add(tq);
            tq = new TermQuery(new Term(FieldName.DOC_TYPE, SharedConstants.SEARCH_PAGE_TAG + ""));
            typeQueries.add(tq);
        }

        if (typeQueries != null && typeQueries.size() > 0) {
            BooleanQuery typeQuery = new BooleanQuery();
            for (Query typeQ : typeQueries) {
                typeQuery.add(typeQ, Occur.SHOULD);
            }
            queryIsEmpty = false;
            query.add(typeQuery, Occur.MUST);
        }

        return new Query[] { queryIsEmpty ? null : query, hlQuery };
    }

    private SearchResult getResult(IndexSearcher searcher, TopDocs hits, String keyword, int currPageNumber, int ps,
            User user, Query hlQuery) throws IOException {
        // failure tolerance
        if (currPageNumber < 1)
            currPageNumber = 1;

        long time = System.currentTimeMillis();
        List<SearchResultItem> detachedDocs = new ArrayList<SearchResultItem>();

        int from = (currPageNumber - 1) * ps;
        int total = detach(searcher, detachedDocs, hits, hlQuery, from, from + ps, user);

        int numberOfPages = (int) Math.ceil((float) total / ps);

        time = System.currentTimeMillis() - time;
        SearchResult rs = new SearchResult();
        rs.setCurrentPage(currPageNumber);
        rs.setTotalPage(numberOfPages);
        rs.setTimeSecond(time / 1000);
        rs.setItems(detachedDocs);
        rs.setKeyword(keyword);
        rs.setTotalItem(total);
        return rs;
    }

    private int detach(IndexSearcher searcher, List<SearchResultItem> viewableMatchedResults, TopDocs hits,
            Query hlQuery, int from, int to, User user) throws IOException {

        Assert.isTrue(from <= to && from >= 0 && (to >= 0 || to == -1));

        //For performance issue, we simply return total result set length without permission filter out.
        //This means is total length might be larger than the set that user can view, as some result will be filter out
        //if user doesn't have permission to see.
        int len = hits.totalHits;

        if (len > 0 && from < len) {
            to = to == -1 ? len : (len > to ? to : len);
            //security filter from return result

            List<Integer> resultIdx = new ArrayList<Integer>();
            for (int idx = from; idx < to; idx++) {
                //does not include "to" , For example, from:to is 0:10, then return index is 0-9

                //TODO: if page includes some result that invisible to user, it is better display message to tell user
                //some result is hidden for security reason.
                if (!isAllowView(searcher.doc(hits.scoreDocs[idx].doc), user))
                    continue;

                resultIdx.add(idx);
            }

            //create a highlighter for all fragment parser
            Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");
            Highlighter hl = null;
            if (hlQuery != null) {
                Scorer scorer = new QueryScorer(hlQuery);
                hl = new Highlighter(formatter, scorer);
                Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_LEN);
                hl.setTextFragmenter(fragmenter);
            }

            for (int idx : resultIdx) {
                SearchResultItem item = new SearchResultItem();

                Document doc = searcher.doc(hits.scoreDocs[idx].doc);
                String docType = doc.get(FieldName.DOC_TYPE);

                //common items in search results
                item.setType(NumberUtils.toInt(docType));
                item.setDatetime(doc.get(FieldName.UPDATE_DATE));
                if (userReadingService != null
                        && !new Integer(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                    String username = doc.get(FieldName.CONTRIBUTOR);
                    User contirUser = userReadingService.getUserByName(username);
                    if (contirUser != null) {
                        item.setContributor(contirUser.getFullname());
                        item.setContributorUsername(username);
                    }
                }
                if (Integer.valueOf(SharedConstants.SEARCH_PAGE).toString().equals(docType)) {
                    String content = doc.get(FieldName.PAGE_CONTENT);
                    item.setTitle(doc.get(FieldName.PAGE_TITLE));
                    item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));

                    //does set item.desc() as content, which maybe very big string. no necessary returned
                    item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), content)));

                } else if (Integer.valueOf(SharedConstants.SEARCH_COMMENT).toString().equals(docType)) {
                    String content = doc.get(FieldName.CONTENT);

                    item.setItemUid(doc.get(FieldName.COMMENT_UID));
                    item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                    item.setTitle(doc.get(FieldName.UNSEARCH_PAGE_TITLE));
                    //does set item.desc() as content, which maybe very big string. no necessary returned
                    item.setFragment(createFragment(hl, content));

                } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE).toString().equals(docType)) {
                    String title = doc.get(FieldName.SPACE_NAME);
                    item.setTitle(title);
                    item.setSpaceUname(doc.get(FieldName.SPACE_UNIXNAME));
                    item.setDesc(doc.get(FieldName.SPACE_DESC));
                    item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), item.getDesc())));

                } else if (Integer.valueOf(SharedConstants.SEARCH_WIDGET).toString().equals(docType)) {
                    //wTitle-> title; wDesc-> desc; wTitle(could be pageTitle or markup title) ->spaceUname
                    String widgetType = doc.get(FieldName.WIDGET_TYPE);
                    String title = doc.get(FieldName.WIDGET_TITLE);

                    //does content need transfer back?? so far no
                    String content = doc.get(FieldName.WIDGET_CONTENT);
                    if (WidgetModel.TYPE_PAGE_LINKER.equals(widgetType)) {
                        //don't use as Highlighter fragment
                        content = "";
                    }

                    String desc = doc.get(FieldName.WIDGET_DESC);

                    item.setDesc(desc);
                    item.setTitle(title);

                    //add little confuse field mapping :(
                    item.setSpaceUname(doc.get(FieldName.WIDGET_KEY));
                    item.setItemUid(widgetType);

                    item.setFragment(createFragment(hl, StringUtil.join(" ", item.getDesc(), content)));

                } else if (Integer.valueOf(SharedConstants.SEARCH_PAGE_TAG).toString().equals(docType)) {
                    //page tag
                    item.setTitle(doc.get(FieldName.PAGE_TAG_NAME));
                    item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                    item.setFragment(createFragment(hl, item.getTitle()));
                } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE_TAG).toString().equals(docType)) {
                    //space tag
                    item.setTitle(doc.get(FieldName.SPACE_TAG_NAME));
                    item.setFragment(createFragment(hl, item.getTitle()));

                } else if (Integer.valueOf(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                    String username = doc.get(FieldName.USER_NAME);
                    item.setTitle(username);
                    String fullname = doc.get(FieldName.USER_FULLNAME);
                    //hacker - contributor is current user fullname
                    item.setContributor(fullname);
                    if (userReadingService != null)
                        item.setDesc(userReadingService.getUserByName(username).getSetting().getStatus());
                    item.setFragment(createFragment(hl, fullname));

                } else if (Integer.valueOf(SharedConstants.SEARCH_ROLE).toString().equals(docType)) {
                    item.setSpaceUname(doc.get(FieldName.ROLE_NAME));
                    item.setTitle(doc.get(FieldName.ROLE_DISPLAY_NAME));
                    item.setDesc(doc.get(FieldName.ROLE_DESC));
                    //item.setFragment("");

                } else if (Integer.valueOf(SharedConstants.SEARCH_ATTACHMENT).toString().equals(docType)) {
                    item.setTitle(doc.get(FieldName.FILE_NAME));
                    item.setDesc(doc.get(FieldName.FILE_COMMENT));
                    item.setItemUid(doc.get(FieldName.FILE_NODE_UUID));
                    item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                    String text = doc.get(FieldName.TEXT);
                    //does not mark file content fragment, because it does not store in index
                    String fragment = createFragment(hl, StringUtil.join(" ", item.getDesc(), text));
                    item.setFragment(
                            (fragment == null || fragment.trim().length() == 0) ? ("Comment: " + item.getDesc())
                                    : fragment);
                }
                viewableMatchedResults.add(item);
            }
        }
        return len;
    }

    private boolean isAllowView(Document doc, User user) {
        long s = System.currentTimeMillis();
        boolean readAllow = true;
        int docType = Integer.valueOf(doc.get(FieldName.DOC_TYPE));
        String spaceUname;
        if (docType == SharedConstants.SEARCH_SPACE) {
            // check space permission
            spaceUname = doc.get(FieldName.SPACE_UNIXNAME);
            //only private space need check if this space allow read or not, for public space, space is always searchable.
            if (securityService.isPrivateSpace(spaceUname)) {
                readAllow = securityService.isAllowSpaceReading(spaceUname, user);
            }
        } else if (docType == SharedConstants.SEARCH_PAGE) {
            readAllow = securityService.isAllowPageReading(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME),
                    doc.get(FieldName.PAGE_UUID), user);
        } else if (docType == SharedConstants.SEARCH_WIDGET) {
            readAllow = securityService.isAllowWidget(OPERATIONS.READ, doc.get(FieldName.WIDGET_KEY), user);

        } else if (docType == SharedConstants.SEARCH_COMMENT) {
            readAllow = securityService.isAllowPageReading(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME),
                    doc.get(FieldName.PAGE_UUID), user);
        } else if (docType == SharedConstants.SEARCH_PAGE_TAG) {
            //even space is public, tag is only searchable for read permission user. 
            readAllow = securityService.isAllowSpaceReading(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME), user);
        } else if (docType == SharedConstants.SEARCH_ATTACHMENT) {
            if (Boolean.valueOf(doc.get(FieldName.FILE_SHARED)).booleanValue()) {
                //even space is public, tag is only searchable for read permission user. 
                //shared, only need check space level permission
                readAllow = securityService.isAllowSpaceReading(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME), user);
            } else {
                //non-shared, need check page level permission
                readAllow = securityService.isAllowPageReading(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME),
                        doc.get(FieldName.PAGE_UUID), user);
            }
        }

        log.info("Check read permission for search result takes {}ms", (System.currentTimeMillis() - s));
        //skip TYPE_SPACE_TAG and TYPE_USER: both all compliant with Instance Reading Permission, which checked before search start.
        return readAllow;
    }

    /**
     * Match all given name-value pairs, return combined fragment. For example, spaceUname and space desc have matched
     * fragment, then these 2 pieces are merge into one String fragment and return.
     * @param namedValues
     * @return
     * @throws IOException
     */
    private String createFragment(Highlighter hl, String content) throws IOException {
        if (content == null)
            return "";

        if (hl == null)
            return content;

        TokenStream tokenStream = searcherFactory.getAnalyzer().tokenStream(FieldName.CONTENT,
                new StringReader(content));
        String frag;
        try {
            frag = hl.getBestFragments(tokenStream, content, 3, "...");
        } catch (InvalidTokenOffsetsException e) {
            log.error("Highlight fragment error", e);
            frag = StringUtils.abbreviate(content, FRAGMENT_LEN);
        }

        return frag;
    }

    public void setSecurityService(SecurityService securityService) {
        this.securityService = securityService;
    }

    public void setUserReadingService(UserReadingService userReadingService) {
        this.userReadingService = userReadingService;
    }

}