org.paxle.se.index.lucene.impl.SnippetFetcher.java Source code

Java tutorial

Introduction

Here is the source code for org.paxle.se.index.lucene.impl.SnippetFetcher.java

Source

/**
 * This file is part of the Paxle project.
 * Visit http://www.paxle.net for more information.
 * Copyright 2007-2010 the original author or authors.
 *
 * Licensed under the terms of the Common Public License 1.0 ("CPL 1.0").
 * Any use, reproduction or distribution of this program constitutes the recipient's acceptance of this agreement.
 * The full license text is available under http://www.opensource.org/licenses/cpl1.0.txt
 * or in the file LICENSE.txt in the root directory of the Paxle distribution.
 *
 * Unless required by applicable law or agreed to in writing, this software is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

package org.paxle.se.index.lucene.impl;

import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Proxy;
import java.net.URI;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.paxle.core.IMWComponent;
import org.paxle.core.doc.ICommand;
import org.paxle.core.doc.ICommand.Result;
import org.paxle.core.doc.IDocumentFactory;
import org.paxle.core.doc.IIndexerDocument;
import org.paxle.core.doc.IParserDocument;
import org.paxle.core.doc.IParserDocument.Status;
import org.paxle.core.io.IIOTools;

/**
 * Generates highlighted text snippets for search results.
 * <p>
 * A snippet is produced by sending a freshly created {@link ICommand} through the
 * crawler and the parser component, reading the beginning of the parsed text and
 * running the Lucene {@link Highlighter} over it (see {@link #getSnippet(Query, String)}).
 * <p>
 * {@link #createProxy(IIndexerDocument, Query, long)} wraps an {@link IIndexerDocument}
 * into a dynamic proxy that starts the snippet generation on a background thread and
 * hands out the result when the {@link IIndexerDocument#SNIPPET} field is requested.
 */
@Component(immediate = true, metatype = false)
@Service(ISnippetFetcher.class)
public class SnippetFetcher implements ISnippetFetcher {
    /**
     * Limit handed to {@link IIOTools#copy} when reading the parsed text.
     * NOTE(review): presumably the maximum number of characters to copy - confirm against IIOTools.
     */
    private static final int SNIPPET_TEXT_LIMIT = 10240;

    /**
     * Maximum number of fragments requested from the {@link Highlighter}
     */
    private static final int MAX_FRAGMENTS = 3;

    /**
     * Separator the {@link Highlighter} puts between the fragments
     */
    private static final String FRAGMENT_SEPARATOR = "...";

    /**
     * If the time left until the deadline is not larger than this value (in ms),
     * the proxy does not wait for a pending snippet task at all.
     */
    private static final long MIN_WAIT_MS = 100;

    /**
     * Thread pool service
     */
    private ExecutorService execService;

    /**
     * For logging
     */
    protected Log logger = LogFactory.getLog(this.getClass());

    /**
     * The crawler component
     */
    @Reference(target = "(mwcomponent.ID=org.paxle.crawler)")
    protected IMWComponent<ICommand> crawler;

    /**
     * The parser component
     */
    @Reference(target = "(mwcomponent.ID=org.paxle.parser)")
    protected IMWComponent<ICommand> parser;

    /**
     * The factory used to create the temporary {@link ICommand} that is
     * passed to the crawler and the parser
     */
    @Reference(target = "(docType=org.paxle.core.doc.ICommand)")
    protected IDocumentFactory docFactory;

    /**
     * Provides the default analyzer, see {@link #activate(Map)}
     */
    @Reference
    protected IStopwordsManager stopwordsManager;

    /**
     * Used to copy the parsed text into memory
     */
    @Reference
    protected IIOTools ioTools;

    /**
     * The default {@link Analyzer}
     */
    protected PaxleAnalyzer analyzer;

    /**
     * Fetches the default analyzer and creates the thread pool used for
     * asynchronous snippet generation.
     * 
     * @param props the component properties (currently unused)
     */
    @Activate
    protected void activate(Map<String, Object> props) {
        this.analyzer = this.stopwordsManager.getDefaultAnalyzer();
        this.execService = Executors.newCachedThreadPool();
    }

    /**
     * Shuts down the thread pool. Already submitted tasks are still executed,
     * new tasks are rejected.
     */
    @Deactivate
    protected void deactivate() {
        // shutdown thread-pool
        this.execService.shutdown();
    }

    /**
     * Crawls and parses the given location and generates a highlighted snippet
     * for the given query from the beginning of the parsed text.
     * 
     * @param query the query whose terms should be highlighted
     * @param locationStr the location of the resource, must be a valid {@link URI} string
     * @return the best fragments as computed by the {@link Highlighter}, or <code>null</code> if
     *         an argument is <code>null</code>, crawling or parsing did not pass, no parsed
     *         text is available or any error occurred (errors are logged, never thrown)
     */
    public String getSnippet(Query query, String locationStr) {
        // nothing to do without a query or a location; avoids a logged NPE further down
        if (query == null || locationStr == null) {
            return null;
        }

        Reader textReader = null;
        try {
            // creating a dummy command
            final ICommand cmd = this.docFactory.createDocument(ICommand.class);
            cmd.setLocation(URI.create(locationStr));

            // crawling the resource
            this.crawler.process(cmd);
            if (cmd.getResult() != Result.Passed) {
                return null;
            }

            // parsing the resource
            this.parser.process(cmd);
            if (cmd.getResult() != Result.Passed) {
                return null;
            }

            // trying to get the parsed content
            final IParserDocument pdoc = cmd.getParserDocument();
            if (pdoc == null || pdoc.getStatus() != Status.OK) {
                return null;
            }

            // getting the document content
            textReader = pdoc.getTextAsReader();
            if (textReader == null) {
                return null;
            }

            // reading some text (materialised once, it is needed for tokenizing and highlighting)
            final StringBuilder buffer = new StringBuilder();
            this.ioTools.copy(textReader, buffer, SNIPPET_TEXT_LIMIT);
            final String text = buffer.toString();

            final Highlighter highlighter = new Highlighter(new QueryScorer(query));
            final TokenStream tokenStream = this.analyzer.tokenStream("content", new StringReader(text));
            return highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS, FRAGMENT_SEPARATOR);
        } catch (Throwable e) {
            // snippet generation is best-effort: a failure must never break the search itself
            this.logger.error("Unable to generate snippet for '" + locationStr + "': " + e.getMessage(), e);
        } finally {
            // closing reader
            if (textReader != null) {
                try {
                    textReader.close();
                } catch (Exception e) {
                    this.logger.error(e.getMessage(), e);
                }
            }
        }

        return null;
    }

    /**
     * Method to generate a dynamic proxy around an {@link IIndexerDocument}.
     * Creating the proxy immediately submits the snippet generation to the thread pool.
     * 
     * @param idoc the {@link IIndexerDocument} to wrap
     * @param query the query as entered by the user
     * @param deadline a point in time (ms, compared against {@link System#currentTimeMillis()})
     *        when the snippet generation should have finished
     * @return a wrapped {@link IIndexerDocument}
     */
    public IIndexerDocument createProxy(IIndexerDocument idoc, Query query, long deadline) {
        return (IIndexerDocument) Proxy.newProxyInstance(
                IIndexerDocument.class.getClassLoader(),
                new Class<?>[] { IIndexerDocument.class },
                new SnippetFetchingWrapper(idoc, query, deadline));
    }

    /**
     * This class is a dynamic wrapper around an {@link IIndexerDocument}, intercepts
     * method calls to {@link IIndexerDocument#get(org.paxle.core.doc.Field)} and 
     * injects snippets fetched asynchronous by the {@link ExecutorService}
     */
    private class SnippetFetchingWrapper implements InvocationHandler, Callable<String> {
        /**
         * The {@link IIndexerDocument indexer-document} we need to generate a snippet for
         */
        private final IIndexerDocument idoc;

        /**
         * The query as entered by the user
         */
        private final Query query;

        /**
         * An object to determine the snippet-generation status
         */
        private final Future<String> pendingTask;

        /**
         * The time when snippet-generation should have been finished
         */
        private final long deadline;

        /**
         * A flag specifying if the caller has already tried to get the snippet via a function
         * call to {@link IIndexerDocument#get(org.paxle.core.doc.Field)}.
         * Written by the calling thread and read by the worker thread, therefore volatile.
         */
        private volatile boolean fetched;

        public SnippetFetchingWrapper(IIndexerDocument idoc, Query query, long deadline) {
            this.idoc = idoc;
            this.query = query;
            this.deadline = deadline;

            // starting an async task for snippet fetching.
            // This must stay the last statement: it hands "this" to another thread.
            this.pendingTask = execService.submit(this);
        }

        /**
         * This method is used to intercept function calls to {@link IIndexerDocument#get(org.paxle.core.doc.Field)} if 
         * {@link IIndexerDocument#SNIPPET} is used as argument, and to take a look if the asynchronous snippet-fetching-task
         * has finished and returned a result. All other calls (and snippet lookups that did not
         * yield a result in time) are delegated to the wrapped document.
         * 
         * @throws Throwable whatever the delegated method of the wrapped document throws
         */
        public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
            if (isSnippetLookup(method, args)) {
                this.fetched = true;

                // if the task has not finished yet we'll wait some time to receive the result
                if (this.idoc.get(IIndexerDocument.SNIPPET) == null && !this.pendingTask.isDone()) {
                    final String snippet = awaitSnippet();
                    if (snippet != null) {
                        return snippet;
                    }
                }
            }

            try {
                return method.invoke(this.idoc, args);
            } catch (InvocationTargetException e) {
                // rethrow the exception of the wrapped document itself, otherwise the proxy
                // would surface it as an UndeclaredThrowableException
                final Throwable cause = e.getCause();
                throw (cause != null) ? cause : e;
            }
        }

        /**
         * @return <code>true</code> if the intercepted call is a <code>get(...)</code> whose
         *         first argument equals {@link IIndexerDocument#SNIPPET}
         */
        private boolean isSnippetLookup(Method method, Object[] args) {
            return "get".equals(method.getName())
                    && args != null
                    && args.length > 0
                    && IIndexerDocument.SNIPPET.equals(args[0]);
        }

        /**
         * Waits for the pending snippet task until the deadline is reached.
         * 
         * @return the generated snippet or <code>null</code> if there is not enough time left,
         *         the task did not finish in time, failed, or the waiting thread was interrupted
         */
        private String awaitSnippet() {
            final long timeToWait = this.deadline - System.currentTimeMillis();
            if (timeToWait <= MIN_WAIT_MS) {
                return null;
            }

            try {
                return this.pendingTask.get(timeToWait, TimeUnit.MILLISECONDS);
            } catch (InterruptedException e) {
                // keep the interruption visible to the caller
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                // timeout or failed task: the snippet is optional, so just fall back
                if (logger.isDebugEnabled()) {
                    logger.debug("No snippet available in time: " + e);
                }
            }
            return null;
        }

        /**
         * This method is called asynchronous by an {@link ExecutorService} to generate
         * a snippet for a found {@link IIndexerDocument} 
         * 
         * @return the generated snippet or <code>null</code> if none could be generated
         */
        public String call() throws Exception {
            // getting the URI of the document
            final String locationStr = this.idoc.get(IIndexerDocument.LOCATION);

            // generating the snippet
            final String snippet = getSnippet(this.query, locationStr);

            // if a snippet was generated successfully we store it into the
            // IIndexerDocument now
            if (snippet != null) {
                this.idoc.set(IIndexerDocument.SNIPPET, snippet);
                if (this.fetched) {
                    /* The caller already has tried to fetch the snippet.
                     * 
                     * TODO: we could insert the generated snippet into a cache here 
                     * so that it can be fetched asynchronous, e.g. by an ajax task
                     */
                }
            }
            return snippet;
        }

    }
}