net.sf.zekr.engine.search.tanzil.ZeroHighlighter.java Source code

Java tutorial

Introduction

Here is the source code for net.sf.zekr.engine.search.tanzil.ZeroHighlighter.java

Source

/*
 *               In the name of Allah
 * This file is part of The Zekr Project. Use is subject to
 * license terms.
 *
 * Author:         Hamid Zarrabi-Zadeh, Mohsen Saboorian
 * Start Date:     Mar 18, 2008
 */
package net.sf.zekr.engine.search.tanzil;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import net.sf.zekr.common.config.ApplicationConfig;
import net.sf.zekr.common.resource.IQuranLocation;
import net.sf.zekr.common.resource.IQuranText;
import net.sf.zekr.common.resource.QuranPropertiesUtils;
import net.sf.zekr.common.util.CollectionUtils;
import net.sf.zekr.engine.search.AbstractSearcher;
import net.sf.zekr.engine.search.ISearchScorer;
import net.sf.zekr.engine.search.SearchException;
import net.sf.zekr.engine.search.SearchResultItem;
import net.sf.zekr.engine.search.SearchResultModel;
import net.sf.zekr.engine.search.SearchScope;
import net.sf.zekr.engine.search.ZeroScorer;
import net.sf.zekr.engine.search.tanzil.PatternEnricherFactory.QuranPatternEnricher;
import net.sf.zekr.engine.translation.TranslationData;

import org.apache.commons.lang.StringUtils;

class ZeroHighlighter implements ISearchResultHighlighter {
    public String highlight(String text, String pattern) {
        return text;
    }
}

/**
 * An advanced search, mainly inspired and converted from <a href="http://tanzil.info">tanzil.info</a>
 * JavaScript and PHP code.
 * 
 * @author Hamid Zarrabi-Zadeh
 * @author Mohsen Saboorian
 */
public class AdvancedQuranTextSearch extends AbstractSearcher {
    private ISearchResultHighlighter highlighter;
    private IQuranText quranText;
    private ISearchScorer searchScorer;
    private List<IQuranLocation> locations;

    public void setSearchScope(SearchScope searchScope) {
        this.searchScope = searchScope;
        locations = CollectionUtils.toArrayList(QuranPropertiesUtils.getLocations());
        if (searchScope != null) {
            logger.debug("Initializing searchable locations.");
            for (int i = locations.size() - 1; i >= 0; i--) {
                if (!searchScope.includes(locations.get(i))) {
                    locations.remove(i);
                }
            }
            logger.debug("Searching through '" + locations.size() + "' ayas.");
        }
    }

    public void setSearchScorer(ISearchScorer searchScorer) {
        this.searchScorer = searchScorer;
    }

    public AdvancedQuranTextSearch(IQuranText quranText, ISearchResultHighlighter highlighter,
            ISearchScorer searchScorer) {
        if (highlighter == null) {
            this.highlighter = new ZeroHighlighter();
        } else {
            this.highlighter = highlighter;
        }
        if (searchScorer == null) {
            this.searchScorer = new ZeroScorer();
        } else {
            this.searchScorer = searchScorer;
        }
        this.quranText = quranText;
        setSearchScope(null);
    }

    @SuppressWarnings({ "unchecked", "rawtypes" })
    protected SearchResultModel doSearch(String rawQuery) throws SearchException {
        try {
            logger.debug("Searching for query: " + rawQuery);
            rawQuery = rawQuery.replaceAll("\\-", "!");

            boolean thisIsQuran = !quranText.isTranslation();
            PatternEnricher enricher = PatternEnricherFactory.getEnricher(quranText.getLanguage(), thisIsQuran);
            if (thisIsQuran) { // it's translation
                enricher.setParameter(QuranPatternEnricher.IGNORE_HARAKA, Boolean.FALSE);
            }
            String pattern = enricher.enrich(rawQuery);

            String highlightPattern = pattern.replaceAll("[+!]", "|");
            highlightPattern = highlightPattern.replaceAll("^[|]+", "").replaceAll("!", ""); // remove leading '|'s
            logger.debug("Rewritten query: " + pattern);

            pattern = StringUtils.replace(pattern, "!", "+!");
            if (pattern.startsWith("+")) {
                pattern = pattern.substring(1);
            }
            String[] patterns = pattern.split("\\+");
            Set<String> clauses = new LinkedHashSet<String>();
            List intermediateResult = locations;
            for (int i = 0; i < patterns.length; i++) {
                // TODO: for queries with patterns.length > 1, first search for larger (more filtering) patterns[i]
                String p = patterns[i];
                boolean exclude;
                if (exclude = p.charAt(0) == '!') {
                    p = p.substring(1);
                }
                intermediateResult = filterBucket(intermediateResult, p, exclude, i == 0, enricher);
            }

            // extract matched parts and clauses
            int total = 0;
            List resultItems = intermediateResult;
            for (int i = 0; i < patterns.length; i++) {
                if (patterns[i].charAt(0) == '!') {
                    continue;
                }
                Pattern regex = Pattern.compile(patterns[i], Pattern.CASE_INSENSITIVE);
                for (int j = 0; j < resultItems.size(); j++) {
                    SearchResultItem sri = (SearchResultItem) resultItems.get(j);
                    Matcher matcher = regex.matcher(sri.text);
                    while (matcher.find()) {
                        total++;
                        sri.matchedParts.add(new String(matcher.group()));
                        clauses.add(getClause(sri.text, matcher));
                    }
                }
            }

            // score and highlight results
            logger.debug("Score and highlight search results.");
            scoreSearchResult(resultItems, highlightPattern, patterns.length);

            return new SearchResultModel(quranText, resultItems, StringUtils.join(clauses, " "), rawQuery, total,
                    searchResultComparator, ascending);
        } catch (SearchException se) {
            throw se;
        } catch (Exception e) {
            throw new SearchException(e);
        }
    }

    /**
     * Score and highlight the search result.
     * 
     * @param resultItems
     * @param highlightPattern
     * @param patternCount
     */
    @SuppressWarnings("rawtypes")
    private void scoreSearchResult(List resultItems, String highlightPattern, int patternCount) {
        for (int i = 0; i < resultItems.size(); i++) {
            SearchResultItem sri = (SearchResultItem) resultItems.get(i);
            sri.score = searchScorer.score(sri);
            sri.score /= patternCount;
            sri.text = highlighter.highlight(sri.text, highlightPattern);
        }
    }

    @SuppressWarnings({ "unchecked", "rawtypes" })
    private List filterBucket(List intermediateResult, String pattern, boolean exclude, boolean firstTime,
            PatternEnricher enricher) throws SearchException {
        try {
            List res = new ArrayList();
            Pattern regex = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
            for (int i = 0; i < intermediateResult.size(); i++) {
                Matcher matcher;
                String line;
                IQuranLocation loc;
                if (firstTime) {
                    loc = (IQuranLocation) intermediateResult.get(i);
                    line = ' ' + quranText.get(loc) + ' ';
                } else {
                    SearchResultItem sri = (SearchResultItem) intermediateResult.get(i);
                    loc = sri.location;
                    line = sri.text;
                }
                // matcher = regex.matcher(enricher.enrich(line));
                matcher = regex.matcher(line);
                if (matcher.find() ^ exclude) {
                    if (firstTime) {
                        res.add(new SearchResultItem(line, loc));
                    } else {
                        res.add(intermediateResult.get(i));
                    }
                }
            }
            return res;
        } catch (PatternSyntaxException pse) {
            logger.implicitLog(pse);
            throw new SearchException(pse.getMessage());
        }
    }

    private String getClause(String text, Matcher matcher) {
        int a = text.substring(0, matcher.start() + 1).lastIndexOf(' ');
        int b = text.indexOf(' ', matcher.end() - 1);
        return new String(text.substring(a + 1, b));
    }

    public static void main(String[] args) throws IOException {
        String s = "";
        s = "";
        s = "-";
        s = " ";
        s = "\" \"  \"\" ";
        s = "- ";
        s = "\" \"";
        s = " ";
        s = " ";
        s = "\"\"";
        s = "???";
        s = ".";
        s = "__";
        System.out.println("Initialize AdvancedQuranTextSearch" + new Date());
        // QuranText text = QuranText.getSimpleTextInstance();
        TranslationData quranText = ApplicationConfig.getInstance().getTranslation().get("makarem");
        PatternEnricher enricher = PatternEnricherFactory.getEnricher(quranText.getLanguage(),
                !quranText.isTranslation());
        System.out.println("Enriched query: " + enricher.enrich(s));
        AdvancedQuranTextSearch ats = new AdvancedQuranTextSearch(quranText, new SimpleSearchResultHighlighter(),
                new DefaultSearchScorer());
        Date d1 = new Date();
        System.out.println("Before search: " + d1);
        SearchResultModel res = ats.doSearch(s);
        Date d2 = new Date();
        System.out.println("Matches: " + res.getClause());
        System.out
                .println("After search: " + d2 + ". Took: " + (d2.getTime() - d1.getTime()) / 1000.0 + "seconds.");
        System.out.println(res.getTotalMatch() + " - " + res.getResults().size() + ":");
        for (SearchResultItem sri : res.getResults()) {
            System.out.println(sri);
        }
    }
}