Example usage for the org.apache.lucene.search.highlight.Highlighter constructor Highlighter(Formatter, Scorer)

List of usage examples for the org.apache.lucene.search.highlight.Highlighter constructor Highlighter(Formatter, Scorer)

Introduction

On this page you can find example usages of the org.apache.lucene.search.highlight.Highlighter constructor Highlighter(Formatter, Scorer).

Prototype

public Highlighter(Formatter formatter, Scorer fragmentScorer) 

Source Link

Usage

From source file:com.taobao.common.tedis.support.lucene.analysis.xanalyzer.TestHighLight.java

License:Open Source License

/**
 * Indexes a single document into an in-memory directory, searches it with
 * {@code QUERY} and prints the highlighted best fragments of every hit.
 * <p>
 * The field is indexed with term vectors including positions and offsets so
 * that the token stream used for highlighting can be rebuilt from the term
 * vector instead of re-analyzing the stored text.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {

    Directory ramDir = new RAMDirectory();
    IndexReader reader = null;
    Searcher searcher = null;
    try {
        IndexWriter writer = new IndexWriter(ramDir, XFactory.getWriterAnalyzer());
        try {
            Document doc = new Document();
            Field fd = new Field(FIELD_NAME, CONTENT, Field.Store.YES, Field.Index.TOKENIZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS);
            doc.add(fd);
            writer.addDocument(doc);
            writer.optimize();
        } finally {
            // release the writer even when indexing fails
            writer.close();
        }

        reader = IndexReader.open(ramDir);
        String queryString = QUERY;
        QueryParser parser = new QueryParser(FIELD_NAME, XFactory.getWriterAnalyzer());
        Query query = parser.parse(queryString);
        System.out.println(query);
        searcher = new IndexSearcher(ramDir);
        // rewrite so that multi-term queries are expanded into the concrete terms to highlight
        query = query.rewrite(reader);
        System.out.println(query);
        System.out.println("Searching for: " + query.toString(FIELD_NAME));
        Hits hits = searcher.search(query);

        BoldFormatter formatter = new BoldFormatter();
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(50));
        for (int i = 0; i < hits.length(); i++) {
            String text = hits.doc(i).get(FIELD_NAME);
            int maxNumFragmentsRequired = 5;
            String fragmentSeparator = "...";
            // rebuild the token stream from the stored term vector of this hit
            TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(hits.id(i), FIELD_NAME);
            TokenStream tokenStream = TokenSources.getTokenStream(tpv);
            String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
                    fragmentSeparator);
            System.out.println("\n" + result);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // close searcher and reader on every path; a failure to close one must not skip the other
        try {
            if (searcher != null) {
                searcher.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:com.thinkgem.jeesite.common.persistence.BaseDaoImpl.java

License:Open Source License

/**
 * Writes a highlighted (or abbreviated) text snippet into each entity of the list.
 * <p>
 * For every entity the given properties are read in order through their getters,
 * stripped of HTML and passed to the highlighter. The first highlighted fragment
 * found is stored via the setter of the <em>first</em> property and ends the
 * processing of that entity; as long as no fragment is found, the first property
 * is set to the current property's text abbreviated to 100 characters.
 *
 * @param query
 *            query whose terms are highlighted
 * @param list
 *            entities to process; modified in place
 * @param fields
 *            property names to inspect; the first one also receives the result
 * @return the same list instance that was passed in
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(100));
    for (T entity : list) {
        try {
            for (int i = 0; i < fields.length; i++) {
                String plainText = StringUtils
                        .replaceHtml((String) Reflections.invokeGetter(entity, fields[i]));
                String fragment = highlighter.getBestFragment(analyzer, fields[i], plainText);
                if (fragment == null) {
                    // no hit in this property: fall back to a shortened plain text and keep looking
                    Reflections.invokeSetter(entity, fields[0], StringUtils.abbreviate(plainText, 100));
                    continue;
                }
                Reflections.invokeSetter(entity, fields[0], fragment);
                break;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
    return list;
}

From source file:de.blizzy.documentr.search.GetSearchHitTask.java

License:Open Source License

/**
 * Loads the document identified by {@code docId} and builds a {@link SearchHit}
 * from its stored fields, with the text field reduced to highlighted fragments.
 * If the highlighter reports invalid token offsets, the highlighted text stays empty.
 *
 * @return the search hit for the document
 * @throws IOException propagated from reading the document or highlighting
 */
@Override
public SearchHit call() throws IOException {
    Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter("<strong>", "</strong>"), //$NON-NLS-1$ //$NON-NLS-2$
            new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
    highlighter.setEncoder(new SimpleHTMLEncoder());

    Document document = reader.document(docId);
    String project = document.get(PageIndex.PROJECT);
    String branch = document.get(PageIndex.BRANCH);
    String pagePath = document.get(PageIndex.PATH);
    String pageTitle = document.get(PageIndex.TITLE);
    String pageText = document.get(PageIndex.TEXT);
    List<String> sortedTags = Lists.newArrayList(document.getValues(PageIndex.TAG));
    Collections.sort(sortedTags);

    String highlighted = StringUtils.EMPTY;
    TokenStream stream = null;
    try {
        stream = TokenSources.getAnyTokenStream(reader, docId, PageIndex.TEXT, document, analyzer);
        String[] bestFragments = highlighter.getBestFragments(stream, pageText, NUM_FRAGMENTS);
        cleanupFragments(bestFragments);
        highlighted = Util.join(bestFragments, " <strong>...</strong> "); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        // ignore
    } finally {
        Closeables.closeQuietly(stream);
    }
    return new SearchHit(project, branch, pagePath, pageTitle, highlighted, sortedTags);
}

From source file:de.elbe5.cms.search.ContentSearchData.java

License:Open Source License

/**
 * Computes the highlighted context for each searchable field and stores it.
 * The name context falls back to the plain name when no context is found.
 *
 * @param query    query whose terms are highlighted
 * @param analyzer analyzer passed on to {@code getContext}
 */
public void setContexts(Query query, Analyzer analyzer) {
    Highlighter highlighter = new Highlighter(new SearchContextFormatter(), new QueryScorer(query));

    String nameContext = getContext(highlighter, analyzer, "name", CONTEXT_LENGTH_NAME);
    if (nameContext != null && nameContext.length() != 0) {
        setNameContext(nameContext);
    } else {
        setNameContext(getName());
    }

    setDescriptionContext(getContext(highlighter, analyzer, "description", CONTEXT_LENGTH_DESCRIPTION));
    setKeywordsContext(getContext(highlighter, analyzer, "keywords", CONTEXT_LENGTH_KEYWORDS));
    setAuthorContext(getContext(highlighter, analyzer, "authorName", CONTEXT_LENGTH_AUTHOR));
    setContentContext(getContext(highlighter, analyzer, "content", CONTEXT_LENGTH_CONTENT));
}

From source file:de.elbe5.cms.search.UserSearchData.java

License:Open Source License

/**
 * Computes the highlighted context for the name field and stores it,
 * falling back to the plain name when no context is found.
 *
 * @param query    query whose terms are highlighted
 * @param analyzer analyzer passed on to {@code getContext}
 */
public void setContexts(Query query, Analyzer analyzer) {
    Highlighter highlighter = new Highlighter(new SearchContextFormatter(), new QueryScorer(query));
    String nameContext = getContext(highlighter, analyzer, "name", CONTEXT_LENGTH_NAME);
    if (nameContext != null && nameContext.length() != 0) {
        setNameContext(nameContext);
    } else {
        setNameContext(getName());
    }
}

From source file:de.fhg.iais.cortex.search.highlight.SolrTermRememberingHighlighter.java

License:Apache License

/**
 * Creates a highlighter that reuses the fragment scorer and text fragmenter of
 * the given highlighter but formats with a decorated formatter for the field.
 *
 * @param fieldName           field the decorated formatter is created for
 * @param request             request passed to {@code createDecoratedFormatter}
 * @param originalHighlighter highlighter whose scorer and fragmenter are reused
 * @return the newly configured highlighter
 */
private Highlighter createNewHighlighter(String fieldName, SolrQueryRequest request,
        Highlighter originalHighlighter) {
    Highlighter copy = new Highlighter(createDecoratedFormatter(fieldName, request),
            originalHighlighter.getFragmentScorer());
    copy.setTextFragmenter(originalHighlighter.getTextFragmenter());
    return copy;
}

From source file:de.ilias.services.lucene.search.highlight.HitHighlighter.java

License:Open Source License

/**
 * Initializes the Lucene settings, the default and the title highlighter,
 * the field info and the searcher.
 * <p>
 * Both highlighters share one scorer and one formatter; they differ in
 * fragment size, and only the default highlighter has its analysis limit
 * raised to {@code Integer.MAX_VALUE}.
 *
 * @throws ConfigurationException
 * @throws IOException
 * @throws SQLException
 */
private void init() throws ConfigurationException, IOException, SQLException {

    luceneSettings = LuceneSettings.getInstance();

    // scorer and formatter shared by both highlighters
    QueryScorer sharedScorer = new QueryScorer(query);
    SimpleHTMLFormatter sharedFormatter = new SimpleHTMLFormatter(HIGHLIGHT_BEGIN_TAG, HIGHLIGHT_END_TAG);

    // default highlighter: configured fragment size
    highlighter = new Highlighter(sharedFormatter, sharedScorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(luceneSettings.getFragmentSize()));
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

    // title/description highlighter: uses FRAGMENT_TITLE_SIZE instead
    titleHighlighter = new Highlighter(sharedFormatter, sharedScorer);
    titleHighlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_TITLE_SIZE));

    fieldInfo = FieldInfo.getInstance(LocalSettings.getClientKey());

    searcher = SearchHolder.getInstance().getSearcher();
}

From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java

License:Open Source License

/**
 * Creates a highlighter that scores fragments only by the query terms of the
 * given item/meta field and of the all-content index field.
 *
 * @param itemOrMeta name of the field whose terms are used for scoring
 * @param query      query the terms are extracted from
 * @param formatter  formatter used to mark up highlighted terms
 * @return the highlighter, or {@code null} if {@code itemOrMeta} or {@code query} is {@code null}
 */
public Highlighter createHighlighter(String itemOrMeta, Query query, Formatter formatter) {
    if (itemOrMeta == null || query == null) {
        return null;
    }

    // collect terms for this item first, then terms for allcontent
    String[] scoringFields = { itemOrMeta, LuceneManager.INDEXFIELD_ALLCONTENT };
    List collected = new ArrayList();
    for (int f = 0; f < scoringFields.length; f++) {
        WeightedTerm[] fieldTerms = QueryTermExtractor.getTerms(query, false, scoringFields[f]);
        if (fieldTerms == null) {
            continue;
        }
        for (int t = 0; t < fieldTerms.length; t++) {
            collected.add(fieldTerms[t]);
        }
    }

    WeightedTerm[] weightedTerms = (WeightedTerm[]) collected
            .toArray(new WeightedTerm[collected.size()]);

    return new Highlighter(formatter, new QueryTermScorer(weightedTerms));
}

From source file:de.iteratec.iteraplan.businesslogic.service.SearchServiceImpl.java

License:Open Source License

/**
 * Executes the Hibernate Search query and adds the highlighted results to the search map.
 * Returns without searching when no index reader can be opened.
 *
 * @param searchMap
 *          Multimap of SearchRowDTOs that receives the results
 * @param queryString
 *          the query string which the user entered
 * @param buildingBlockTypeFilter
 *          filter value passed on to {@code addResultsToSearchMap}
 */
private void executeSearchQuery(Multimap<String, SearchRowDTO> searchMap, String queryString,
        String buildingBlockTypeFilter) {
    // the provider is needed again at the end to close the reader
    ReaderProvider readerProvider = searchDAO.getReaderProvider();
    IndexReader reader = searchDAO.openReader(readerProvider, getClassArray());

    // a null reader means the user has no functional permission to search any
    // building block, so there is nothing to search
    if (reader == null) {
        return;
    }

    try {
        String[] searchedFields = { "attributeValueAssignments.attributeValue.valueString", "name", "version",
                "description", "informationSystem.name", "technicalComponent.name", "runtimePeriod.start",
                "runtimePeriod.end", "informationSystemReleaseA.informationSystem.name",
                "informationSystemReleaseA.version", "informationSystemReleaseB.informationSystem.name",
                "informationSystemReleaseB.version" };

        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_31, searchedFields,
                new StandardAnalyzer(Version.LUCENE_31));
        // permit a leading * wildcard
        parser.setAllowLeadingWildcard(true);
        // treat the search term as a phrase, which is what most people expect
        parser.setAutoGeneratePhraseQueries(true);
        // workaround for known issue with highlighter and wildcard queries
        parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);

        Query luceneQuery;
        try {
            // parse, then rewrite against the reader so the highlighter sees concrete terms
            luceneQuery = parser.parse(queryString).rewrite(reader);
        } catch (TooManyClauses tmcEx) {
            throw new IteraplanBusinessException(IteraplanErrorMessages.LUCENE_QUERY_TOO_COMPLEX, tmcEx);
        } catch (ParseException e) {
            throw new IteraplanBusinessException(IteraplanErrorMessages.LUCENE_QUERY_PARSE_FAILED, e);
        } catch (IOException e) {
            throw new IteraplanTechnicalException(IteraplanErrorMessages.LUCENE_QUERY_REWRITE_FAILED, e);
        }

        // highlighter for the found content
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"highlighted\">", "</span>"),
                new QueryScorer(luceneQuery));
        highlighter.setTextFragmenter(new SimpleFragmenter(40));

        // results are sorted by building block type
        Sort sort = new Sort(new SortField("buildingBlockType.name", SortField.STRING));

        String[] projections = { "id", "buildingBlockType.name", FullTextQuery.DOCUMENT, "name",
                "informationSystem.name", "technicalComponent.name", "version", "description",
                "runtimePeriod.start", "runtimePeriod.end", "informationSystemReleaseA.informationSystem.name",
                "informationSystemReleaseA.version", "informationSystemReleaseB.informationSystem.name",
                "informationSystemReleaseB.version" };

        List<Object[]> rows = searchDAO.search(luceneQuery, sort, getClassArray(), projections);
        addResultsToSearchMap(searchMap, rows, buildingBlockTypeFilter, highlighter);
    } finally {
        readerProvider.closeReader(reader);
    }

}

From source file:de.joergjahnke.jdesktopsearch.SearchPanel.java

License:Open Source License

/**
 * Get HTML representation of search results.
 * <p>
 * For every document the title values (if any), an excerpt of the "contents"
 * values with the search terms marked, and a link to the document path are
 * appended. Depending on {@code IS_USE_HIGHLIGHTER} the excerpt is produced
 * either by the Lucene highlighter or by {@code highlightSearchTerms}.
 *
 * @param   documents   found result documents
 * @param   query   query which was executed and which contains the search terms
 * @param   maxResults  maximum number of results to display, 0 for unlimited search;
 *                      only used to decide whether the "search again" notice is shown
 * @return  StringBuffer containing HTML to display
 */
private StringBuffer getSearchResultHTML(final Collection<Document> documents, final Query query,
        final int maxResults) {
    // create result string with HTML content
    final StringBuffer result = new StringBuffer("<html><body>");

    // NOTE(review): query text, titles and paths are inserted without HTML escaping — confirm this is acceptable
    result.append("<i>Searching for &quot;" + query.toString() + "&quot;</i><br><br>");

    // get query strings to mark in result
    final Set<String> searchTerms = QueryUtils.getSearchTerms(query, "contents");

    // add hits to result string
    int found = 0;

    // prepare highlighter and the analyzer it tokenizes with; both are only
    // needed (and only assigned) when the highlighter is in use
    final Formatter formatter = new SimpleHTMLFormatter(MARK_START, MARK_END);
    final Highlighter highlighter;
    final StandardAnalyzer analyzer;

    if (IS_USE_HIGHLIGHTER) {
        highlighter = new Highlighter(formatter, new QueryScorer(query));
        // one analyzer for all values instead of a new one per value
        analyzer = new StandardAnalyzer();
    }

    for (Document doc : documents) {
        final String path = doc.get("path");

        // add title if one exists
        String[] values = doc.getValues("title");

        if (null != values) {
            result.append("<b>");

            for (int j = 0; j < values.length; ++j) {
                result.append(values[j]);
                if (j != values.length - 1) {
                    result.append(' ');
                }
            }

            result.append("</b><br>");
        }

        // add part of the document
        values = doc.getValues("contents");
        if (null != values) {
            final StringBuffer contents = new StringBuffer();
            for (int j = 0; j < values.length && contents.length() < MAX_LENGTH; ++j) {
                final String value = values[j];
                String newContents = null;

                if (IS_USE_HIGHLIGHTER) {
                    final TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(value));

                    try {
                        // get 3 best fragments and separate with a "..."
                        newContents = highlighter.getBestFragments(tokenStream, value, 3, "<br>...<br>");
                    } catch (IOException e) {
                        System.err.println("why here " + e.getMessage());
                    }
                } else {
                    // length of the text collected so far without the marker strings; the markers
                    // are literal text, so strip them with replace() rather than regex replaceAll()
                    newContents = highlightSearchTerms(value, searchTerms,
                            contents.toString().replace(MARK_START, "").replace(MARK_END, "").length());
                }

                if (null != newContents && newContents.length() > 0) {
                    contents.append(newContents);
                    contents.append(' ');
                }
            }

            if (contents.length() > 0) {
                result.append(contents.toString());
                result.append("<br>");
            }
        } else {
            System.err.println("why no contents");
        }

        // add url to document
        result.append("<a href='" + new File(path).toURI() + "'>" + path + "</a><br><br>");
        ++found;
    }

    result.append("<br>" + found + " documents found.");

    // show link to get more results if the maximum result number was reached
    if (maxResults > 0 && documents.size() >= maxResults) {
        result.append("<br><br>The maximum of " + maxResults
                + " documents to display was reached. More results might be available. Click <a href='special://searchAgain'>here</a> to search again without a result number limitation. Please note that such a search might take a considerable amount of time depending on the number of documents found.");
    }

    result.append("</body></html>");

    return result;
}