Example usage for org.apache.lucene.search.highlight Highlighter getBestFragments

List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragments

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight Highlighter getBestFragments.

Prototype

public final String getBestFragments(TokenStream tokenStream, String text, int maxNumFragments,
        String separator) throws IOException, InvalidTokenOffsetsException 

Source Link

Document

Highlights terms in the text, extracting the most relevant sections and concatenating the chosen fragments with a separator (typically "...").

Usage

From source file:aos.lucene.tools.HighlightIt.java

License:Apache License

/**
 * Demonstrates Lucene's Highlighter: parses a query for "term", highlights the
 * best matching fragments of a sample text with an HTML span, and writes the
 * result as a small HTML page to the given output file.
 *
 * @param args exactly one argument: the output HTML filename
 * @throws Exception on query-parsing, highlighting, or I/O failure
 */
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: HighlightIt <filename-out>");
        System.exit(-1);
    }

    String filename = args[0];

    // Sample text to highlight. NOTE(review): the original snippet referenced
    // an undeclared variable `text`; declare it locally so the example compiles.
    String text = "In this example the term to highlight appears in the text; "
            + "the highlighter extracts the best fragments containing the term.";

    // Build the query against field "f" with a StandardAnalyzer.
    String searchText = "term";
    QueryParser parser = new QueryParser(Version.LUCENE_46, "f",
            new StandardAnalyzer(Version.LUCENE_46));
    Query query = parser.parse(searchText);

    // Matches are wrapped in <span class="highlight">...</span>.
    SimpleHTMLFormatter formatter =
            new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");

    // Re-analyze the raw text with the same analyzer used for the query.
    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46)
            .tokenStream("f", new StringReader(text));

    QueryScorer scorer = new QueryScorer(query, "f");

    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    // Up to 3 best fragments, joined by "...".
    String result = highlighter.getBestFragments(tokens, text, 3, "...");

    // try-with-resources guarantees the writer is closed even if a write fails
    // (the original never closed it on an exception path).
    try (FileWriter writer = new FileWriter(filename)) {
        writer.write("<html>");
        writer.write("<style>\n" +
                ".highlight {\n" +
                " background: yellow;\n" +
                "}\n" +
                "</style>");
        writer.write("<body>");
        writer.write(result);
        writer.write("</body></html>");
    }
}

From source file:blackbelt.lucene.testHighlight.MainHighlight.java

License:Open Source License

/**
 * Demonstrates Lucene highlighting: searches a sample paragraph for
 * "hibernate", wraps matches in &lt;b&gt; tags, prints the fragments, and
 * writes them to an HTML file.
 *
 * @param args unused
 * @throws ParseException if the keyword cannot be parsed into a query
 * @throws IOException on token-stream errors during highlighting
 */
public static void main(String[] args) throws ParseException, IOException {

    String keyWord = "hibernate";
    String text = "Hibernate is an object-relational mapping (ORM) library for the Java language,"
            + "providing a framework for mapping an object-oriented domain model to a traditional relational"
            + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct "
            + "persistence-related database accesses with high-level object handling functions. "
            + "Hibernate is free software that is distributed under the GNU Lesser General Public License. "
            + "Hibernate's primary feature is mapping from Java classes to database tables "
            + "(and from Java data types to SQL data types). Hibernate also provides data query"
            + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve"
            + " the developer from manual result set handling and object conversion and keep the application"
            + " portable to all supported SQL databases with little performance overhead.";
    String result;

    // Parse the keyword against the "title" field using the same analyzer
    // that tokenizes the text below.
    QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30));
    Query query = parser.parse(keyWord);

    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text));

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Fragments of roughly 85 characters, centered on matched spans.
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85));

    try {
        // Up to 4 best fragments, separated by an HTML line break.
        result = highlighter.getBestFragments(tokens, text, 4, "<BR/>...");
        System.out.println(result);
        System.out.println("\n" + result.length());
    } catch (InvalidTokenOffsetsException e) {
        throw new RuntimeException(e);
    }
    result = "<html><body>" + result + "</body></html>";
    // NOTE(review): hard-coded, user-specific output path; parameterize if reused.
    File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html");
    // try-with-resources replaces the manual close() and the auto-generated
    // catch block; the writer is closed even if printing fails.
    try (PrintWriter pw = new PrintWriter(file)) {
        pw.print(result);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}

From source file:ca.dracode.ais.indexer.FileSearcher.java

License:Open Source License

/**
 * Takes a list of Documents and highlights information relevant to a given Query.
 *
 * @param docs The documents to highlight, assumed sorted by relevance
 * @param qry The query used to highlight the documents
 * @param type The type of the search, one of QUERY_BOOLEAN,
 *             which just notes the page on which the term exists or QUERY_STANDARD,
 *             which gives highlighted fragments and the page on which they exist.
 * @param term The term that created the query
 * @param maxResults The maximum number of results that will be returned
 * @return A SearchResult containing the results sorted by relevance and page,
 *         or null if an unexpected error occurred
 */
private SearchResult getHighlightedResults(List<Document> docs, Query qry, int type, String term,
        int maxResults) {
    try {
        int numResults = 0;
        // path -> (page -> fragments); LinkedHashMaps preserve relevance order.
        LinkedHashMap<String, LinkedHashMap<Integer, List<String>>> results = new LinkedHashMap<String, LinkedHashMap<Integer, List<String>>>();
        for (int i = 0; i < docs.size() && numResults < maxResults; i++) {
            Document d = docs.get(i);
            int docPage = Integer.parseInt(d.get("page"));
            String name = d.get("path");
            LinkedHashMap<Integer, List<String>> docResult = results.get(name);
            if (docResult == null) {
                docResult = new LinkedHashMap<Integer, List<String>>();
                results.put(name, docResult);
            }
            if (type != FileSearcher.QUERY_BOOLEAN) {
                String contents = d.get("text");
                Highlighter highlighter = new Highlighter(new QueryScorer(qry));

                String[] frag = null;
                try {
                    frag = highlighter.getBestFragments(new SimpleAnalyzer(Version.LUCENE_47), "text", contents,
                            maxResults - numResults);
                    numResults += frag.length;
                } catch (IOException e) {
                    Log.e(TAG, "Error while reading index", e);
                } catch (InvalidTokenOffsetsException e) {
                    Log.e(TAG, "Error while highlighting", e);
                }
                // FIX: guard against an empty array before reading frag[0];
                // the original indexed frag[0] unconditionally and threw
                // ArrayIndexOutOfBoundsException when there were no fragments.
                if (frag != null && frag.length > 0) {
                    Log.i(TAG, "Frags: " + frag.length + " " + frag + " " + frag[0]);
                }
                ArrayList<String> tmpList = new ArrayList<String>(
                        Arrays.asList(frag != null ? frag : new String[0]));
                Log.i(TAG, "list " + tmpList.getClass().getName());
                docResult.put(docPage, tmpList);
            } else {
                // Boolean queries only record that the term occurs on the page.
                ArrayList<String> tmp = new ArrayList<String>();
                tmp.add(term);
                docResult.put(docPage, tmp);
            }

        }
        Log.i(TAG, "" + results.size());
        return new SearchResult(results);
    } catch (Exception e) {
        // FIX: use the TAG constant; the original passed the literal "TAG",
        // inconsistent with every other log call in this method.
        Log.e(TAG, "Error while Highlighting", e);
        return null;
    }
}

From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java

License:Apache License

@Override
public SearchResult search(SearchRequest sResquest) throws EsearchException {
    // Flush buffered indexing events (2s budget) so this search sees recently
    // indexed documents; a flush failure is logged but does not abort the search.
    try {
        esearchSystem.flushEvents(2000);
    } catch (EsearchException e) {
        LOGGER.error("Esearch flush events error. \n{}", e);
    }
    String queryString = sResquest.getQuery();
    String queryField = sResquest.getField();
    LOGGER.info("The search request coming: queryField:{},queryString:{}", queryField, queryString);

    Analyzer analyzer = esearchSystem.getAnalyzer();
    QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryField, analyzer);
    SearchResult result = new SearchResult();

    List<EsearchMultiReader<R>> readers = null;
    MultiReader multiReader = null;
    IndexSearcher searcher = null;
    try {
        // An empty/null query string means "match all documents".
        Query query = null;
        if (Strings.isNullOrEmpty(queryString)) {
            query = new MatchAllDocsQuery();
        } else {
            query = queryParser.parse(queryString);
        }
        // Combine all index readers into one logical index. closeSubReaders is
        // false because the sub-readers are returned to the pool in the finally.
        readers = esearchSystem.getIndexReaders();
        multiReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
        searcher = new IndexSearcher(multiReader);
        long start = System.currentTimeMillis();
        TopDocs docs = searcher.search(query, null, sResquest.getSize());
        long end = System.currentTimeMillis();

        result.setTime(end - start);
        result.setTotalDocs(multiReader.numDocs());
        result.setTotalHits(docs.totalHits);

        LOGGER.info("Got {} hits. Cost:{} ms", docs.totalHits, end - start);

        // COUNT searches only need the totals set above — skip doc retrieval.
        if (sResquest.getSearchType() == SearchRequest.SearchType.COUNT) {
            return result;
        }

        ScoreDoc[] scoreDocs = docs.scoreDocs;
        ArrayList<SearchHit> hitList = new ArrayList<SearchHit>(scoreDocs.length);
        for (ScoreDoc scoreDoc : scoreDocs) {
            SearchHit hit = new SearchHit();
            hit.setScore(scoreDoc.score);
            int docID = scoreDoc.doc;

            Document doc = multiReader.document(docID);
            String content = doc.get(queryField);

            // Highlight the single best fragment of the queried field,
            // wrapping matches in <span class="hl">.
            Scorer qs = new QueryScorer(query);

            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hl\">", "</span>");
            Highlighter hl = new Highlighter(formatter, qs);
            String[] fragments = hl.getBestFragments(analyzer, queryField, content, 1);

            Map<String, String[]> fields = convert(doc, sResquest.getSearchType());
            fields.put("fragment", fragments);
            hit.setFields(fields);
            hitList.add(hit);
        }
        result.setHits(hitList.toArray(new SearchHit[hitList.size()]));
        return result;
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
        throw new EsearchException(e.getMessage(), e);
    } finally {
        // Always close the MultiReader and hand the sub-readers back,
        // even when the search failed.
        if (multiReader != null) {
            try {
                multiReader.close();
            } catch (IOException e) {
                LOGGER.error(e.getMessage(), e);
            }
        }
        esearchSystem.returnIndexReaders(readers);
    }
}

From source file:com.appeligo.alerts.KeywordAlertThread.java

License:Apache License

/**
 * Iterates over every saved KeywordAlert (in normalized-query order), runs the
 * query through the supplied executor, highlights matching program text, and
 * sends alert messages for new matches.
 *
 * @param searchExecutor callback to get the set of hits for the given query. This can be
 * executed in different ways.
 * @param messagePrefix prefix handed to helper.sendMessages for each alert sent
 * @param groupQueries if true, consecutive alerts with the same normalized query
 *        reuse the previous search result instead of searching again
 * @return true if we hit too many consecutive exceptions so we broke out of the loop
 */
private boolean executeKeywordSearch(SearchExecutor searchExecutor, String messagePrefix,
        boolean groupQueries) {
    ChunkedResults<KeywordAlert> results = KeywordAlert.getAllInNormalizedQueryOrder();
    Hits hits = null;
    String lastNormalizedQuery = null;
    Query lastLuceneQuery = null;
    int consecutiveExceptions = 0;
    results.beforeFirst();
    while (results.next() && isActive()) {
        KeywordAlert keywordAlert = results.get();
        try {
            // Skip alerts that have been removed or switched off.
            if (keywordAlert.isDeleted() || keywordAlert.isDisabled()) {
                if (log.isDebugEnabled())
                    log.debug("keyword alert is deleted or disabled");
                continue;
            }
            User user = keywordAlert.getUser();
            if (user == null) {
                // Orphaned alert: mark it deleted so it is skipped next time.
                if (log.isDebugEnabled())
                    log.debug("keyword alert is implicitly deleted (user is null)");
                keywordAlert.setDeleted(true);
                keywordAlert.save();
                continue;
            }

            // Respect the per-alert cap before doing any search work.
            if (helper.maxAlertsExceeded(keywordAlert)) {
                continue;
            }

            if (groupQueries) {
                // Alerts arrive sorted by normalized query, so an identical
                // consecutive query can reuse the previous hits.
                if ((hits == null) || (!keywordAlert.getNormalizedQuery().equals(lastNormalizedQuery))) {
                    hits = searchExecutor.search(null, keywordAlert.getNormalizedQuery());
                    lastLuceneQuery = searchExecutor.getLuceneQuery();
                } else if (log.isDebugEnabled())
                    log.debug("Not searching on " + keywordAlert.getNormalizedQuery() + " again");
            } else {
                hits = searchExecutor.search(keywordAlert.getUser().getLineupId(),
                        keywordAlert.getNormalizedQuery());
                // Note that I'm searching with the lineup from the user, which will
                // only ensure that the liveIndex doesn't return shows that don't ever
                // play for this lineup.  However, it does not guarantee that the show
                // on this user's lineup is playing at the same time (meaning alerts
                // might tell the user of a show that is only in the future).
                lastLuceneQuery = searchExecutor.getLuceneQuery();
            }
            lastNormalizedQuery = keywordAlert.getNormalizedQuery();
            Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(lastLuceneQuery));
            PorterStemAnalyzer analyzer = new PorterStemAnalyzer(LuceneIndexer.STOP_WORDS);

            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);

                if (!isActive()) {
                    break;
                }

                //                 if (groupQueries && (!"true".equals(doc.get("lineup-"+keywordAlert.getUser().getLineupId())))) {
                if (groupQueries
                        && (doc.get("lineup-" + keywordAlert.getUser().getLineupId() + "-startTime") == null)) {
                    // This "if" statement checks to make sure the program is or did play on the user's
                    // lineup, which might be on a different station, a different time, past or future.
                    if (log.isDebugEnabled())
                        log.debug(doc.get("programTitle") + " matched on " + keywordAlert.getNormalizedQuery()
                                + " but it isn't airing on this user's lineup anytime soon.");
                    continue;
                }

                // Each match runs in its own transaction so one failure does
                // not roll back the alerts already processed.
                Transaction transaction = HibernateUtil.currentSession().beginTransaction();
                try {
                    if ((!helper.maxAlertsExceeded(keywordAlert)) && helper.isNewMatch(keywordAlert, doc)) {
                        if (log.isDebugEnabled())
                            log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for "
                                    + keywordAlert.getNormalizedQuery() + "... sending messages");
                        String text = doc.get("text");
                        String fragments = null;
                        if (text != null) {
                            // Highlight up to 3 best fragments, "..."-separated.
                            TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(text));
                            fragments = highlighter.getBestFragments(tokenStream, text, 3, "...");
                        }

                        helper.incrementTodaysAlertCount(keywordAlert);
                        helper.sendMessages(keywordAlert, fragments, doc, messagePrefix);
                    } else if (log.isDebugEnabled())
                        log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for "
                                + keywordAlert.getNormalizedQuery()
                                + " but max exceeded or we already matched this one");
                } catch (Throwable t) {
                    log.error(
                            "Error processing keyword alerts when searching live lucene index. Rolling back transaction.",
                            t);
                    transaction.rollback();
                } finally {
                    if (!transaction.wasRolledBack()) {
                        transaction.commit();
                    }
                }
            }
            // A fully processed alert resets the consecutive-failure counter.
            consecutiveExceptions = 0;
        } catch (Throwable t) {
            User user = keywordAlert.getUser();
            log.error("Caught throwable on keyword " + keywordAlert.getId() + ", " + keywordAlert.getUserQuery()
                    + ", user " + ((user == null) ? null : user.getUsername()), t);
            consecutiveExceptions++;
            if (consecutiveExceptions >= maxConsecutiveExceptions) {
                return true;
            }
        }
    }
    return false;
}

From source file:com.appeligo.search.actions.SearchResults.java

License:Apache License

/**
 * Wraps a matched document (with its score and highlighted fragments) in a
 * SearchResult, appends it to the result list, and indexes it by program ID.
 *
 * @throws IOException if fragment extraction fails
 */
private void addDocument(Document doc, float score, EPGProvider epgProvider, Highlighter highlighter,
        Analyzer analyzer, ScheduledProgram next, ScheduledProgram last, Program programInfo)
        throws IOException {
    // Extract up to 3 best fragments from the stored text, if present.
    String fragments = null;
    String text = doc.get("text");
    if (text != null) {
        TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(text));
        fragments = highlighter.getBestFragments(tokenStream, text, 3, "...");
    }
    DocumentWrapper wrapper = new DocumentWrapper(doc, score, fragments);
    SearchResult searchResult = new SearchResult(lineup, wrapper, programInfo, last, next);
    results.add(searchResult);
    programToSearchResult.put(doc.get("programID"), searchResult);
}

From source file:com.bewsia.script.safe.lucene.SEntity.java

License:Open Source License

/**
 * Returns the best highlighted fragments of {@code text} for the given query,
 * or the original text unchanged when nothing matched.
 *
 * @param query the query whose terms are highlighted
 * @param text the raw text to highlight
 * @param field the field name used when tokenizing the text
 * @param fragmentSize target size of each fragment, in characters
 * @param maxNumFragments maximum number of fragments to concatenate
 * @param separator string placed between fragments
 * @throws Exception on tokenization or highlighting failure
 */
public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments,
        String separator) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    CachingTokenFilter cachedTokens = new CachingTokenFilter(
            analyzer.tokenStream(field, new StringReader(text)));
    Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
    cachedTokens.reset();
    String highlighted = highlighter.getBestFragments(cachedTokens, text, maxNumFragments, separator);
    // No fragment produced — fall back to the unmodified input text.
    return highlighted.length() == 0 ? text : highlighted;
}

From source file:com.bugull.mongo.lucene.BuguHighlighter.java

License:Apache License

/**
 * Highlights occurrences of the configured keywords within a field value,
 * returning up to {@code maxFragments} best fragments joined by "...".
 *
 * @param fieldName the indexed field the keywords are parsed against
 * @param fieldValue the raw field text to highlight
 * @throws Exception on parsing or highlighting failure
 */
public String getResult(String fieldName, String fieldValue) throws Exception {
    BuguIndex index = BuguIndex.getInstance();
    // Parse the stored keywords into a query on the requested field.
    QueryParser queryParser = new QueryParser(index.getVersion(), fieldName, index.getAnalyzer());
    Query parsedQuery = queryParser.parse(keywords);
    QueryScorer queryScorer = new QueryScorer(parsedQuery, fieldName);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
    TokenStream tokenStream = index.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));
    return highlighter.getBestFragments(tokenStream, fieldValue, maxFragments, "...");
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Highlights query matches in the given string with {@code <span class="hit">}
 * tags. Strings longer than TAILLE_CHAINE_NON_FRAGMENTEE are cut into
 * NB_BEST_FRAGMENT fragments of TAILLE_FRAGMENT characters joined by
 * FRAGMENT_SEP; shorter strings are highlighted as a single fragment. For the
 * "titre" field, a blank result falls back to the original string.
 *
 * @param strToHighlight the raw text to highlight
 * @param fieldName the index field the text belongs to
 * @param luceneQuery the query whose terms should be highlighted
 * @return the highlighted fragment(s); may be blank for non-title fields
 */
public String highlight(String strToHighlight, String fieldName, Query luceneQuery) {
    String highlightedText;
    Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
    // try-with-resources closes the reader and directory even when
    // highlighting throws; the original leaked both on any exception.
    try (Directory directory = FSDirectory.open(indexDir);
            IndexReader indexReader = DirectoryReader.open(directory)) {
        // Rewrite the query against the reader so the scorer sees concrete terms.
        Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader);
        QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);
        SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>");
        Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);

        Fragmenter luceneFragmenter;
        if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
            // Long input: build best fragments of TAILLE_FRAGMENT characters each.
            luceneFragmenter = new SimpleFragmenter(TAILLE_FRAGMENT);
        } else {
            // Short input: highlight the entire string as one fragment.
            luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE);
        }
        luceneHighlighter.setTextFragmenter(luceneFragmenter);

        TokenStream luceneTokenStream = analyzer.tokenStream(fieldName, new StringReader(strToHighlight));
        String fragment = null;
        if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
            fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight, NB_BEST_FRAGMENT,
                    FRAGMENT_SEP);
        } else {
            fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight);
        }

        // Titles must never come back empty: fall back to the raw text.
        if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) {
            fragment = strToHighlight;
        }

        highlightedText = fragment;
    } catch (IOException | InvalidTokenOffsetsException e) {
        throw new RuntimeException(e);
    }
    return highlightedText;
}

From source file:com.edgenius.wiki.search.service.AbstractSearchService.java

License:Open Source License

/**
 * Builds the highlighted fragment string for a piece of content: up to 3 best
 * fragments joined by "...". Returns "" for null content, the content itself
 * when no highlighter is supplied, and a plain abbreviated excerpt when the
 * token offsets are invalid.
 *
 * @param hl the highlighter to apply, or null to skip highlighting
 * @param content the raw content to highlight
 * @throws IOException on token-stream failure
 */
private String createFragment(Highlighter hl, String content) throws IOException {
    if (content == null) {
        return "";
    }
    if (hl == null) {
        return content;
    }

    TokenStream tokenStream = searcherFactory.getAnalyzer().tokenStream(FieldName.CONTENT,
            new StringReader(content));
    try {
        return hl.getBestFragments(tokenStream, content, 3, "...");
    } catch (InvalidTokenOffsetsException e) {
        // Highlighting failed — fall back to a truncated plain excerpt.
        log.error("Highlight fragment error", e);
        return StringUtils.abbreviate(content, FRAGMENT_LEN);
    }
}