Example usage for org.apache.lucene.search.highlight Highlighter getBestFragment

List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragment

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight Highlighter getBestFragment.

Prototype

public final String getBestFragment(TokenStream tokenStream, String text)
        throws IOException, InvalidTokenOffsetsException 

Source Link

Document

Highlights chosen terms in a text, extracting the most relevant section.

Usage

From source file:net.skyatlas.icd.test.AnsegTest.java

/**
 * Highlights query matches in the "text" field of a document, wrapping them
 * in red {@code <font>} tags. Falls back to the raw field value when the
 * highlighter produces no fragment (i.e. the query matched nothing here).
 *
 * @param analyzer the analyzer used to re-tokenize the stored field text
 * @param query    the query whose terms should be highlighted
 * @param doc      the document whose "text" field is highlighted
 * @return the highlighted fragment, or the raw field value when nothing matched
 *         or tokenization failed
 * @throws InvalidTokenOffsetsException if token offsets are inconsistent with the text
 */
private String toHighlighter(Analyzer analyzer, Query query, Document doc) throws InvalidTokenOffsetsException {
    String field = "text";
    String text = doc.get(field);
    try {
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        TokenStream tokenStream1 = analyzer.tokenStream(field, new StringReader(text));
        String highlighterStr = highlighter.getBestFragment(tokenStream1, text);
        return highlighterStr == null ? text : highlighterStr;
    } catch (IOException e) {
        // Tokenization is best-effort: return the unhighlighted field value
        // rather than null (the original swallowed this with printStackTrace
        // and returned null). InvalidTokenOffsetsException now propagates, as
        // the method signature always declared.
        return text;
    }
}

From source file:org.apache.nutch.searcher.Summarizer.java

License:Apache License

/**
 * Produces a summary of {@code content} with query terms highlighted using
 * the configured CSS markup. When the query is missing, unparsable, or
 * matches nothing, a plain truncated prefix of the content is returned
 * instead; a null content always yields the empty string.
 *
 * @param queryString the raw user query, possibly null
 * @param content     the document text to summarize, possibly null
 * @param analyzer    analyzer used to parse the query and tokenize the content
 * @return a highlighted fragment, a truncated content prefix, or ""
 */
public static String getsummary(String queryString, String content, Analyzer analyzer) {
    // Guard clauses replace the original branch ladder: no content means
    // nothing to summarize; no query means a plain (truncated) prefix.
    if (content == null) {
        return "";
    }
    if (queryString == null) {
        return plainSummary(content);
    }

    org.apache.lucene.search.Query summarizerQuery;
    QueryParser queryParse = new QueryParser("content", analyzer);
    try {
        summarizerQuery = queryParse.parse(queryString);
    } catch (ParseException ex) {
        // An unparsable query degrades to the plain summary.
        return plainSummary(content);
    }

    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(cssfront, cssend),
            new QueryScorer(summarizerQuery));
    highlighter.setTextFragmenter(new SimpleFragmenter(SUM_LENGTH));
    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));

    String fragment;
    try {
        fragment = highlighter.getBestFragment(tokenStream, content);
    } catch (IOException e) {
        fragment = null;
    }
    // No fragment (no match, or an I/O failure) also degrades to the plain summary.
    return fragment == null ? plainSummary(content) : fragment;
}

/** Returns the content unchanged, or its first SUM_LENGTH - 1 characters when longer. */
private static String plainSummary(String content) {
    return content.length() > SUM_LENGTH ? content.substring(0, SUM_LENGTH - 1) : content;
}

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

/**
 * Highlights the best matching fragment of {@code text} for the given
 * resource property, using the highlighter configured for that property.
 *
 * @param resource     the resource being highlighted (used for error context)
 * @param propertyName the property whose highlighter and token stream are used
 * @param text         the text to extract the best fragment from
 * @return the best highlighted fragment, or null when nothing matched
 * @throws SearchEngineException wrapping any IOException from tokenization
 */
public String fragment(Resource resource, String propertyName, String text) throws SearchEngineException {

    Highlighter highlighter = createHighlighter(propertyName);
    TokenStream tokenStream = createTokenStream(resource, propertyName, text);

    try {
        return highlighter.getBestFragment(tokenStream, text);
    } catch (IOException e) {
        // Preserve the underlying IOException as the cause instead of
        // discarding it (the original threw a message-only exception).
        throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias()
                + "] and property [" + propertyName + "]", e);
    }
}

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

/**
 * Highlights each value of a multi-valued property, returning only the
 * non-empty fragments. Null or empty values are skipped.
 *
 * @param resource     the resource being highlighted (used for error context)
 * @param propertyName the property whose analyzer-based token stream is used
 * @param texts        the property values to highlight, entries may be null/empty
 * @return the non-empty highlighted fragments, in input order
 * @throws SearchEngineException wrapping any IOException from tokenization
 */
public String[] multiValueFragment(Resource resource, String propertyName, String[] texts)
        throws SearchEngineException {
    // Parameterized list instead of the original raw List/ArrayList.
    List<String> fragmentList = new ArrayList<String>();
    Highlighter highlighter = createHighlighter(propertyName);
    for (String text : texts) {
        if (text != null && text.length() > 0) {
            // We have to re-analyze one field value at a time
            TokenStream tokenStream = createTokenStreamFromAnalyzer(propertyName, text);
            try {
                String fragment = highlighter.getBestFragment(tokenStream, text);
                if (fragment != null && fragment.length() > 0) {
                    fragmentList.add(fragment);
                }
            } catch (IOException e) {
                // Keep the IOException as the cause (the original dropped it).
                throw new SearchEngineException("Failed to highlight fragments for alias ["
                        + resource.getAlias() + "] and property [" + propertyName + "]", e);
            }
        }
    }
    return fragmentList.toArray(new String[fragmentList.size()]);
}

From source file:org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighterTests.java

License:Apache License

/**
 * Verifies that highlighting a boolean query mixing the given geo query with
 * a "text:failure" term query does not throw, and that only the term match
 * is wrapped in highlight tags.
 *
 * @param geoQuery the geo query to combine with the term query
 * @throws IOException on tokenization failure
 * @throws InvalidTokenOffsetsException if token offsets are inconsistent with the text
 */
public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    // Parameterized map instead of the original raw-typed `Map analysers`,
    // which compiled with an unchecked warning and lost the field->analyzer typing.
    Map<String, Analyzer> analysers = new HashMap<>();
    analysers.put("text", new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);
    Query termQuery = new TermQuery(new Term("text", "failure"));
    Query boolQuery = new BooleanQuery.Builder().add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD))
            .add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD)).build();
    org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(
            new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(
            fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"),
            "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));
    // TODO: This test will fail if we pass in an instance of GeoPointInBBoxQueryImpl too. Should we also find a way to work around that
    // or can the query not be rewritten before it is passed into the highlighter?
}

From source file:org.elasticsearch.search.highlight.PlainHighlighterTests.java

License:Apache License

/**
 * Verifies that highlighting a boolean query mixing the given geo query with
 * a "text:failure" term query does not throw — both in its original form and
 * after rewriting — and that only the term match is wrapped in highlight tags.
 *
 * @param geoQuery the geo query to combine with the term query
 * @throws IOException on tokenization failure
 * @throws InvalidTokenOffsetsException if token offsets are inconsistent with the text
 */
public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    // Parameterized map instead of the original raw-typed `Map analysers`,
    // which compiled with an unchecked warning and lost the field->analyzer typing.
    Map<String, Analyzer> analysers = new HashMap<>();
    analysers.put("text", new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);
    Query termQuery = new TermQuery(new Term("text", "failure"));
    Query boolQuery = new BooleanQuery.Builder().add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD))
            .add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD)).build();
    org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(
            new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(
            fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"),
            "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));
    // Repeat the check after rewriting, to make sure the rewritten query form
    // is also safe to pass to the highlighter.
    Query rewritten = boolQuery.rewrite(null);
    highlighter = new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(rewritten));
    fragment = highlighter.getBestFragment(
            fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"),
            "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));
}

From source file:org.intermine.api.search.SearchResults.java

License:GNU General Public License

/**
 * Actually filter the web searchable items we have to get a reduced list of matches.
 * @param origQueryString A query to filter the items against. Assumes the query
 *                        string is neither null nor empty.
 * @param target Information about the scope and type of items to receive.
 * @param profileRepo The repository of the user who wants to find something.
 * @return A set of search results.
 * @throws ParseException If the query string cannot be parsed.
 * @throws IOException If there is an issue opening the indices.
 */
private static SearchResults doFilteredSearch(String origQueryString, SearchTarget target,
        SearchRepository profileRepo) throws ParseException, IOException {

    // Highlighted description per matched item; populated while iterating hits.
    Map<WebSearchable, String> highlightedDescMap = new HashMap<WebSearchable, String>();

    String queryString = prepareQueryString(origQueryString);

    LOG.info("Searching " + target + " for " + " was:" + origQueryString + " now:" + queryString);
    long time = System.currentTimeMillis();

    org.apache.lucene.search.Query query;

    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // The default search field is the content buffer.
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
    query = queryParser.parse(queryString);

    // Collect the searchable maps and index directories, global repositories first.
    String type = target.getType();
    Map<String, WebSearchable> globalWebSearchables = new HashMap<String, WebSearchable>();
    Set<SearchRepository> globals = SearchRepository.getGlobalSearchRepositories();
    List<Directory> globalDirs = new ArrayList<Directory>();
    for (SearchRepository sr : globals) {
        globalWebSearchables.putAll(sr.getWebSearchableMap(type));
        globalDirs.add(sr.getSearchIndex(type));
    }
    Map<String, WebSearchable> userWebSearchables = profileRepo.getWebSearchableMap(type);
    Directory userDirectory = profileRepo.getSearchIndex(type);

    MultiSearcher searcher = prepareSearcher(target, userDirectory, globalDirs);

    // required to expand search terms
    query = searcher.rewrite(query);
    TopDocs topDocs = searcher.search(query, 1000); //FIXME: hardcoded limit

    time = System.currentTimeMillis() - time;
    LOG.info("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "' in " + time
            + " milliseconds:");

    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(FORMATTER, scorer);

    Map<WebSearchable, Float> hitMap = new HashMap<WebSearchable, Float>();
    Map<WebSearchable, Set<String>> tags = new HashMap<WebSearchable, Set<String>>();

    for (int i = 0; i < topDocs.totalHits; i++) {
        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
        String name = doc.get("name");

        // Resolve the hit against user-owned items first, then the globals.
        WebSearchable webSearchable = userWebSearchables.get(name);
        if (webSearchable == null) {
            webSearchable = globalWebSearchables.get(name);
        }
        if (webSearchable == null) {
            throw new RuntimeException("unknown WebSearchable: " + name);
        }

        // Float.valueOf replaces the deprecated boxing constructor `new Float(...)`.
        Float luceneScore = Float.valueOf(topDocs.scoreDocs[i].score);
        hitMap.put(webSearchable, luceneScore);

        tags.put(webSearchable, new HashSet<String>(asList(split(doc.get("tags")))));

        // Highlight the description. The original guarded on
        // `highlightedDescMap != null`, which is always true since the map is
        // unconditionally initialised above — the dead check has been removed.
        try {
            String highlightString = webSearchable.getDescription();
            if (highlightString == null) {
                highlightString = "";
            }
            TokenStream tokenStream = analyzer.tokenStream("", new StringReader(highlightString));
            highlighter.setTextFragmenter(new NullFragmenter());
            highlightedDescMap.put(webSearchable,
                    highlighter.getBestFragment(tokenStream, highlightString));
        } catch (InvalidTokenOffsetsException e) {
            // Highlighting is best-effort; a failed description is simply absent.
            LOG.warn("Highlighter exception", e);
        }
    }

    // Index the hits by name for the SearchResults constructor.
    Map<String, WebSearchable> wsMap = new HashMap<String, WebSearchable>();
    for (WebSearchable ws : hitMap.keySet()) {
        wsMap.put(ws.getName(), ws);
    }

    return new SearchResults(hitMap, wsMap, highlightedDescMap, tags);
}

From source file:org.zilverline.service.SearchServiceImpl.java

License:Open Source License

/**
 * Helper that creates a Result from a Document.
 *
 * @param doc the Document
 * @param score the score of the Document in the hit
 * @param hl the Highlighter used
 * @param an the Analyzer used
 * 
 * @return Result the resulting object that is used in the model.
 */
private Result doc2ResultHelper(final Document doc, final float score, final Highlighter hl,
        final Analyzer an) {
    // Pull the stored fields that identify and locate the document.
    String docTitle = doc.get("title");
    String docName = doc.get("name");
    String docPath = doc.get("path");
    String zipName = doc.get("zipName");
    if (log.isDebugEnabled()) {
        log.debug("Preparing result " + docName + ":" + zipName);
    }
    String zipPath = doc.get("zipPath");
    String docURL = "";
    String docCache = "";
    String docCollection = doc.get("collection");

    // Look up the collection this document belongs to so its URLs can be resolved.
    DocumentCollection thisCollection = collectionManager.getCollectionByName(docCollection);

    if (thisCollection != null) {
        docURL = thisCollection.getUrlDefault();

        // Only expose a cache URL when the collection keeps a local cache.
        if (thisCollection.isKeepCacheWithManagerDefaults()) {
            docCache = thisCollection.getCacheUrlWithManagerDefaults();
        }
    } else {
        log.error("Unknown collection '" + docCollection + "' found, can not find its URL.");
    }

    // Parse the modification date (stored as yyyyMMdd) into a Date.
    DateFormat df1 = new SimpleDateFormat("yyyyMMdd");
    Date docDate = null;

    try {
        docDate = df1.parse(doc.get("modified"));
    } catch (ParseException e) {
        log.debug("Invalid date retrieved, trying backward compatibility with v 1.0-rc3-patch1");

        // backward compatibility with v 1.0-rc3-patch1 storage of date:
        // Keyword<modified:0cee68g00>
        docDate = DateField.stringToDate(doc.get("modified"));
        if (docDate == null) {
            // Neither format matched; fall back to the epoch so the result still renders.
            log.warn("Invalid date retrieved, returning epoch (1970) for " + docName);
            docDate = new Date(0);
        }
    }

    String docSize = doc.get("size");
    String docType = doc.get("type");
    String docISBN = doc.get("isbn");

    // Fall back to the archive name, then the plain document name, when there
    // is no stored title.
    // TODO this logic could go into Result
    if ((docTitle == null) || docTitle.equals("")) {
        if ((zipName == null) || zipName.equals("")) {
            docTitle = docName;
        } else {
            docTitle = zipName;
        }
    }
    // then make a Result
    Result thisResult = new Result();

    // highlight the title with search terms
    String highlightedText;
    TokenStream tokenStream = an.tokenStream("title", new StringReader(docTitle));

    try {
        highlightedText = hl.getBestFragment(tokenStream, docTitle);

        // getBestFragment returns null when no query term occurs in the title.
        if ((highlightedText != null) && (highlightedText.length() > 0)) {
            docTitle = highlightedText;
        }
    } catch (IOException e1) {
        // Highlighting is best-effort: keep the unhighlighted title on failure.
        log.warn("Can't highlight " + docTitle, e1);
    }

    thisResult.setTitle(docTitle);

    // highlight the name with search terms -- disabled; kept for reference.
    /*
     * tokenStream = an.tokenStream("title", new StringReader(docName)); try { highlightedText = hl.getBestFragment(tokenStream,
     * docName); log.debug("name after highlighting: " + highlightedText); if (highlightedText != null &&
     * highlightedText.length() > 0) { docName = highlightedText; } } catch (IOException e1) { log.warn("Can't highlight " +
     * docName, e1); }
     */
    thisResult.setName(docName);
    thisResult.setCollection(docCollection);
    thisResult.setPath(docPath);
    thisResult.setURL(docURL);
    thisResult.setCache(docCache);
    thisResult.setZipName(zipName);
    thisResult.setZipPath(zipPath);
    thisResult.setScore(score);
    thisResult.setISBN(docISBN);

    String text = doc.get("summary");
    if (text == null) {
        text = "";
    }

    // highlight the summary with search terms
    tokenStream = an.tokenStream("summary", new StringReader(text));

    try {
        highlightedText = hl.getBestFragment(tokenStream, text);

        if ((highlightedText != null) && (highlightedText.length() > 0)) {
            text = highlightedText;
        }
    } catch (IOException e1) {
        // Best-effort again: an IOException leaves the summary unhighlighted.
        log.warn("Can't highlight " + text, e1);
    }

    thisResult.setSummary(text);

    thisResult.setModificationDate(docDate);
    thisResult.setSize(docSize);
    thisResult.setType(docType);

    return thisResult;
}
}