Example usage for org.apache.lucene.search.highlight Highlighter getBestFragments

List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragments

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight Highlighter getBestFragments.

Prototype

public final String getBestFragments(TokenStream tokenStream, String text, int maxNumFragments,
        String separator) throws IOException, InvalidTokenOffsetsException 

Source Link

Document

Highlights terms in the text, extracting the most relevant sections and concatenating the chosen fragments with a separator (typically "...").

Usage

From source file:de.elbe5.cms.search.SearchData.java

License:Open Source License

/**
 * Extracts a highlighted context snippet for the given stored field of this search hit.
 *
 * @param highlighter   Lucene highlighter, already configured with a scorer for the query
 * @param analyzer      analyzer used to tokenize the stored field text
 * @param fieldName     name of the stored document field to build the snippet from
 * @param contextLength approximate maximum length of the returned snippet
 * @return the best highlighted fragment, an empty string if the field is absent or empty,
 *         or the raw field text if highlighting fails
 */
public String getContext(Highlighter highlighter, Analyzer analyzer, String fieldName, int contextLength) {
    highlighter.setTextFragmenter(new SimpleFragmenter(contextLength));
    String text = getDoc().get(fieldName);
    if (text == null || text.isEmpty()) {
        return "";
    }
    try {
        TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
        return highlighter.getBestFragments(tokenStream, text, 1, "...");
    } catch (IOException | InvalidTokenOffsetsException e) {
        // Best effort: fall back to the raw field text instead of silently swallowing
        // an untyped Exception (matches the original fallback behavior).
        return text;
    }
}

From source file:de.iteratec.iteraplan.businesslogic.service.SearchServiceImpl.java

License:Open Source License

/**
 * Highlights the query terms found in the given text and returns the best-scoring
 * fragments, joined with {@code ELLIPSIS_WITH_SEPARATOR}.
 *
 * @param highlighter configured Lucene highlighter (formatter plus query scorer)
 * @param analyzer    analyzer used to tokenize the text
 * @param text        the raw text to highlight
 * @return the concatenated highlighted fragments
 * @throws IteraplanTechnicalException if tokenization or highlighting fails
 */
private String getFoundInHighlightedString(Highlighter highlighter, Analyzer analyzer, String text) {
    // Create a token stream from the text; the field name is irrelevant for highlighting here.
    TokenStream tokenStream = analyzer.tokenStream("", new StringReader(text));
    try {
        // Mark the found query terms in the token stream.
        return highlighter.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED,
                ELLIPSIS_WITH_SEPARATOR);
    } catch (IOException | InvalidTokenOffsetsException e) {
        // Both checked Lucene exceptions get the identical wrapping; multi-catch
        // removes the duplicated catch bodies.
        throw new IteraplanTechnicalException(e);
    }
}

From source file:de.joergjahnke.jdesktopsearch.SearchPanel.java

License:Open Source License

/**
 * Get HTML representation of search results.
 *
 * @param   documents   found result documents
 * @param   query   query which was executed and which contains the search terms
 * @param   maxResults  maximum number of results to display, 0 for unlimited search
 * @return  StringBuffer containing HTML to display
 */
private StringBuffer getSearchResultHTML(final Collection<Document> documents, final Query query,
        final int maxResults) {
    // create result string with HTML content
    final StringBuffer result = new StringBuffer("<html><body>");

    result.append("<i>Searching for &quot;").append(query.toString()).append("&quot;</i><br><br>");

    // get query strings to mark in result
    final Set<String> searchTerms = QueryUtils.getSearchTerms(query, "contents");

    // add hits to result string
    int found = 0;

    // Prepare the highlighter. Initializing to null explicitly keeps the variable
    // definitely assigned even if IS_USE_HIGHLIGHTER is not a compile-time constant;
    // the original conditional assignment of a blank final relied on constant folding.
    final Formatter formatter = new SimpleHTMLFormatter(MARK_START, MARK_END);
    final Highlighter highlighter = IS_USE_HIGHLIGHTER ? new Highlighter(formatter, new QueryScorer(query))
            : null;

    for (Document doc : documents) {
        final String path = doc.get("path");

        // add title if one exists
        String[] values = doc.getValues("title");

        if (null != values) {
            result.append("<b>");

            for (int j = 0; j < values.length; ++j) {
                result.append(values[j]);
                if (j != values.length - 1) {
                    result.append(' ');
                }
            }

            result.append("</b><br>");
        }

        // add part of the document
        values = doc.getValues("contents");
        if (null != values) {
            final StringBuffer contents = new StringBuffer();
            for (int j = 0; j < values.length && contents.length() < MAX_LENGTH; ++j) {
                final String value = values[j];
                String newContents = null;

                if (IS_USE_HIGHLIGHTER) {
                    final TokenStream tokenStream = new StandardAnalyzer().tokenStream("contents",
                            new StringReader(value));

                    try {
                        // get the 3 best fragments and separate them with a "..."
                        newContents = highlighter.getBestFragments(tokenStream, value, 3, "<br>...<br>");
                    } catch (IOException e) {
                        // Highlighting of one value is best-effort; report and continue.
                        System.err.println("Highlighting failed for '" + path + "': " + e.getMessage());
                    }
                } else {
                    // NOTE(review): MARK_START/MARK_END are passed to replaceAll and are
                    // therefore interpreted as regular expressions — confirm they contain
                    // no regex metacharacters.
                    newContents = highlightSearchTerms(value, searchTerms,
                            contents.toString().replaceAll(MARK_START, "").replaceAll(MARK_END, "").length());
                }

                if (null != newContents && newContents.length() > 0) {
                    contents.append(newContents);
                    contents.append(' ');
                }
            }

            if (contents.length() > 0) {
                result.append(contents.toString());
                result.append("<br>");
            }
        } else {
            System.err.println("Document has no contents field: " + path);
        }

        // add url to document
        result.append("<a href='").append(new File(path).toURI()).append("'>").append(path)
                .append("</a><br><br>");
        ++found;
    }

    result.append("<br>").append(found).append(" documents found.");

    // show link to get more results if the maximum result number was reached
    if (maxResults > 0 && documents.size() >= maxResults) {
        result.append("<br><br>The maximum of " + maxResults
                + " documents to display was reached. More results might be available. Click <a href='special://searchAgain'>here</a> to search again without a result number limitation. Please note that such a search might take a considerable amount of time depending on the number of documents found.");
    }

    result.append("</body></html>");

    return result;
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Runs a full-text query against the index and assembles the complete result:
 * matching documents (deduplicated by content hash, with asynchronously highlighted
 * snippets), optional "similar documents" via MoreLikeThis, and facet dimensions
 * (author, file type, last-modified year, language).
 *
 * @param aQueryString     the raw user query; an empty query yields an empty result page
 * @param aBacklink        backlink URL echoed into the QueryResult
 * @param aBasePath        base path used to build facet drilldown links
 * @param aConfiguration   search configuration (result count, similarity toggle)
 * @param aDrilldownFields facet field/value pairs to drill down into
 * @return the populated QueryResult
 * @throws IOException on index access errors (other failures are wrapped in RuntimeException)
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {

    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();
    SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState(
            theSearcher.getIndexReader());

    List<QueryResultDocument> theResultDocuments = new ArrayList<>();

    long theStartTime = System.currentTimeMillis();

    LOGGER.info("Querying for " + aQueryString);

    DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);

    try {

        List<FacetDimension> theDimensions = new ArrayList<>();

        // Search only if a search query is given
        if (!StringUtils.isEmpty(aQueryString)) {

            Query theQuery = computeBooleanQueryFor(aQueryString);

            LOGGER.info(" query is " + theQuery);

            theQuery = theQuery.rewrite(theSearcher.getIndexReader());

            LOGGER.info(" rewritten query is " + theQuery);

            DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
            aDrilldownFields.entrySet().stream().forEach(aEntry -> {
                LOGGER.info(" with Drilldown " + aEntry.getKey() + " for " + aEntry.getValue());
                theDrilldownQuery.add(aEntry.getKey(), aEntry.getValue());
            });

            FacetsCollector theFacetCollector = new FacetsCollector();

            TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                    aConfiguration.getNumberOfSearchResults(), theFacetCollector);
            SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                    theFacetCollector);

            List<Facet> theAuthorFacets = new ArrayList<>();
            List<Facet> theFileTypesFacets = new ArrayList<>();
            List<Facet> theLastModifiedYearFacet = new ArrayList<>();
            List<Facet> theLanguageFacet = new ArrayList<>();

            LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");

            // We need this cache to detect duplicate documents while searching for similarities
            Set<Integer> theUniqueDocumentsFound = new HashSet<>();

            Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();

            for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                int theDocumentID = theDocs.scoreDocs[i].doc;
                theUniqueDocumentsFound.add(theDocumentID);
                Document theDocument = theSearcher.doc(theDocumentID);

                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                if (theExistingDocument != null) {
                    // Same content hash seen before: merge as an additional file name
                    // instead of creating a duplicate result entry.
                    theExistingDocument.addFileName(theFoundFileName);
                } else {
                    Date theLastModified = new Date(
                            theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                    SupportedLanguage theLanguage = SupportedLanguage
                            .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                    String theFieldName;
                    if (analyzerCache.supportsLanguage(theLanguage)) {
                        theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                    } else {
                        theFieldName = IndexFields.CONTENT;
                    }

                    String theOriginalContent = theDocument.getField(theFieldName).stringValue();

                    final Query theFinalQuery = theQuery;

                    // Snippet highlighting is expensive, so it runs asynchronously;
                    // ForkJoinTask.helpQuiesce() below waits for completion.
                    ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                        StringBuilder theResult = new StringBuilder(theDateFormat.format(theLastModified));
                        theResult.append("&nbsp;-&nbsp;");
                        Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                new QueryScorer(theFinalQuery));
                        for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                            if (theResult.length() > 0) {
                                theResult = theResult.append("...");
                            }
                            theResult = theResult.append(theFragment);
                        }
                        return theResult.toString();
                    });

                    // Map the relative score onto a 0..5 "star" scale.
                    int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);

                    File theFileOnDisk = new File(theFoundFileName);
                    if (theFileOnDisk.exists()) {

                        boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);

                        // NOTE(review): LASTMODIFIED is read via numericValue() above but via
                        // Long.parseLong(stringValue()) here — confirm both accessors work for
                        // the stored field type, or unify on one of them.
                        theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                theHighligherResult,
                                Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                theNormalizedScore, theUniqueID, thePreviewAvailable);
                        theDocumentsByHash.put(theHash, theExistingDocument);
                        theResultDocuments.add(theExistingDocument);
                    }
                }
            }

            if (aConfiguration.isShowSimilarDocuments()) {

                // Attach up to 5 similar documents per hit, skipping documents that
                // already appear in the primary result list.
                MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                theMoreLikeThis.setAnalyzer(analyzer);
                theMoreLikeThis.setMinTermFreq(1);
                theMoreLikeThis.setMinDocFreq(1);
                theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());

                for (QueryResultDocument theDocument : theResultDocuments) {
                    Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                    TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                    for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                        int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                        if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                            Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                            String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                    .stringValue();
                            theDocument.addSimilarFile(theFilename);
                        }
                    }
                }
            }

            LOGGER.info("Got Dimensions");
            for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                String theDimension = theResult.dim;
                if ("author".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("extension".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("last-modified-year".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            Locale theLocale = new Locale(theLabelAndValue.label);
                            theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }

                LOGGER.info(" " + theDimension);
            }

            if (!theAuthorFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("Author", theAuthorFacets));
            }
            if (!theLastModifiedYearFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
            }
            if (!theFileTypesFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
            }
            if (!theLanguageFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Language", theLanguageFacet));
            }

            // Wait for all Tasks to complete for the search result highlighter
            ForkJoinTask.helpQuiesce();
        }

        long theDuration = System.currentTimeMillis() - theStartTime;

        LOGGER.info("Total amount of time : " + theDuration + "ms");

        return new QueryResult(System.currentTimeMillis() - theStartTime, theResultDocuments, theDimensions,
                theSearcher.getIndexReader().numDocs(), aBacklink);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        // Always hand the searcher back to the manager, even on failure.
        searcherManager.release(theSearcher);
    }
}

From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java

License:Open Source License

/**
 * Returns the five best-scoring fragments of the given content with the query terms
 * highlighted, joined by non-breaking ellipses.
 *
 * @param aAnalyzer    analyzer used to tokenize the content
 * @param aHighlighter configured Lucene highlighter
 * @param aContent     raw profile content to highlight
 * @param aQuery       the executed query (currently not used directly; the highlighter
 *                     already carries the query scorer)
 * @return the concatenated highlighted fragments
 * @throws IOException on tokenization errors
 * @throws InvalidTokenOffsetsException if token offsets exceed the content length
 */
protected String getHighlightedSearchResult(Analyzer aAnalyzer, Highlighter aHighlighter, String aContent,
        Query aQuery) throws IOException, InvalidTokenOffsetsException {
    // Wrap the analyzer's stream in a caching filter so the highlighter may
    // consume the tokens more than once.
    final CachingTokenFilter theCachedStream = new CachingTokenFilter(
            aAnalyzer.tokenStream(ProfileIndexerService.CONTENT, new StringReader(aContent)));
    return aHighlighter.getBestFragments(theCachedStream, aContent, 5, "&nbsp;...&nbsp;");
}

From source file:dk.defxws.fgslucene.Statement.java

License:Open Source License

/**
 * Executes the given query string against the Lucene index and renders the requested
 * hit page as an XML {@code <lucenesearch>} document, optionally with highlighted
 * snippets for each stored field.
 *
 * @param searcher the index searcher to run the query against
 * @param queryString the raw user query
 * @param startRecord index of the first hit to render (appears 1-based; see note below)
 * @param maxResults page size (number of hits to render)
 * @param snippetsMax maximum number of highlighted snippets per field; 0 disables highlighting
 * @param fieldMaxLength fragment size and field truncation length
 * @param analyzer analyzer used for query parsing and snippet tokenization
 * @param defaultQueryFields whitespace-separated default search field names
 * @param allowLeadingWildcard forwarded to the query parser
 * @param lowercaseExpandedTerms forwarded to the query parser
 * @param indexPath path of the index (not used in this method)
 * @param indexName index name, echoed into the result XML
 * @param snippetBegin markup inserted before each highlighted term
 * @param snippetEnd markup inserted after each highlighted term
 * @param sortFields sort specification forwarded to getHits
 * @return the rendered XML wrapped in a ResultSet
 * @throws GenericSearchException on query parse, rewrite, or index access errors
 */
public ResultSet executeQuery(IndexSearcher searcher, String queryString, int startRecord, int maxResults,
        int snippetsMax, int fieldMaxLength, Analyzer analyzer, String defaultQueryFields,
        boolean allowLeadingWildcard, boolean lowercaseExpandedTerms, String indexPath, String indexName,
        String snippetBegin, String snippetEnd, String sortFields) throws GenericSearchException {
    if (logger.isDebugEnabled())
        logger.debug("executeQuery" + " query=" + queryString + " startRecord=" + startRecord + " maxResults="
                + maxResults + " snippetsMax=" + snippetsMax + " fieldMaxLength=" + fieldMaxLength
                + " indexName=" + indexName + " sortFields=" + sortFields + " defaultQueryFields="
                + defaultQueryFields + " allowLeadingWildcard=" + allowLeadingWildcard
                + " lowercaseExpandedTerms=" + lowercaseExpandedTerms);
    this.searcher = searcher;
    ResultSet rs = null;
    // Split the whitespace-separated default field list into an array.
    StringTokenizer defaultFieldNames = new StringTokenizer(defaultQueryFields);
    int countFields = defaultFieldNames.countTokens();
    String[] defaultFields = new String[countFields];
    for (int i = 0; i < countFields; i++) {
        defaultFields[i] = defaultFieldNames.nextToken();
    }
    Query query = null;
    // Single default field: plain QueryParser; multiple fields: MultiFieldQueryParser.
    if (defaultFields.length == 1) {
        QueryParser queryParser = new QueryParser(Version.LUCENE_36, defaultFields[0], analyzer);
        queryParser.setAllowLeadingWildcard(allowLeadingWildcard);
        queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
        if (logger.isDebugEnabled())
            logger.debug("executeQuery queryParser" + " allowLeadingWildcard="
                    + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms="
                    + queryParser.getLowercaseExpandedTerms());
        try {
            query = queryParser.parse(queryString);
        } catch (ParseException e) {
            throw new GenericSearchException(e.toString());
        }
    } else {
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, defaultFields,
                analyzer);
        queryParser.setAllowLeadingWildcard(allowLeadingWildcard);
        queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
        if (logger.isDebugEnabled())
            logger.debug("executeQuery mfqueryParser" + " allowLeadingWildcard="
                    + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms="
                    + queryParser.getLowercaseExpandedTerms());
        try {
            query = queryParser.parse(queryString);
        } catch (ParseException e) {
            throw new GenericSearchException(e.toString());
        }
    }
    if (logger.isDebugEnabled())
        logger.debug("executeQuery after parse query=" + query);
    try {
        query.rewrite(searcher.getIndexReader());
    } catch (Exception e) {
        throw new GenericSearchException(e.toString());
    }
    if (logger.isDebugEnabled())
        logger.debug("executeQuery after rewrite query=" + query);
    int start = Integer.parseInt(Integer.toString(startRecord));
    TopDocs hits = getHits(query, start + maxResults - 1, sortFields);
    ScoreDoc[] docs = hits.scoreDocs;
    int end = Math.min(hits.totalHits, start + maxResults - 1);
    if (logger.isDebugEnabled())
        logger.debug("executeQuery hits.totalHits=" + hits.totalHits);
    StringBuffer resultXml = new StringBuffer();
    resultXml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    String queryStringEncoded = null;
    try {
        queryStringEncoded = URLEncoder.encode(queryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        errorExit(e.toString());
    }
    resultXml.append("<lucenesearch " + "   xmlns:dc=\"http://purl.org/dc/elements/1.1/" + "\" query=\""
            + queryStringEncoded + "\" indexName=\"" + indexName + "\" sortFields=\"" + sortFields
            + "\" hitPageStart=\"" + startRecord + "\" hitPageSize=\"" + maxResults + "\" hitTotal=\""
            + hits.totalHits + "\">");
    ScoreDoc hit = null;
    Document doc = null;
    String hitsScore = null;
    // NOTE(review): the docs[i - 1] access below implies startRecord is 1-based
    // while scoreDocs is 0-based — confirm with the callers of this method.
    for (int i = start; i <= end; i++) {
        try {
            hit = docs[i - 1];
            doc = searcher.doc(hit.doc);
            hitsScore = "" + hit.score;
        } catch (CorruptIndexException e) {
            errorExit(e.toString());
        } catch (IOException e) {
            errorExit(e.toString());
        }
        resultXml.append("<hit no=\"" + i + "\" score=\"" + hitsScore + "\">");
        for (ListIterator li = doc.getFields().listIterator(); li.hasNext();) {
            Fieldable f = (Fieldable) li.next();
            resultXml.append("<field name=\"" + f.name() + "\"");
            String snippets = null;
            if (snippetsMax > 0) {
                // Highlight with placeholder markers first, then substitute the caller's
                // snippetBegin/snippetEnd after XML-encoding the fragment text.
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("!!!SNIPPETBEGIN", "!!!SNIPPETEND");
                QueryScorer scorer = new QueryScorer(query, f.name());
                Highlighter highlighter = new Highlighter(formatter, scorer);
                Fragmenter fragmenter = new SimpleFragmenter(fieldMaxLength);
                highlighter.setTextFragmenter(fragmenter);
                TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(f.stringValue()));
                try {
                    snippets = highlighter.getBestFragments(tokenStream, f.stringValue(), snippetsMax, " ... ");
                } catch (Exception e) { // all Exceptions to be caught, not just IOException 
                    errorExit(e.toString());
                }
                snippets = checkTruncatedWords(snippets, " ... ");
                snippets = StreamUtility.enc(snippets);
                snippets = snippets.replaceAll("!!!SNIPPETBEGIN", snippetBegin);
                snippets = snippets.replaceAll("!!!SNIPPETEND", snippetEnd);
                if (snippets != null && !snippets.equals("")) {
                    resultXml.append(" snippet=\"yes\">" + snippets);
                }
            }
            // No snippet produced: emit the (possibly truncated) raw field value instead.
            if (snippets == null || snippets.equals(""))
                if (fieldMaxLength > 0 && f.stringValue().length() > fieldMaxLength) {
                    String snippet = f.stringValue().substring(0, fieldMaxLength);
                    // Avoid cutting through a trailing character entity like "&amp;".
                    int iamp = snippet.lastIndexOf("&");
                    if (iamp > -1 && iamp > fieldMaxLength - 8)
                        snippet = snippet.substring(0, iamp);
                    resultXml.append(">" + StreamUtility.enc(snippet) + " ... ");
                } else
                    resultXml.append(">" + StreamUtility.enc(f.stringValue()));
            resultXml.append("</field>");
        }
        resultXml.append("</hit>");
    }
    resultXml.append("</lucenesearch>");
    if (logger.isDebugEnabled()) {
        // Log at most the first 500 characters of the result XML.
        int size = 500;
        if (resultXml.length() < size)
            size = resultXml.length();
        String debugString = resultXml.substring(0, size);
        if (resultXml.length() > size)
            debugString += "...";
        logger.debug("executeQuery resultXml=" + debugString);
    }
    rs = new ResultSet(resultXml);
    return rs;
}

From source file:dk.defxws.fgssolr.Statement.java

License:Open Source License

/**
 * Executes the given query string against the Solr-backed index and renders the
 * requested hit page as an XML {@code <solrsearch>} document, optionally with
 * highlighted snippets for each stored field. Leading wildcards and lowercasing of
 * expanded terms are always enabled in this variant.
 *
 * @param searcher the index searcher to run the query against
 * @param queryString the raw user query
 * @param startRecord index of the first hit to render (appears 1-based; see note below)
 * @param maxResults page size (number of hits to render)
 * @param snippetsMax maximum number of highlighted snippets per field; 0 disables highlighting
 * @param fieldMaxLength fragment size and field truncation length
 * @param analyzer analyzer used for query parsing and snippet tokenization
 * @param defaultQueryFields whitespace-separated default search field names
 * @param indexPath path of the index (not used in this method)
 * @param indexName index name, echoed into the result XML
 * @param snippetBegin markup inserted before each highlighted term
 * @param snippetEnd markup inserted after each highlighted term
 * @param sortFields sort specification forwarded to getHits
 * @return the rendered XML wrapped in a ResultSet
 * @throws GenericSearchException on query parse, rewrite, or index access errors
 */
public ResultSet executeQuery(IndexSearcher searcher, String queryString, int startRecord, int maxResults,
        int snippetsMax, int fieldMaxLength, Analyzer analyzer, String defaultQueryFields, String indexPath,
        String indexName, String snippetBegin, String snippetEnd, String sortFields)
        throws GenericSearchException {
    boolean allowLeadingWildcard = true;
    boolean lowercaseExpandedTerms = true;
    if (logger.isDebugEnabled())
        logger.debug("executeQuery" + " query=" + queryString + " startRecord=" + startRecord + " maxResults="
                + maxResults + " snippetsMax=" + snippetsMax + " fieldMaxLength=" + fieldMaxLength
                + " indexName=" + indexName + " sortFields=" + sortFields + " defaultQueryFields="
                + defaultQueryFields + " allowLeadingWildcard=" + allowLeadingWildcard
                + " lowercaseExpandedTerms=" + lowercaseExpandedTerms);
    this.searcher = searcher;
    ResultSet rs = null;
    // Split the whitespace-separated default field list into an array.
    StringTokenizer defaultFieldNames = new StringTokenizer(defaultQueryFields);
    int countFields = defaultFieldNames.countTokens();
    String[] defaultFields = new String[countFields];
    for (int i = 0; i < countFields; i++) {
        defaultFields[i] = defaultFieldNames.nextToken();
    }
    Query query = null;
    // Single default field: plain QueryParser; multiple fields: MultiFieldQueryParser.
    if (defaultFields.length == 1) {
        QueryParser queryParser = new QueryParser(Version.LUCENE_36, defaultFields[0], analyzer);
        queryParser.setAllowLeadingWildcard(allowLeadingWildcard);
        queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
        if (logger.isDebugEnabled())
            logger.debug("executeQuery queryParser" + " allowLeadingWildcard="
                    + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms="
                    + queryParser.getLowercaseExpandedTerms());
        try {
            query = queryParser.parse(queryString);
        } catch (ParseException e) {
            throw new GenericSearchException(e.toString());
        }
    } else {
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, defaultFields,
                analyzer);
        queryParser.setAllowLeadingWildcard(allowLeadingWildcard);
        queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
        if (logger.isDebugEnabled())
            logger.debug("executeQuery mfqueryParser" + " allowLeadingWildcard="
                    + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms="
                    + queryParser.getLowercaseExpandedTerms());
        try {
            query = queryParser.parse(queryString);
        } catch (ParseException e) {
            throw new GenericSearchException(e.toString());
        }
    }
    if (logger.isDebugEnabled())
        logger.debug("executeQuery after parse query=" + query);
    try {
        query.rewrite(searcher.getIndexReader());
    } catch (Exception e) {
        throw new GenericSearchException(e.toString());
    }
    if (logger.isDebugEnabled())
        logger.debug("executeQuery after rewrite query=" + query);
    int start = Integer.parseInt(Integer.toString(startRecord));
    TopDocs hits = getHits(query, start + maxResults - 1, sortFields);
    ScoreDoc[] docs = hits.scoreDocs;
    int end = Math.min(hits.totalHits, start + maxResults - 1);
    if (logger.isDebugEnabled())
        logger.debug("executeQuery hits.totalHits=" + hits.totalHits);
    StringBuffer resultXml = new StringBuffer();
    resultXml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    String queryStringEncoded = null;
    try {
        queryStringEncoded = URLEncoder.encode(queryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        errorExit(e.toString());
    }
    resultXml.append("<solrsearch " + "   xmlns:dc=\"http://purl.org/dc/elements/1.1/" + "\" query=\""
            + queryStringEncoded + "\" indexName=\"" + indexName + "\" sortFields=\"" + sortFields
            + "\" hitPageStart=\"" + startRecord + "\" hitPageSize=\"" + maxResults + "\" hitTotal=\""
            + hits.totalHits + "\">");
    ScoreDoc hit = null;
    Document doc = null;
    String hitsScore = null;
    // NOTE(review): the docs[i - 1] access below implies startRecord is 1-based
    // while scoreDocs is 0-based — confirm with the callers of this method.
    for (int i = start; i <= end; i++) {
        try {
            hit = docs[i - 1];
            doc = searcher.doc(hit.doc);
            hitsScore = "" + hit.score;
        } catch (CorruptIndexException e) {
            errorExit(e.toString());
        } catch (IOException e) {
            errorExit(e.toString());
        }
        resultXml.append("<hit no=\"" + i + "\" score=\"" + hitsScore + "\">");
        for (ListIterator li = doc.getFields().listIterator(); li.hasNext();) {
            Fieldable f = (Fieldable) li.next();
            resultXml.append("<field name=\"" + f.name() + "\"");
            String snippets = null;
            if (snippetsMax > 0) {
                // Highlight with placeholder markers first, then substitute the caller's
                // snippetBegin/snippetEnd after XML-encoding the fragment text.
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("!!!SNIPPETBEGIN", "!!!SNIPPETEND");
                QueryScorer scorer = new QueryScorer(query, f.name());
                Highlighter highlighter = new Highlighter(formatter, scorer);
                Fragmenter fragmenter = new SimpleFragmenter(fieldMaxLength);
                highlighter.setTextFragmenter(fragmenter);
                TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(f.stringValue()));
                try {
                    snippets = highlighter.getBestFragments(tokenStream, f.stringValue(), snippetsMax, " ... ");
                } catch (Exception e) { // all Exceptions to be caught, not just IOException 
                    errorExit(e.toString());
                }
                snippets = checkTruncatedWords(snippets, " ... ");
                snippets = StreamUtility.enc(snippets);
                snippets = snippets.replaceAll("!!!SNIPPETBEGIN", snippetBegin);
                snippets = snippets.replaceAll("!!!SNIPPETEND", snippetEnd);
                if (snippets != null && !snippets.equals("")) {
                    resultXml.append(" snippet=\"yes\">" + snippets);
                }
            }
            // No snippet produced: emit the (possibly truncated) raw field value instead.
            if (snippets == null || snippets.equals(""))
                if (fieldMaxLength > 0 && f.stringValue().length() > fieldMaxLength) {
                    String snippet = f.stringValue().substring(0, fieldMaxLength);
                    // Avoid cutting through a trailing character entity like "&amp;".
                    int iamp = snippet.lastIndexOf("&");
                    if (iamp > -1 && iamp > fieldMaxLength - 8)
                        snippet = snippet.substring(0, iamp);
                    resultXml.append(">" + StreamUtility.enc(snippet) + " ... ");
                } else
                    resultXml.append(">" + StreamUtility.enc(f.stringValue()));
            resultXml.append("</field>");
        }
        resultXml.append("</hit>");
    }
    resultXml.append("</solrsearch>");
    if (logger.isDebugEnabled()) {
        // Log at most the first 500 characters of the result XML.
        int size = 500;
        if (resultXml.length() < size)
            size = resultXml.length();
        String debugString = resultXml.substring(0, size);
        if (resultXml.length() > size)
            debugString += "...";
        logger.debug("executeQuery resultXml=" + debugString);
    }
    rs = new ResultSet(resultXml);
    return rs;
}

From source file:docet.engine.SimpleDocetDocSearcher.java

License:Apache License

/**
 * Searches the Lucene index for pages matching {@code searchText} in the given language.
 * <p>
 * If a fallback language is configured for {@code lang}, the search is performed against
 * the fallback language's content field instead. Each hit's relevance is normalized to a
 * 0-100 score relative to the best-scoring document, and matching excerpts are extracted
 * with a {@link Highlighter}.
 *
 * @param searchText    free-text query; passed through {@code constructLucenePhraseTermSearchQuery}
 * @param lang          requested language code
 * @param maxNumResults maximum number of hits to return
 * @return list of matching {@link DocetPage}s with highlighted excerpts and relevance scores
 * @throws DocetDocumentSearchException if query parsing, index access, or highlighting fails
 */
@Override
public List<DocetPage> searchForMatchingDocuments(final String searchText, final String lang,
        final int maxNumResults) throws DocetDocumentSearchException {
    final List<DocetPage> results = new ArrayList<>();
    final String fallbackLang = this.getFallbackLangForLang(lang);
    final String actualSearchLang;
    if (fallbackLang.isEmpty()) {
        actualSearchLang = lang;
    } else {
        actualSearchLang = fallbackLang;
    }
    // Analyzer is Closeable: use try-with-resources so it is released on every path
    // (the original leaked it). close() may throw IOException, already handled below.
    try (Analyzer analyzer = new AnalyzerBuilder().language(actualSearchLang).build()) {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final QueryParser queryParser = new QueryParser(LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang, analyzer);
        final Query query = queryParser.parse(constructLucenePhraseTermSearchQuery(searchText));
        final QueryScorer queryScorer = new QueryScorer(query, LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang);

        final Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        final Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        highlighter.setTextFragmenter(fragmenter);

        final TopDocs res = searcher.search(query, maxNumResults);
        final float maxScore = res.getMaxScore();
        final List<ScoreDoc> scoreDocs = Arrays.asList(res.scoreDocs);
        // Document does not override equals/hashCode, so this map is keyed by identity:
        // each hit produces a distinct Document instance, which is what we rely on here.
        Map<org.apache.lucene.document.Document, String> docs = new HashMap<>();
        Map<String, ScoreDoc> scoresForDocs = new HashMap<>();
        for (final ScoreDoc sd : scoreDocs) {
            final org.apache.lucene.document.Document doc = searcher.doc(sd.doc);
            final String contents = doc.get(LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang);
            final String docId = doc.get("id");
            final String[] fragments = highlighter.getBestFragments(analyzer,
                    LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang, contents, MAX_NUM_FRAGMENTS);
            // Collapse each multi-line fragment into a single excerpt, dropping blank lines
            // and joining the remaining lines with the excerpt separator.
            List<String> fragmentList = Arrays.asList(fragments);
            fragmentList = fragmentList.stream().map(s1 -> s1.trim().split("\n"))
                    .map(s1 -> Arrays.asList(s1).stream().filter(s -> !s.trim().isEmpty())
                            .reduce((sa, sb) -> sa + MACHING_EXCERPTS_SEPARATOR + sb)
                            .orElse(MACHING_EXCERPTS_SEPARATOR))
                    .collect(Collectors.toList());
            docs.put(doc,
                    MACHING_EXCERPTS_SEPARATOR
                            + fragmentList.stream().filter(s -> !s.isEmpty())
                                    .reduce((s1, s2) -> s1 + "..." + s2).orElse("")
                            + MACHING_EXCERPTS_SEPARATOR);
            // Keep the first (highest-ranked) score seen for a given document id.
            scoresForDocs.putIfAbsent(docId, sd);
        }
        docs.entrySet().stream().forEach(e -> {
            // Relevance as a percentage of the top hit's score.
            final int relevance = Math.round((scoresForDocs.get(e.getKey().get("id")).score / maxScore) * 100);
            results.add(DocetPage.toDocetDocument(e.getKey(), e.getValue(), relevance));
        });
        return results;
    } catch (ParseException | IOException | InvalidTokenOffsetsException ex) {
        throw new DocetDocumentSearchException(
                "Error on searching query " + searchText + " for lang " + actualSearchLang, ex);
    }
}

From source file:edu.virginia.cs.searcher.PostSearcher.java

/**
 * Performs the actual Lucene search and builds highlighted snippets for each hit.
 *
 * @param luceneQuery the parsed Lucene query to execute against the index
 * @param searchQuery the original search request (fields searched, result count)
 * @return a {@link SearchResult} containing one {@link Posts} entry per hit with its
 *         snippet; an empty {@code SearchResult} if an I/O error occurs while searching
 */
private SearchResult runSearch(Query luceneQuery, SearchQuery searchQuery) {
    try {
        TopDocs docs = indexSearcher.search(luceneQuery, searchQuery.numResults());
        ScoreDoc[] hits = docs.scoreDocs;
        String field = searchQuery.fields().get(0);

        // The highlighter depends only on the query, not on individual hits,
        // so build it once instead of once per hit (hoisted out of the loop).
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(luceneQuery));

        SearchResult searchResult = new SearchResult(searchQuery, docs.totalHits);
        for (ScoreDoc hit : hits) {
            Document doc = indexSearcher.doc(hit.doc);
            Posts pdoc = new Posts();
            String highlighted = null;
            try {
                String contents = doc.getField(field).stringValue();
                // Populate the post bean from the stored index fields.
                pdoc.setId(Integer.parseInt(doc.getField("id").stringValue()));
                pdoc.setBody(doc.getField("body").stringValue());
                pdoc.setTitle(doc.getField("title").stringValue());
                pdoc.setCode(doc.getField("code").stringValue());
                pdoc.setTags(doc.getField("tags").stringValue());
                pdoc.setScore(Integer.parseInt(doc.getField("score").stringValue()));
                pdoc.setAcceptedAnswerId(Integer.parseInt(doc.getField("acceptedAnswerId").stringValue()));
                pdoc.setViewCount(Integer.parseInt(doc.getField("viewCount").stringValue()));
                pdoc.setAnswerCount(Integer.parseInt(doc.getField("answerCount").stringValue()));
                pdoc.setCommentCount(Integer.parseInt(doc.getField("commentCount").stringValue()));
                pdoc.setFavoriteCount(Integer.parseInt(doc.getField("favoriteCount").stringValue()));

                String[] snippets = highlighter.getBestFragments(analyzer, field, contents, numFragments);
                highlighted = createOneSnippet(snippets);
            } catch (InvalidTokenOffsetsException exception) {
                // Highlighting failed for this document; keep the (possibly partial)
                // post but fall back to a placeholder snippet.
                exception.printStackTrace();
                highlighted = "(no snippets yet)";
            }
            searchResult.addResult(pdoc);
            searchResult.setSnippet(pdoc, highlighted);
        }
        return searchResult;
    } catch (IOException exception) {
        exception.printStackTrace();
    }
    // Index access failed entirely: return an empty result for this query.
    return new SearchResult(searchQuery);
}

From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License:Mozilla Public License

/**
 * Searches the Lucene index for {@code queryString}, restricted to the current tenant and
 * optionally to a single metadata field, and returns the raw hits plus highlighted summaries.
 * <p>
 * The returned map contains: key {@code "hits"} → the {@code ScoreDoc[]} of matches;
 * key {@code <biobjId> + "-views"} → space-separated sub-object names for that object;
 * key {@code <biobjId>} → a highlighted summary (truncated to ~100 chars) for that object.
 *
 * @param queryString      user query, parsed across {@code fields}
 * @param index            index name (currently unused by the method body — kept for API compatibility)
 * @param fields           document fields to search
 * @param metaDataToSearch if non-null, restrict hits to documents with this metadata name
 * @throws IOException    on index access failure
 * @throws ParseException if {@code queryString} cannot be parsed
 */
public static HashMap<String, Object> searchIndex(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    logger.debug("IN");
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery andQuery = new BooleanQuery();
    if (metaDataToSearch != null) {
        // Restrict to documents whose metadata name matches metaDataToSearch.
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);
    }
    Query query = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer).parse(queryString);
    andQuery.add(query, BooleanClause.Occur.MUST);
    // Always scope the search to the current tenant.
    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);
    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);

    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    // Hand the raw hits back to the caller (action layer).
    objectsToReturn.put("hits", hits);

    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));
    if (hits != null) {
        logger.debug("hits size: " + hits.length);
        for (int i = 0; i < hits.length; i++) {
            ScoreDoc hit = hits[i];
            Document doc = searcher.doc(hit.doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);

            String[] subobjNames = doc.getValues(IndexingConstants.SUBOBJ_NAME);
            if (subobjNames != null && subobjNames.length != 0) {
                // Build the views string with a StringBuilder instead of repeated
                // String concatenation (avoids accidental O(n^2) behavior).
                StringBuilder views = new StringBuilder();
                for (int k = 0; k < subobjNames.length; k++) {
                    views.append(subobjNames[k]).append(" ");
                }
                objectsToReturn.put(biobjId + "-views", views.toString());
            }
            // The highlighter is constructed just above and can never be null,
            // so the original's null check was dropped.
            String summary = "";
            String[] summaries;
            try {
                Integer idobj = (Integer.valueOf(biobjId));

                String contentToSearchOn = fillSummaryText(idobj);

                summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT),
                        IndexingConstants.CONTENTS, contentToSearchOn, 3);
                StringBuffer summaryBuffer = new StringBuffer();
                if (summaries.length > 0) {
                    summaryBuffer.append(summaries[0]);
                }
                for (int j = 1; j < summaries.length; j++) {
                    summaryBuffer.append(" ... ");
                    summaryBuffer.append(summaries[j]);
                }
                summary = summaryBuffer.toString();
                // Truncate: show only the first 100 characters of the summary.
                if (summary.length() > 101) {
                    summary = summary.substring(0, 100);
                    summary += "...";
                }
                objectsToReturn.put(biobjId, summary);
            } catch (Exception e) {
                // The original caught InvalidTokenOffsetsException, NumberFormatException
                // and Exception with identical handling; collapsed into one catch.
                logger.error(e.getMessage(), e);
            }
        }
    }
    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    logger.debug("OUT");
    return objectsToReturn;

}