Example usage for org.apache.lucene.search.highlight Highlighter Highlighter

List of usage examples for org.apache.lucene.search.highlight Highlighter Highlighter

Introduction

On this page you can find example usages of the org.apache.lucene.search.highlight.Highlighter constructor.

Prototype

public Highlighter(Formatter formatter, Scorer fragmentScorer) 

Source Link

Usage

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Runs a faceted full-text search and assembles the result page.
 *
 * <p>When {@code aQueryString} is empty no search is executed and the returned result contains no
 * documents and no facet dimensions. Otherwise the query is rewritten, wrapped in a drill-down query,
 * executed, and each hit is turned into a {@link QueryResultDocument}. Hits sharing the same content
 * hash are merged into one result entry. Highlighting runs asynchronously on {@code executorPool};
 * the method waits for those tasks before returning.
 *
 * @param aQueryString     the user query; an empty value skips the search
 * @param aBacklink        passed through unchanged to the returned {@link QueryResult}
 * @param aBasePath        prefix of the drill-down links generated for every facet value
 * @param aConfiguration   supplies the maximum number of hits and the "show similar documents" switch
 * @param aDrilldownFields dimension/value pairs added as restrictions to the drill-down query
 * @return the result documents, facet dimensions, elapsed time and total index size
 * @throws IOException      if refreshing, acquiring or releasing the searcher fails, or if the facet
 *                          reader state cannot be created
 * @throws RuntimeException wrapping any exception raised while the search itself is processed
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {

    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();

    // The searcher is already acquired here, so it has to be handed back if building the
    // facet state fails; the original exception is rethrown unchanged.
    SortedSetDocValuesReaderState theSortedSetState;
    try {
        theSortedSetState = new DefaultSortedSetDocValuesReaderState(theSearcher.getIndexReader());
    } catch (Exception e) {
        searcherManager.release(theSearcher);
        throw e;
    }

    List<QueryResultDocument> theResultDocuments = new ArrayList<>();

    long theStartTime = System.currentTimeMillis();

    LOGGER.info("Querying for " + aQueryString);

    DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);

    try {

        List<FacetDimension> theDimensions = new ArrayList<>();

        // Search only if a search query is given
        if (!StringUtils.isEmpty(aQueryString)) {

            Query theQuery = computeBooleanQueryFor(aQueryString);

            LOGGER.info(" query is " + theQuery);

            theQuery = theQuery.rewrite(theSearcher.getIndexReader());

            LOGGER.info(" rewritten query is " + theQuery);

            DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
            aDrilldownFields.entrySet().stream().forEach(aEntry -> {
                LOGGER.info(" with Drilldown " + aEntry.getKey() + " for " + aEntry.getValue());
                theDrilldownQuery.add(aEntry.getKey(), aEntry.getValue());
            });

            FacetsCollector theFacetCollector = new FacetsCollector();

            TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                    aConfiguration.getNumberOfSearchResults(), theFacetCollector);
            SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                    theFacetCollector);

            List<Facet> theAuthorFacets = new ArrayList<>();
            List<Facet> theFileTypesFacets = new ArrayList<>();
            List<Facet> theLastModifiedYearFacet = new ArrayList<>();
            List<Facet> theLanguageFacet = new ArrayList<>();

            LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");

            // We need this cache to detect duplicate documents while searching for similarities
            Set<Integer> theUniqueDocumentsFound = new HashSet<>();

            // Result entries keyed by content hash, used to merge hits with identical content
            Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();

            for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                int theDocumentID = theDocs.scoreDocs[i].doc;
                theUniqueDocumentsFound.add(theDocumentID);
                Document theDocument = theSearcher.doc(theDocumentID);

                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                if (theExistingDocument != null) {
                    // Same content already listed: only record the additional file name
                    theExistingDocument.addFileName(theFoundFileName);
                } else {
                    Date theLastModified = new Date(
                            theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                    SupportedLanguage theLanguage = SupportedLanguage
                            .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                    String theFieldName;
                    if (analyzerCache.supportsLanguage(theLanguage)) {
                        theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                    } else {
                        theFieldName = IndexFields.CONTENT;
                    }

                    String theOriginalContent = theDocument.getField(theFieldName).stringValue();

                    final Query theFinalQuery = theQuery;

                    // SimpleDateFormat is not thread-safe and the highlighter tasks run concurrently,
                    // so the date is formatted here on the calling thread instead of inside the task.
                    final String theFormattedDate = theDateFormat.format(theLastModified);

                    ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                        StringBuilder theResult = new StringBuilder(theFormattedDate);
                        theResult.append("&nbsp;-&nbsp;");
                        Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                new QueryScorer(theFinalQuery));
                        for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                            // NOTE(review): the builder already holds the date prefix, so this is always
                            // true and "..." also precedes the first fragment - confirm this is intended.
                            if (theResult.length() > 0) {
                                theResult = theResult.append("...");
                            }
                            theResult = theResult.append(theFragment);
                        }
                        return theResult.toString();
                    });

                    // Scale the hit score relative to the best hit into the range 0..5
                    int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);

                    // Only files that still exist on disk are listed
                    File theFileOnDisk = new File(theFoundFileName);
                    if (theFileOnDisk.exists()) {

                        boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);

                        theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                theHighligherResult,
                                Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                theNormalizedScore, theUniqueID, thePreviewAvailable);
                        theDocumentsByHash.put(theHash, theExistingDocument);
                        theResultDocuments.add(theExistingDocument);
                    }
                }
            }

            if (aConfiguration.isShowSimilarDocuments()) {

                MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                theMoreLikeThis.setAnalyzer(analyzer);
                theMoreLikeThis.setMinTermFreq(1);
                theMoreLikeThis.setMinDocFreq(1);
                theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());

                for (QueryResultDocument theDocument : theResultDocuments) {
                    Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                    TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                    for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                        int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                        // add() returns false for documents that were already seen, which are skipped
                        if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                            Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                            String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                    .stringValue();
                            theDocument.addSimilarFile(theFilename);
                        }
                    }
                }
            }

            LOGGER.info("Got Dimensions");
            for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                String theDimension = theResult.dim;
                if ("author".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("extension".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("last-modified-year".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            // The facet label is used as a language code; the display name is shown
                            Locale theLocale = new Locale(theLabelAndValue.label);
                            theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }

                LOGGER.info(" " + theDimension);
            }

            if (!theAuthorFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("Author", theAuthorFacets));
            }
            if (!theLastModifiedYearFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
            }
            if (!theFileTypesFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
            }
            if (!theLanguageFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Language", theLanguageFacet));
            }

            // Wait for all Tasks to complete for the search result highlighter
            ForkJoinTask.helpQuiesce();
        }

        long theDuration = System.currentTimeMillis() - theStartTime;

        LOGGER.info("Total amount of time : " + theDuration + "ms");

        // Report the same duration that was logged
        return new QueryResult(theDuration, theResultDocuments, theDimensions,
                theSearcher.getIndexReader().numDocs(), aBacklink);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        searcherManager.release(theSearcher);
    }
}

From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java

License:Open Source License

/**
 * Loads one page of freelancer profiles matching a saved profile search.
 *
 * <p>The query derived from the request is combined with exclusions for every profile the request
 * marks as ignored, restricted by a contact-lock filter and optional hourly-rate bounds, optionally
 * sorted, and executed through Hibernate Search.
 *
 * @param aRequest the saved search; a request without an id yields an empty page
 * @param startRow index of the first row to return
 * @param pageSize maximum number of rows to return
 * @return the requested page together with the total result size
 * @throws Exception if building or executing the query fails
 */
@Override
public DataPage<ProfileSearchEntry> findProfileDataPage(SavedProfileSearch aRequest, int startRow, int pageSize)
        throws Exception {

    // Can happen when the search is invoked for the very first time
    if (aRequest.getId() == null) {
        return new DataPage<ProfileSearchEntry>(0, 0, new ArrayList<ProfileSearchEntry>());
    }

    Analyzer analyzer = ProfileAnalyzerFactory.createAnalyzer();
    FullTextSession session = Search.getFullTextSession(sessionFactory.getCurrentSession());
    Query userQuery = getRealQuery(aRequest, analyzer);

    LOGGER.info("Search query is " + userQuery + " from " + startRow + " with pagesize " + pageSize);

    // Highlighting is scored against the plain user query, not the one carrying the exclusions
    Highlighter highlighter = new Highlighter(new SpanGradientFormatter(1, "#000000", "#0000FF", null, null),
            new QueryScorer(userQuery));

    // The user query must match; every ignored profile is excluded by its unique id
    BooleanQuery restrictedQuery = new BooleanQuery();
    restrictedQuery.add(userQuery, Occur.MUST);
    for (String ignoredId : aRequest.getProfilesToIgnore()) {
        restrictedQuery.add(new TermQuery(new Term(ProfileIndexerService.UNIQUE_ID, ignoredId)), Occur.MUST_NOT);
    }

    LOGGER.info("Query with ignore is " + restrictedQuery);

    Sort sort = null;
    if (!StringUtils.isEmpty(aRequest.getSortierung())) {
        String sortField = aRequest.getSortierungField();

        // The hourly rate is sorted numerically, everything else as string
        int sortType = ProfileIndexerService.STUNDENSATZ.equals(sortField) ? SortField.LONG : SortField.STRING;

        // Availability and last contact are sorted in reverse order by default
        boolean reverseByDefault = ProfileIndexerService.VERFUEGBARKEIT.equals(sortField)
                || ProfileIndexerService.LETZTERKONTAKT.equals(sortField);
        boolean reverse = aRequest.isSortierungReverse() ? !reverseByDefault : reverseByDefault;

        sort = new Sort(new SortField(sortField, sortType, reverse));
    }

    List<Filter> filters = new ArrayList<Filter>();

    // Only documents whose contact-lock field holds "false" pass this filter
    TermsFilter contactAllowed = new TermsFilter();
    contactAllowed.addTerm(new Term(ProfileIndexerService.KONTAKTSPERRE, "false"));
    filters.add(contactAllowed);

    // Optional inclusive lower and upper bounds on the hourly rate
    if (aRequest.getStundensatzVon() != null) {
        filters.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ,
                aRequest.getStundensatzVon(), Long.MAX_VALUE, true, true));
    }
    if (aRequest.getStundensatzBis() != null) {
        filters.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ, 0L,
                aRequest.getStundensatzBis(), true, true));
    }

    Filter combinedFilter = new ChainedFilter(filters.toArray(new Filter[filters.size()]), ChainedFilter.AND);

    FullTextQuery hibernateQuery = session.createFullTextQuery(restrictedQuery, Freelancer.class);
    hibernateQuery.setFilter(combinedFilter);
    if (sort != null) {
        hibernateQuery.setSort(sort);
    }
    hibernateQuery.setFirstResult(startRow);
    hibernateQuery.setMaxResults(pageSize);
    // Each row carries the entity itself and the underlying Lucene document
    hibernateQuery.setProjection(FullTextQuery.THIS, FullTextQuery.DOCUMENT);

    List<ProfileSearchEntry> entries = new ArrayList<ProfileSearchEntry>();
    for (Object rowObject : hibernateQuery.list()) {
        Object[] row = (Object[]) rowObject;
        Freelancer freelancer = (Freelancer) row[0];
        Document document = (Document) row[1];
        entries.add(createResultEntry(analyzer, userQuery, highlighter, freelancer, document));
    }

    return new DataPage<ProfileSearchEntry>(hibernateQuery.getResultSize(), startRow, entries);
}

From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java

License:Open Source License

/**
 * Finds freelancers whose indexed profile content resembles that of the given freelancer.
 *
 * <p>The freelancer's own index document is looked up by unique id, its original content is fed into
 * a MoreLikeThis query, and up to 50 matches other than the freelancer itself are returned.
 *
 * @param aFreelancer the reference freelancer; {@code null} or a freelancer without id yields an
 *                    empty list
 * @return result entries for the similar freelancers, never {@code null}
 * @throws RuntimeException wrapping any exception raised while querying the index
 */
@Override
public List<ProfileSearchEntry> getSimilarFreelancer(Freelancer aFreelancer) {
    List<ProfileSearchEntry> similarEntries = new ArrayList<ProfileSearchEntry>();
    if (aFreelancer == null || aFreelancer.getId() == null) {
        return similarEntries;
    }

    FullTextSession session = Search.getFullTextSession(sessionFactory.getCurrentSession());
    SearchFactory searchFactory = session.getSearchFactory();

    Analyzer analyzer = ProfileAnalyzerFactory.createAnalyzer();

    DirectoryProvider freelancerProvider = searchFactory.getDirectoryProviders(Freelancer.class)[0];

    IndexReader indexReader = null;

    try {
        indexReader = searchFactory.getReaderProvider().openReader(freelancerProvider);

        // The same settings apply to every lookup, so configure the instance once
        MoreLikeThis moreLikeThis = new MoreLikeThis(indexReader);
        moreLikeThis.setMinDocFreq(1);
        moreLikeThis.setMinTermFreq(1);
        moreLikeThis.setAnalyzer(analyzer);
        moreLikeThis.setFieldNames(new String[] { ProfileIndexerService.CONTENT });

        // First look up the freelancer's own index entry
        Query selfQuery = new TermQuery(
                new Term(ProfileIndexerService.UNIQUE_ID, aFreelancer.getId().toString()));
        FullTextQuery selfLookup = session.createFullTextQuery(selfQuery, Freelancer.class);
        selfLookup.setProjection(FullTextQuery.THIS, FullTextQuery.DOCUMENT);

        for (Object selfRowObject : selfLookup.list()) {
            Object[] selfRow = (Object[]) selfRowObject;
            Freelancer indexedFreelancer = (Freelancer) selfRow[0];
            Document indexedDocument = (Document) selfRow[1];

            Query similarityQuery = moreLikeThis
                    .like(new StringReader(indexedDocument.get(ProfileIndexerService.ORIG_CONTENT)));

            FullTextQuery similarityLookup = session.createFullTextQuery(similarityQuery,
                    Freelancer.class);
            similarityLookup.setProjection(FullTextQuery.THIS, FullTextQuery.DOCUMENT,
                    FullTextQuery.SCORE);
            similarityLookup.setMaxResults(50);

            Highlighter highlighter = new Highlighter(
                    new SpanGradientFormatter(1, "#000000", "#0000FF", null, null),
                    new QueryScorer(similarityQuery));

            for (Object similarRowObject : similarityLookup.list()) {
                Object[] similarRow = (Object[]) similarRowObject;
                Freelancer candidate = (Freelancer) similarRow[0];
                Document candidateDocument = (Document) similarRow[1];
                // Projected score; read but not used further
                Float candidateScore = (Float) similarRow[2];

                // Skip the reference freelancer itself (compared by object identity)
                if (candidate == indexedFreelancer) {
                    continue;
                }

                // Found a similar one
                similarEntries.add(createResultEntry(analyzer, similarityQuery, highlighter, candidate,
                        candidateDocument));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (indexReader != null) {
            searchFactory.getReaderProvider().closeReader(indexReader);
        }
    }

    return similarEntries;
}

From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java

License:Open Source License

/**
 * Finds freelancers whose indexed profile content resembles the description of the given project.
 *
 * <p>The project's short description, long description and skills are joined with single spaces and
 * used as reference text of a MoreLikeThis query over the freelancer index; at most 50 matches are
 * returned. Parts that are {@code null} are left out of the reference text instead of contributing
 * the literal word "null" as a query term.
 *
 * @param aProject the reference project; {@code null} or a project without id yields an empty list
 * @return result entries for the matching freelancers, never {@code null}
 * @throws RuntimeException wrapping any exception raised while querying the index
 */
@Override
public List<ProfileSearchEntry> getSimilarFreelancer(Project aProject) {
    List<ProfileSearchEntry> theResult = new ArrayList<ProfileSearchEntry>();
    if (aProject == null || aProject.getId() == null) {
        return theResult;
    }

    FullTextSession theSession = Search.getFullTextSession(sessionFactory.getCurrentSession());
    SearchFactory theSearchFactory = theSession.getSearchFactory();

    Analyzer theAnalyzer = ProfileAnalyzerFactory.createAnalyzer();

    DirectoryProvider theFreeelancerProvider = theSearchFactory.getDirectoryProviders(Freelancer.class)[0];

    IndexReader theIndexReader = null;

    try {
        theIndexReader = theSearchFactory.getReaderProvider().openReader(theFreeelancerProvider);

        MoreLikeThis theMoreLikeThis = new MoreLikeThis(theIndexReader);

        theMoreLikeThis.setMinDocFreq(1);
        theMoreLikeThis.setMinTermFreq(1);
        theMoreLikeThis.setAnalyzer(theAnalyzer);
        theMoreLikeThis.setFieldNames(new String[] { ProfileIndexerService.CONTENT });

        // Plain string concatenation would render a null part as the text "null"; skip such
        // parts but keep the separators so non-null input produces exactly the same text as before.
        Object[] theTextParts = { aProject.getDescriptionShort(), aProject.getDescriptionLong(),
                aProject.getSkills() };
        StringBuilder theReferenceText = new StringBuilder();
        for (int i = 0; i < theTextParts.length; i++) {
            if (i > 0) {
                theReferenceText.append(' ');
            }
            if (theTextParts[i] != null) {
                theReferenceText.append(theTextParts[i]);
            }
        }

        Query theMltQuery = theMoreLikeThis.like(new StringReader(theReferenceText.toString()));

        FullTextQuery theMoreLikeThisQuery = theSession.createFullTextQuery(theMltQuery, Freelancer.class);
        theMoreLikeThisQuery.setProjection(FullTextQuery.THIS, FullTextQuery.DOCUMENT, FullTextQuery.SCORE);
        theMoreLikeThisQuery.setMaxResults(50);

        Highlighter theHighlighter = new Highlighter(
                new SpanGradientFormatter(1, "#000000", "#0000FF", null, null),
                new QueryScorer(theMltQuery));

        for (Object theSingleMltEntry : theMoreLikeThisQuery.list()) {
            // Row layout follows the projection: entity, Lucene document, score (score is not used)
            Object[] theMltRow = (Object[]) theSingleMltEntry;
            Freelancer theMltFreelancer = (Freelancer) theMltRow[0];
            Document theMltDocument = (Document) theMltRow[1];

            ProfileSearchEntry theEntry = createResultEntry(theAnalyzer, theMltQuery, theHighlighter,
                    theMltFreelancer, theMltDocument);
            theResult.add(theEntry);
        }

    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (theIndexReader != null) {
            theSearchFactory.getReaderProvider().closeReader(theIndexReader);
        }
    }

    return theResult;
}

From source file:dk.defxws.fgslucene.Statement.java

License:Open Source License

/**
 * Parses and runs a Lucene query and renders the requested page of hits as a
 * {@code <lucenesearch>} XML document.
 *
 * <p>For every stored field of every hit either highlighted snippets (when {@code snippetsMax > 0}
 * and the highlighter produced any) or the field value, truncated to {@code fieldMaxLength}
 * characters when that limit is positive, is written.
 *
 * @param searcher               the searcher to run the query with; also stored in {@code this.searcher}
 * @param queryString            the query in Lucene query parser syntax
 * @param startRecord            1-based number of the first hit to render
 * @param maxResults             maximum number of hits to render
 * @param snippetsMax            maximum number of snippets per field; {@code 0} or less disables highlighting
 * @param fieldMaxLength         fragment size for snippets and truncation length for plain field values
 * @param analyzer               analyzer used for query parsing and highlighting
 * @param defaultQueryFields     whitespace-separated default field names; more than one selects the
 *                               multi-field query parser
 * @param allowLeadingWildcard   passed to the query parser
 * @param lowercaseExpandedTerms passed to the query parser
 * @param indexPath              not used by this method
 * @param indexName              written into the result header
 * @param snippetBegin           literal text inserted before each highlighted term
 * @param snippetEnd             literal text inserted after each highlighted term
 * @param sortFields             passed to {@code getHits} and written into the result header
 * @return the result set wrapping the generated XML
 * @throws GenericSearchException if the query cannot be parsed or rewritten
 */
public ResultSet executeQuery(IndexSearcher searcher, String queryString, int startRecord, int maxResults,
        int snippetsMax, int fieldMaxLength, Analyzer analyzer, String defaultQueryFields,
        boolean allowLeadingWildcard, boolean lowercaseExpandedTerms, String indexPath, String indexName,
        String snippetBegin, String snippetEnd, String sortFields) throws GenericSearchException {
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery" + " query=" + queryString + " startRecord=" + startRecord + " maxResults="
                + maxResults + " snippetsMax=" + snippetsMax + " fieldMaxLength=" + fieldMaxLength
                + " indexName=" + indexName + " sortFields=" + sortFields + " defaultQueryFields="
                + defaultQueryFields + " allowLeadingWildcard=" + allowLeadingWildcard
                + " lowercaseExpandedTerms=" + lowercaseExpandedTerms);
    }
    this.searcher = searcher;

    // Split the whitespace-separated list of default fields
    StringTokenizer defaultFieldNames = new StringTokenizer(defaultQueryFields);
    int countFields = defaultFieldNames.countTokens();
    String[] defaultFields = new String[countFields];
    for (int i = 0; i < countFields; i++) {
        defaultFields[i] = defaultFieldNames.nextToken();
    }

    Query query = null;
    if (defaultFields.length == 1) {
        QueryParser queryParser = new QueryParser(Version.LUCENE_36, defaultFields[0], analyzer);
        queryParser.setAllowLeadingWildcard(allowLeadingWildcard);
        queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
        if (logger.isDebugEnabled()) {
            logger.debug("executeQuery queryParser" + " allowLeadingWildcard="
                    + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms="
                    + queryParser.getLowercaseExpandedTerms());
        }
        try {
            query = queryParser.parse(queryString);
        } catch (ParseException e) {
            throw new GenericSearchException(e.toString());
        }
    } else {
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, defaultFields,
                analyzer);
        queryParser.setAllowLeadingWildcard(allowLeadingWildcard);
        queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
        if (logger.isDebugEnabled()) {
            logger.debug("executeQuery mfqueryParser" + " allowLeadingWildcard="
                    + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms="
                    + queryParser.getLowercaseExpandedTerms());
        }
        try {
            query = queryParser.parse(queryString);
        } catch (ParseException e) {
            throw new GenericSearchException(e.toString());
        }
    }
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery after parse query=" + query);
    }
    try {
        // rewrite() returns the rewritten query and leaves the receiver untouched, so the result
        // has to be kept; otherwise the rewrite is lost for logging, searching and highlighting.
        query = query.rewrite(searcher.getIndexReader());
    } catch (Exception e) {
        throw new GenericSearchException(e.toString());
    }
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery after rewrite query=" + query);
    }

    int start = startRecord;
    TopDocs hits = getHits(query, start + maxResults - 1, sortFields);
    ScoreDoc[] docs = hits.scoreDocs;
    // 1-based number of the last hit on this page
    int end = Math.min(hits.totalHits, start + maxResults - 1);
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery hits.totalHits=" + hits.totalHits);
    }

    StringBuffer resultXml = new StringBuffer();
    resultXml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    String queryStringEncoded = null;
    try {
        queryStringEncoded = URLEncoder.encode(queryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        errorExit(e.toString());
    }
    // NOTE(review): indexName, sortFields and field names are written into attributes unescaped.
    resultXml.append("<lucenesearch " + "   xmlns:dc=\"http://purl.org/dc/elements/1.1/" + "\" query=\""
            + queryStringEncoded + "\" indexName=\"" + indexName + "\" sortFields=\"" + sortFields
            + "\" hitPageStart=\"" + startRecord + "\" hitPageSize=\"" + maxResults + "\" hitTotal=\""
            + hits.totalHits + "\">");

    ScoreDoc hit = null;
    Document doc = null;
    String hitsScore = null;
    for (int i = start; i <= end; i++) {
        try {
            // Hit numbers are 1-based, the array is 0-based
            hit = docs[i - 1];
            doc = searcher.doc(hit.doc);
            hitsScore = "" + hit.score;
        } catch (CorruptIndexException e) {
            errorExit(e.toString());
        } catch (IOException e) {
            errorExit(e.toString());
        }
        resultXml.append("<hit no=\"" + i + "\" score=\"" + hitsScore + "\">");
        for (Object fieldObject : doc.getFields()) {
            Fieldable f = (Fieldable) fieldObject;
            resultXml.append("<field name=\"" + f.name() + "\"");
            String snippets = null;
            if (snippetsMax > 0) {
                // Placeholder markers survive the encoding step and are swapped for the
                // configured begin/end texts afterwards.
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("!!!SNIPPETBEGIN", "!!!SNIPPETEND");
                QueryScorer scorer = new QueryScorer(query, f.name());
                Highlighter highlighter = new Highlighter(formatter, scorer);
                Fragmenter fragmenter = new SimpleFragmenter(fieldMaxLength);
                highlighter.setTextFragmenter(fragmenter);
                TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(f.stringValue()));
                try {
                    snippets = highlighter.getBestFragments(tokenStream, f.stringValue(), snippetsMax, " ... ");
                } catch (Exception e) { // all Exceptions to be caught, not just IOException
                    errorExit(e.toString());
                }
                snippets = checkTruncatedWords(snippets, " ... ");
                snippets = StreamUtility.enc(snippets);
                // Literal replacement: replaceAll would interpret '$' and '\' in the configured
                // begin/end texts as regex replacement syntax.
                snippets = snippets.replace("!!!SNIPPETBEGIN", snippetBegin);
                snippets = snippets.replace("!!!SNIPPETEND", snippetEnd);
                if (!snippets.equals("")) {
                    resultXml.append(" snippet=\"yes\">" + snippets);
                }
            }
            // No snippets produced: fall back to the (possibly truncated) field value
            if (snippets == null || snippets.equals("")) {
                if (fieldMaxLength > 0 && f.stringValue().length() > fieldMaxLength) {
                    String snippet = f.stringValue().substring(0, fieldMaxLength);
                    // Drop an '&' found within the last 8 characters together with everything
                    // after it - presumably to avoid cutting through an entity; TODO confirm
                    int iamp = snippet.lastIndexOf("&");
                    if (iamp > -1 && iamp > fieldMaxLength - 8) {
                        snippet = snippet.substring(0, iamp);
                    }
                    resultXml.append(">" + StreamUtility.enc(snippet) + " ... ");
                } else {
                    resultXml.append(">" + StreamUtility.enc(f.stringValue()));
                }
            }
            resultXml.append("</field>");
        }
        resultXml.append("</hit>");
    }
    resultXml.append("</lucenesearch>");

    if (logger.isDebugEnabled()) {
        // Log at most the first 500 characters of the result
        int size = 500;
        if (resultXml.length() < size) {
            size = resultXml.length();
        }
        String debugString = resultXml.substring(0, size);
        if (resultXml.length() > size) {
            debugString += "...";
        }
        logger.debug("executeQuery resultXml=" + debugString);
    }
    return new ResultSet(resultXml);
}

From source file:dk.defxws.fgssolr.Statement.java

License:Open Source License

/**
 * Parses {@code queryString}, runs it through {@code getHits} and renders the requested page of
 * hits as a {@code <solrsearch>} XML document wrapped in a {@code ResultSet}.
 *
 * <p>Every field of every hit on the page is written as a {@code <field>} element. When
 * {@code snippetsMax > 0} the field text is run through a Lucene highlighter; if that yields a
 * non-empty result it is written with {@code snippet="yes"}, otherwise the (possibly truncated)
 * plain field value is written.
 *
 * @param searcher           searcher to query; also stored in {@code this.searcher}
 * @param queryString        query in Lucene query-parser syntax
 * @param startRecord        1-based number of the first hit to render
 * @param maxResults         maximum number of hits to render
 * @param snippetsMax        maximum number of highlighted fragments per field; {@code 0} or less
 *                           disables highlighting
 * @param fieldMaxLength     fragment size for the highlighter and, when positive, the maximum
 *                           length of a non-highlighted field value
 * @param analyzer           analyzer used for query parsing and for tokenizing field text
 * @param defaultQueryFields whitespace-separated names of the default query field(s)
 * @param indexPath          not used by this method
 * @param indexName          written to the {@code indexName} attribute of the root element
 * @param snippetBegin       literal text inserted before each highlighted term
 * @param snippetEnd         literal text inserted after each highlighted term
 * @param sortFields         passed to {@code getHits} and written to the root element
 * @return the result set built from the generated XML
 * @throws GenericSearchException if the query cannot be parsed or rewritten; other failures are
 *                                reported through {@code errorExit} (presumably also raising
 *                                this exception - confirm against its definition)
 */
public ResultSet executeQuery(IndexSearcher searcher, String queryString, int startRecord, int maxResults,
        int snippetsMax, int fieldMaxLength, Analyzer analyzer, String defaultQueryFields, String indexPath,
        String indexName, String snippetBegin, String snippetEnd, String sortFields)
        throws GenericSearchException {
    boolean allowLeadingWildcard = true;
    boolean lowercaseExpandedTerms = true;
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery" + " query=" + queryString + " startRecord=" + startRecord + " maxResults="
                + maxResults + " snippetsMax=" + snippetsMax + " fieldMaxLength=" + fieldMaxLength
                + " indexName=" + indexName + " sortFields=" + sortFields + " defaultQueryFields="
                + defaultQueryFields + " allowLeadingWildcard=" + allowLeadingWildcard
                + " lowercaseExpandedTerms=" + lowercaseExpandedTerms);
    }
    this.searcher = searcher;

    // Split the whitespace-separated list of default field names.
    StringTokenizer defaultFieldNames = new StringTokenizer(defaultQueryFields);
    int countFields = defaultFieldNames.countTokens();
    String[] defaultFields = new String[countFields];
    for (int i = 0; i < countFields; i++) {
        defaultFields[i] = defaultFieldNames.nextToken();
    }

    // Exactly one default field uses the plain parser; any other count uses the multi-field
    // parser. Both are configured and used identically, so they share one code path; only the
    // debug label differs.
    boolean singleField = defaultFields.length == 1;
    QueryParser queryParser = singleField ? new QueryParser(Version.LUCENE_36, defaultFields[0], analyzer)
            : new MultiFieldQueryParser(Version.LUCENE_36, defaultFields, analyzer);
    queryParser.setAllowLeadingWildcard(allowLeadingWildcard);
    queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery " + (singleField ? "queryParser" : "mfqueryParser")
                + " allowLeadingWildcard=" + queryParser.getAllowLeadingWildcard()
                + " lowercaseExpandedTerms=" + queryParser.getLowercaseExpandedTerms());
    }
    Query query;
    try {
        query = queryParser.parse(queryString);
    } catch (ParseException e) {
        throw new GenericSearchException(e.toString());
    }
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery after parse query=" + query);
    }
    try {
        // NOTE(review): the value returned by rewrite() is discarded, so the search and the
        // highlighter below keep using the parsed query. Left unchanged on purpose: switching
        // to the rewritten query may change what the highlighter matches - confirm intent.
        query.rewrite(searcher.getIndexReader());
    } catch (Exception e) {
        throw new GenericSearchException(e.toString());
    }
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery after rewrite query=" + query);
    }

    // Hit numbers are 1-based; "end" is the number of the last hit on this page.
    int start = startRecord;
    TopDocs hits = getHits(query, start + maxResults - 1, sortFields);
    ScoreDoc[] docs = hits.scoreDocs;
    int end = Math.min(hits.totalHits, start + maxResults - 1);
    if (logger.isDebugEnabled()) {
        logger.debug("executeQuery hits.totalHits=" + hits.totalHits);
    }

    StringBuffer resultXml = new StringBuffer();
    resultXml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    String queryStringEncoded = null;
    try {
        queryStringEncoded = URLEncoder.encode(queryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        errorExit(e.toString());
    }
    resultXml.append("<solrsearch " + "   xmlns:dc=\"http://purl.org/dc/elements/1.1/" + "\" query=\""
            + queryStringEncoded + "\" indexName=\"" + indexName + "\" sortFields=\"" + sortFields
            + "\" hitPageStart=\"" + startRecord + "\" hitPageSize=\"" + maxResults + "\" hitTotal=\""
            + hits.totalHits + "\">");

    ScoreDoc hit = null;
    Document doc = null;
    String hitsScore = null;
    for (int i = start; i <= end; i++) {
        try {
            hit = docs[i - 1];
            doc = searcher.doc(hit.doc);
            hitsScore = "" + hit.score;
        } catch (IOException e) {
            // Also covers CorruptIndexException, which is an IOException.
            errorExit(e.toString());
        }
        resultXml.append("<hit no=\"" + i + "\" score=\"" + hitsScore + "\">");
        for (Object fieldObject : doc.getFields()) {
            Fieldable f = (Fieldable) fieldObject;
            resultXml.append("<field name=\"" + f.name() + "\"");
            String snippets = null;
            if (snippetsMax > 0) {
                // Highlight with placeholder markers first; the real markers are substituted
                // only after the text has been passed through StreamUtility.enc.
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("!!!SNIPPETBEGIN", "!!!SNIPPETEND");
                QueryScorer scorer = new QueryScorer(query, f.name());
                Highlighter highlighter = new Highlighter(formatter, scorer);
                Fragmenter fragmenter = new SimpleFragmenter(fieldMaxLength);
                highlighter.setTextFragmenter(fragmenter);
                TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(f.stringValue()));
                try {
                    snippets = highlighter.getBestFragments(tokenStream, f.stringValue(), snippetsMax, " ... ");
                } catch (Exception e) { // all Exceptions to be caught, not just IOException
                    errorExit(e.toString());
                }
                snippets = checkTruncatedWords(snippets, " ... ");
                snippets = StreamUtility.enc(snippets);
                if (snippets != null) {
                    // Literal replacement: replaceAll() would interpret '$' and '\' in the
                    // caller-supplied markers as regex replacement syntax.
                    snippets = snippets.replace("!!!SNIPPETBEGIN", snippetBegin);
                    snippets = snippets.replace("!!!SNIPPETEND", snippetEnd);
                }
                if (snippets != null && !snippets.equals("")) {
                    resultXml.append(" snippet=\"yes\">" + snippets);
                }
            }
            if (snippets == null || snippets.equals("")) {
                // No highlighted text: fall back to the plain field value.
                String fieldValue = f.stringValue();
                if (fieldMaxLength > 0 && fieldValue.length() > fieldMaxLength) {
                    String snippet = fieldValue.substring(0, fieldMaxLength);
                    // Drop a trailing '&...' that starts within the last 8 characters,
                    // presumably so that an entity reference is not cut in half.
                    int iamp = snippet.lastIndexOf("&");
                    if (iamp > -1 && iamp > fieldMaxLength - 8) {
                        snippet = snippet.substring(0, iamp);
                    }
                    resultXml.append(">" + StreamUtility.enc(snippet) + " ... ");
                } else {
                    resultXml.append(">" + StreamUtility.enc(fieldValue));
                }
            }
            resultXml.append("</field>");
        }
        resultXml.append("</hit>");
    }
    resultXml.append("</solrsearch>");

    if (logger.isDebugEnabled()) {
        // Log at most the first 500 characters of the generated XML.
        int size = 500;
        if (resultXml.length() < size) {
            size = resultXml.length();
        }
        String debugString = resultXml.substring(0, size);
        if (resultXml.length() > size) {
            debugString += "...";
        }
        logger.debug("executeQuery resultXml=" + debugString);
    }
    return new ResultSet(resultXml);
}

From source file:edu.coeia.util.TextHighlighter.java

License:Open Source License

/**
 * Builds a small HTML page whose body contains the fragments of {@code text} that match
 * {@code keyword}, with each match wrapped in {@code <span class="highlight">}.
 *
 * <p>The page embeds a {@code .highlight} style rule (yellow background). If nothing matches,
 * the body is empty.
 *
 * @param text    the text to search and highlight
 * @param keyword the single term to highlight; it is used as-is in a {@code TermQuery}
 *                (NOTE(review): the analyzer may normalize tokens, e.g. case - confirm whether
 *                the keyword needs the same normalization)
 * @return the generated HTML document
 * @throws IOException if the text cannot be tokenized or the highlighter reports invalid
 *                     token offsets
 */
public static String getHighlightString(String text, String keyword) throws IOException {
    TermQuery query = new TermQuery(new Term("f", keyword));
    QueryScorer scorer = new QueryScorer(query);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    Fragmenter fragmenter = new SimpleFragmenter(50);
    highlighter.setTextFragmenter(fragmenter);
    TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_20).tokenStream("f", new StringReader(text));

    // Previously the highlighter was configured but never run, so the body was always empty.
    String result;
    try {
        result = highlighter.getBestFragments(tokenStream, text, 30, "...");
    } catch (org.apache.lucene.search.highlight.InvalidTokenOffsetsException e) {
        // Keep the declared contract (IOException only) while preserving the cause.
        throw new IOException("Could not highlight keyword '" + keyword + "'", e);
    }

    StringBuilder writer = new StringBuilder();
    writer.append("<html>");
    writer.append("<style>\n" + ".highlight {\n" + " background: yellow;\n" + "}\n" + "</style>");
    writer.append("<body>");
    writer.append(result);
    writer.append("</body></html>");

    return writer.toString();
}

From source file:edu.virginia.cs.searcher.PostSearcher.java

/**
 * Performs the actual Lucene search./*  w w w .  j  a  va2 s  . c o  m*/
 *
 * @param luceneQuery
 * @param numResults
 * @return the SearchResult
 */
private SearchResult runSearch(Query luceneQuery, SearchQuery searchQuery) {
    try {
        TopDocs docs = indexSearcher.search(luceneQuery, searchQuery.numResults());
        ScoreDoc[] hits = docs.scoreDocs;
        String field = searchQuery.fields().get(0);

        SearchResult searchResult = new SearchResult(searchQuery, docs.totalHits);
        for (ScoreDoc hit : hits) {
            Document doc = indexSearcher.doc(hit.doc);
            Posts pdoc = new Posts();
            String highlighted = null;
            try {
                Highlighter highlighter = new Highlighter(formatter, new QueryScorer(luceneQuery));
                String contents = doc.getField(field).stringValue();
                pdoc.setId(Integer.parseInt(doc.getField("id").stringValue()));
                pdoc.setBody(doc.getField("body").stringValue());
                pdoc.setTitle(doc.getField("title").stringValue());
                pdoc.setCode(doc.getField("code").stringValue());
                pdoc.setTags(doc.getField("tags").stringValue());
                pdoc.setScore(Integer.parseInt(doc.getField("score").stringValue()));
                pdoc.setAcceptedAnswerId(Integer.parseInt(doc.getField("acceptedAnswerId").stringValue()));
                pdoc.setViewCount(Integer.parseInt(doc.getField("viewCount").stringValue()));
                pdoc.setAnswerCount(Integer.parseInt(doc.getField("answerCount").stringValue()));
                pdoc.setCommentCount(Integer.parseInt(doc.getField("commentCount").stringValue()));
                pdoc.setFavoriteCount(Integer.parseInt(doc.getField("favoriteCount").stringValue()));

                String[] snippets = highlighter.getBestFragments(analyzer, field, contents, numFragments);
                highlighted = createOneSnippet(snippets);
            } catch (InvalidTokenOffsetsException exception) {
                exception.printStackTrace();
                highlighted = "(no snippets yet)";
            }
            searchResult.addResult(pdoc);
            searchResult.setSnippet(pdoc, highlighted);
        }
        return searchResult;
    } catch (IOException exception) {
        exception.printStackTrace();
    }
    return new SearchResult(searchQuery);
}

From source file:es.ua.labidiomas.corpus.searcher.Searcher.java

/**
 * Configures {@code textHighlighter} so that it marks, in bold, the terms matching the given
 * search criteria.
 *
 * <p>The query is prepared against the {@code "title"} field when the title option is set and
 * against the {@code "text"} field otherwise. A {@code NullFragmenter} is installed as the
 * text fragmenter.
 *
 * @param params the search configuration providing the search nodes and options.
 */
private void setHighlighter(SearchConfiguration params) {
    String targetField = params.getOptions().isTitle() ? "title" : "text";
    Query highlightQuery = _prepareQuery(params.getSearchNodes(), targetField,
            params.getOptions().isOrder(), params.getOptions().isDistance());

    SimpleHTMLFormatter boldFormatter = new SimpleHTMLFormatter("<b>", "</b>");
    Highlighter configured = new Highlighter(boldFormatter, new QueryScorer(highlightQuery));
    configured.setTextFragmenter(new NullFragmenter());
    this.textHighlighter = configured;
}

From source file:Example.lucene.SearchNHilight.java

/**
 * Searches the index under {@code data/indexing} and prints, for each of the top hits, its
 * {@code url}, its {@code title} and a short highlighted preview of its {@code content} field.
 *
 * @param args optional; {@code args[0]} is the query string (defaults to {@code "golf user"})
 * @throws IOException                  if the index cannot be opened or read
 * @throws ParseException               if the query string cannot be parsed
 * @throws InvalidTokenOffsetsException if the highlighter reports invalid token offsets
 */
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    try {
        String querystr = args.length > 0 ? args[0] : "golf user";
        // "content" is the default field used when the query does not name a field explicitly.
        Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
                .parse(querystr);

        int hitsPerPage = 10;
        IndexReader reader = DirectoryReader.open(index);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(query, hitsPerPage);

            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
            Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

            // Iterate over the hits actually returned; there may be fewer than hitsPerPage.
            for (int i = 0; i < hits.scoreDocs.length; i++) {
                int id = hits.scoreDocs[i].doc;
                Document doc = searcher.doc(id);
                System.out.println(doc.get("url"));
                System.out.println(doc.get("title"));

                String text = doc.get("content");
                StringBuilder preview = new StringBuilder();
                if (text != null) {
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id,
                            "content", analyzer);
                    TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
                    int shown = 0;
                    for (TextFragment fragment : fragments) {
                        if ((fragment != null) && (fragment.getScore() > 0)) {
                            preview.append(fragment.toString()).append("...<br>");
                            shown++;
                            // Limit the preview to two fragments.
                            if (shown >= 2) {
                                break;
                            }
                        }
                    }
                }
                // The preview used to be built but never shown.
                if (preview.length() > 0) {
                    System.out.println(preview);
                }
                System.out.println("-------------");
            }
        } finally {
            reader.close();
        }
    } finally {
        index.close();
    }
}