List of usage examples for org.apache.lucene.search.TopScoreDocCollector.create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
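Note: the two-int signature shown above is the newer overload, where totalHitsThreshold caps how many hits are counted exactly before the reported total becomes a lower bound. Several examples below target older Lucene releases, whose two-argument overload was create(int numHits, boolean docsScoredInOrder). The following is a minimal, self-contained sketch of the documented overload; it is not taken from any of the projects below, assumes a Lucene 8.x-era classpath, and its class, field, and value names are illustrative.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class TopScoreDocCollectorSketch {
    public static void main(String[] args) throws Exception {
        // Build a tiny in-memory index to search against (illustrative data).
        Directory index = new ByteBuffersDirectory();
        try (IndexWriter w = new IndexWriter(index, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("title", "lucene in action", Field.Store.YES));
            w.addDocument(doc);
        }
        try (DirectoryReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // numHits = 10: keep the 10 best-scoring hits.
            // totalHitsThreshold = 1000: count total hits accurately up to
            // 1000 matches; beyond that the total may be a lower bound.
            TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
            searcher.search(new TermQuery(new Term("title", "lucene")), collector);
            for (ScoreDoc hit : collector.topDocs().scoreDocs) {
                System.out.println(searcher.doc(hit.doc).get("title"));
            }
        }
    }
}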
From source file:org.opensolaris.opengrok.search.SearchEngine.java
License:Open Source License
/**
 * Get results. If no search was started beforehand, no results are
 * returned. This method will requery if end is more than the first
 * query from search(), hence a performance hit applies if you want
 * results on later pages than the number of cachePages; also, end has
 * to be bigger than start!
 *
 * @param start start of the hit list
 * @param end end of the hit list
 * @param ret list of results from start to end, or null/empty if no
 *            search was started
 */
public void results(int start, int end, List<Hit> ret) {
    // return if no search() was started before
    if (hits == null || (end < start)) {
        ret.clear();
        return;
    }
    ret.clear();
    // TODO check whether the condition below fits for end == old hits.length, or whether it should include it
    if (end > hits.length && !allCollected) {
        // do the requery, we want more than 5 pages
        collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
        try {
            searcher.search(query, collector);
        } catch (Exception e) {
            // this exception should never be hit, since search() will hit this before
            OpenGrokLogger.getLogger().log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
        hits = collector.topDocs().scoreDocs;
        Document d = null;
        for (int i = start; i < hits.length; i++) {
            int docId = hits[i].doc;
            try {
                d = searcher.doc(docId);
            } catch (Exception e) {
                OpenGrokLogger.getLogger().log(Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
            }
            docs.add(d);
        }
        allCollected = true;
    }
    // TODO generation of ret (results) could be cached, and consumers of the engine
    // would just print them in whatever form they need; this way we could get rid
    // of docs. The only problem is that the count of docs is usually smaller than
    // the number of results.
    for (int ii = start; ii < end; ++ii) {
        boolean alt = (ii % 2 == 0);
        boolean hasContext = false;
        try {
            Document doc = docs.get(ii);
            String filename = doc.get(QueryBuilder.PATH);
            Genre genre = Genre.get(doc.get(QueryBuilder.T));
            Definitions tags = null;
            IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.binaryValue().bytes);
            }
            int nhits = docs.size();
            if (sourceContext != null) {
                try {
                    if (Genre.PLAIN == genre && (source != null)) {
                        hasContext = sourceContext.getContext(
                                new InputStreamReader(new FileInputStream(source + filename)),
                                null, null, null, filename, tags, nhits > 100, false, ret);
                    } else if (Genre.XREFABLE == genre && data != null && summarizer != null) {
                        int l;
                        try (Reader r = RuntimeEnvironment.getInstance().isCompressXref()
                                ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(
                                        new GZIPInputStream(new FileInputStream(
                                                data + Prefix.XREF_P + filename + ".gz")))))
                                : new HTMLStripCharFilter(new BufferedReader(
                                        new FileReader(data + Prefix.XREF_P + filename)))) {
                            l = r.read(content);
                        }
                        // TODO FIX the fragmenter below according to either the summarizer or the
                        // context (to get line numbers; might be hard, since xref writers will need
                        // to be fixed too -- they generate just one line of HTML code now)
                        Summary sum = summarizer.getSummary(new String(content, 0, l));
                        Fragment[] fragments = sum.getFragments();
                        for (int jj = 0; jj < fragments.length; ++jj) {
                            String match = fragments[jj].toString();
                            if (match.length() > 0) {
                                if (!fragments[jj].isEllipsis()) {
                                    Hit hit = new Hit(filename, fragments[jj].toString(), "", true, alt);
                                    ret.add(hit);
                                }
                                hasContext = true;
                            }
                        }
                    } else {
                        OpenGrokLogger.getLogger().log(Level.WARNING, "Unknown genre: {0} for {1}",
                                new Object[] { genre, filename });
                        hasContext |= sourceContext.getContext(null, null, null, null, filename,
                                tags, false, false, ret);
                    }
                } catch (FileNotFoundException exp) {
                    OpenGrokLogger.getLogger().log(Level.WARNING, "Couldn''t read summary from {0} ({1})",
                            new Object[] { filename, exp.getMessage() });
                    hasContext |= sourceContext.getContext(null, null, null, null, filename,
                            tags, false, false, ret);
                }
            }
            if (historyContext != null) {
                hasContext |= historyContext.getContext(source + filename, filename, ret);
            }
            if (!hasContext) {
                ret.add(new Hit(filename, "...", "", false, alt));
            }
        } catch (IOException | ClassNotFoundException | HistoryException e) {
            OpenGrokLogger.getLogger().log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
    }
}
From source file:org.owasp.dependencycheck.data.lucene.FieldAnalyzerTest.java
License:Apache License
@Test
public void testAnalyzers() throws Exception {
    Analyzer analyzer = new FieldAnalyzer(LuceneUtils.CURRENT_VERSION);
    Directory index = new RAMDirectory();

    String field1 = "product";
    String text1 = "springframework";
    String field2 = "vendor";
    String text2 = "springsource";

    try (IndexWriter w = createIndex(analyzer, index)) {
        addDoc(w, field1, text1, field2, text2);
        text1 = "x-stream";
        text2 = "xstream";
        addDoc(w, field1, text1, field2, text2);
    }

    //Analyzer searchingAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
    String querystr = "product:\"(Spring Framework Core)\" vendor:(SpringSource)";
    SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
    SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
    HashMap<String, Analyzer> map = new HashMap<>();
    map.put(field1, searchAnalyzerProduct);
    map.put(field2, searchAnalyzerVendor);
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(LuceneUtils.CURRENT_VERSION), map);

    QueryParser parser = new QueryParser(LuceneUtils.CURRENT_VERSION, field1, wrapper);
    Query q = parser.parse(querystr);
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    assertEquals("Did not find 1 document?", 1, hits.length);
    assertEquals("springframework", searcher.doc(hits[0].doc).get(field1));
    assertEquals("springsource", searcher.doc(hits[0].doc).get(field2));

    // ensure we don't have anything left over from the previous search
    searchAnalyzerProduct.clear();
    searchAnalyzerVendor.clear();
    querystr = "product:(Apache Struts) vendor:(Apache)";
    Query q2 = parser.parse(querystr);
    assertFalse("second parsing contains previousWord from the TokenPairConcatenatingFilter",
            q2.toString().contains("core"));

    querystr = "product:( x-stream^5 ) AND vendor:( thoughtworks.xstream )";
    Query q3 = parser.parse(querystr);
    collector = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(q3, collector);
    hits = collector.topDocs().scoreDocs;
    assertEquals("x-stream", searcher.doc(hits[0].doc).get(field1));
    assertEquals("xstream", searcher.doc(hits[0].doc).get(field2));
}
From source file:org.quelea.services.lucene.BibleSearchIndex.java
License:Open Source License
/**
 * Search for bible chapters that match the given filter.
 *
 * @param queryString the query string to filter.
 * @param type ignored - may be null.
 * @return a list of all bible chapters that match the given filter.
 */
@Override
public BibleChapter[] filter(String queryString, FilterType type) {
    String sanctifyQueryString = SearchIndexUtils.makeLuceneQuery(queryString);
    if (chapters.isEmpty() || sanctifyQueryString.isEmpty()) {
        return chapters.values().toArray(new BibleChapter[chapters.size()]);
    }
    List<BibleChapter> ret;
    try (DirectoryReader dr = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(dr);
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        Query q = new ComplexPhraseQueryParser("text", analyzer).parse(sanctifyQueryString);
        TopScoreDocCollector collector = TopScoreDocCollector.create(10000, 10000);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        ret = new ArrayList<>();
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            BibleChapter chapter = chapters.get(Integer.parseInt(d.get("number")));
            ret.add(chapter);
        }
        return ret.toArray(new BibleChapter[ret.size()]);
    } catch (ParseException | IOException ex) {
        LOGGER.log(Level.WARNING, "Invalid query string: " + sanctifyQueryString, ex);
        return new BibleChapter[0];
    }
}
From source file:org.quelea.services.lucene.SongSearchIndex.java
License:Open Source License
/**
 * Search for songs that match the given filter.
 *
 * @param queryString the query to use to search.
 * @param type TITLE, BODY, or AUTHOR, depending on what to search in.
 *             BODY is equivalent to the lyrics, TITLE to the title.
 * @return an array of songs that match the filter.
 */
@Override
public synchronized SongDisplayable[] filter(String queryString, FilterType type) {
    String sanctifyQueryString = SearchIndexUtils.makeLuceneQuery(queryString);
    if (songs.isEmpty() || sanctifyQueryString.trim().isEmpty()) {
        return songs.values().toArray(new SongDisplayable[songs.size()]);
    }
    String typeStr;
    if (type == FilterType.BODY) {
        typeStr = "lyrics";
    } else if (type == FilterType.TITLE) {
        typeStr = "title";
    } else if (type == FilterType.AUTHOR) {
        typeStr = "author";
    } else {
        LOGGER.log(Level.SEVERE, "Unknown type: {0}", type);
        return new SongDisplayable[0];
    }
    List<SongDisplayable> ret;
    try (DirectoryReader dr = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(dr);
        Query q = new ComplexPhraseQueryParser(typeStr, analyzer).parse(sanctifyQueryString);
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, 10000);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        ret = new ArrayList<>();
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            final Long songNumber = Long.parseLong(d.get("number"));
            SongDisplayable song = songs.get(songNumber);
            ret.add(song);
        }
        if (type == FilterType.BODY) {
            for (SongDisplayable song : filter(queryString, FilterType.TITLE)) {
                ret.remove(song);
            }
        }
        return ret.toArray(new SongDisplayable[ret.size()]);
    } catch (ClosedByInterruptException | ThreadInterruptedException ex) {
        // Ignore; the thread is being shut down by another character being typed.
        return new SongDisplayable[0];
    } catch (ParseException | IOException ex) {
        LOGGER.log(Level.WARNING, "Invalid query string: " + sanctifyQueryString, ex);
        return new SongDisplayable[0];
    }
}
From source file:org.sc.probro.lucene.BiothesaurusSearcher.java
License:Apache License
public Collection<Document> search(Query q) throws IOException {
    //System.out.println(String.format("Search: \"%s\"", query.toString()));
    //BioThesaurusCollector clt = new BioThesaurusCollector();
    TopDocsCollector clt = TopScoreDocCollector.create(25, true);
    search.search(q, clt);

    LinkedList<Document> docs = new LinkedList<Document>();
    for (ScoreDoc sdoc : clt.topDocs().scoreDocs) {
        docs.add(reader.document(sdoc.doc));
    }
    //return clt.getDocuments();
    return docs;
}
From source file:org.schors.evlampia.search.Aggregator.java
License:Open Source License
public SearchResult<T> Result(String story, int count, int start) throws Exception {
    int nDocs = start + count;
    //Query query = parser.parse(QueryParser.escape(story));
    Query query = parser.parse(story);
    TopScoreDocCollector collector = TopScoreDocCollector.create(Math.max(nDocs, 1), docsScoreInOrder);
    indexSearcher.search(query, collector);
    TopDocs topDocs = collector.topDocs();
    if (nDocs <= 0) {
        return new SearchResult<T>(topDocs.totalHits, null);
    }
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    int length = scoreDocs.length - start;
    if (length <= 0) {
        return new SearchResult<T>(topDocs.totalHits, null);
    }
    ArrayList<T> items = new ArrayList<T>(length);
    A aggregator = classA.newInstance();
    aggregator.query = query;
    aggregator.indexSearcher = indexSearcher;
    for (int i = start; i < scoreDocs.length; i++) {
        items.add(i - start, aggregator.aggregate(scoreDocs[i]));
    }
    return new SearchResult<T>(topDocs.totalHits, items);
}
From source file:org.simple.nlp.dictionary.index.SearchEngine.java
License:Open Source License
public TopDocsCollector<ScoreDoc> query(String query, int limit) throws ParseException, IOException {
    TopScoreDocCollector collector = TopScoreDocCollector.create(limit, false);
    QueryParser parser = new QueryParser(Version.LUCENE_46, "name", new CustomAnalyzer());
    Query lquery = parser.parse(query);
    searcher.search(lquery, collector);
    return collector;
}
From source file:org.splevo.vpm.analyzer.semantic.lucene.finder.SharedTermFinder.java
License:Open Source License
/**
 * Executes a query.
 *
 * @param indexSearcher
 *            The {@link IndexSearcher} to be used.
 * @param maxDoc
 *            The max. number of results.
 * @param query
 *            The {@link Query} to be executed.
 * @return The result of the search.
 * @throws IOException
 *             If there were errors while executing the query.
 */
private ScoreDoc[] executeQuery(IndexSearcher indexSearcher, int maxDoc, Query query) throws IOException {
    TopScoreDocCollector collector = TopScoreDocCollector.create(maxDoc, true);
    indexSearcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    return hits;
}
From source file:org.talend.dataquality.standardization.index.test.MyTest.java
License:Open Source License
public void testRun() {
    Directory index;
    try {
        index = new RAMDirectory();
        // The same analyzer should be used for indexing and searching
        Analyzer analyzer = SynonymTest.createAnalyzer();
        // Analyzer analyzer = new StandardAnalyzer();

        // the boolean arg in the IndexWriter ctor means to
        // create a new index, overwriting any existing index
        IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        // read the data (this will be the input data of a component called tFirstNameStandardize)
        for (String data : input) {
            Document doc = createDoc(data);
            w.addDocument(doc);
        }
        w.close();

        // now search
        IndexSearcher is = new IndexSearcher(index);
        Set<String> set = new HashSet<String>(Arrays.asList(input));
        for (String data : set) {
            // Term termName = new Term("steph");
            QueryParser qp = new QueryParser(Version.LUCENE_30, "FIELD_NAME", analyzer);
            Query q = qp.parse(data);
            TopDocsCollector<?> collector = TopScoreDocCollector.create(2, false);
            is.search(q, collector);
            ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
            System.out.println("\nnb doc= " + scoreDocs.length);
            System.out.print("input: " + data);
            for (ScoreDoc scoreDoc : scoreDocs) {
                String matchedData = is.doc(scoreDoc.doc).get("FIELD_NAME");
                System.out.println(" matches " + matchedData);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
}
From source file:org.talend.dataquality.standardization.index.test.SynonymTest.java
License:Open Source License
public void testSearch() {
    // TODO search for steph
    Directory dir = null;
    IndexSearcher is = null;
    try {
        dir = FSDirectory.open(new File(directoryPath));
        is = new IndexSearcher(dir);
        Analyzer analyzer = createAnalyzer();
        // Term termName = new Term("steph");
        QueryParser qp = new QueryParser(luceneVersion, FIELD_NAME, analyzer);
        Query q = qp.parse("Stephane");
        TopDocsCollector<?> collector = TopScoreDocCollector.create(2, false);
        is.search(q, collector);
        ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
        System.out.println("nb doc= " + scoreDocs.length);
        for (ScoreDoc scoreDoc : scoreDocs) {
            System.out.println(scoreDoc);
        }
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}