List of usage examples for org.apache.lucene.search TopScoreDocCollector create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
From source file:gogoph.search.Server.java
License:Open Source License
/** * @param args/*from w w w. j av a2s. com*/ * @throws IOException * @throws ParseException */ public static void main(String[] args) throws IOException, ParseException { Directory index; index = new SimpleFSDirectory(new File(args[0])); String searchTerms = args[1]; StandardAnalyzer analyzer; // 0. Specify the analyzer for tokenizing text. // The same analyzer should be used for indexing and searching analyzer = new StandardAnalyzer(Version.LUCENE_35); QueryParser parser = new QueryParser(Version.LUCENE_35, "content", analyzer); Query query = parser.parse(searchTerms); // 3. search int hitsPerPage = 40; IndexReader reader = IndexReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. display results SearchResult[] tab = new SearchResult[hits.length]; //System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); //System.out.println((i + 1) + ". " + d.get("title")); GopherDirectoryEntity gop = new GopherDirectoryEntity(); gop.setType(d.get("type")); gop.setUsername(d.get("title")); gop.setHost(d.get("host")); gop.setPort(Integer.parseInt(d.get("port"))); gop.setSelector(d.get("selector")); tab[i] = new SearchResult(gop.getUsername(), gop, hits[i].score); } // searcher can only be closed when there // is no need to access the documents any more. 
searcher.close(); reader.close(); ArrayList<GopherDirectoryEntity> tib; tib = new ArrayList<GopherDirectoryEntity>(); for (int i = 0; i < tab.length; i++) { SearchResult item = tab[i]; GopherDirectoryEntity node = item.getEntity(); node.setUsername("(Score: " + item.getScore() + ") " + item.getTitle()); GopherDirectoryEntity nodeComment = newComment("gopher://" + node.getHost() + ":" + node.getPort() + "/" + node.getType() + node.getSelector()); //GopherDirectoryEntity nodeComment2 = // GopherDirectoryEntity.newComment(node.getUserName()); tib.add(node); tib.add(nodeComment); //tab.add(nodeComment2); } index.close(); // Load index for (GopherDirectoryEntity item : tib) { System.out.print(item.getType() + item.getUsername() + "\t" + item.getSelector() + "\t" + item.getHost() + "\t" + item.getPort() + "\r\n"); } }
From source file:hellolucene.HelloLucene.java
public static void main(String[] args) throws IOException, ParseException { // 0. Specify the analyzer for tokenizing text. // The same analyzer should be used for indexing and searching StandardAnalyzer analyzer = new StandardAnalyzer(Version.LATEST); // 1. create the index Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter w = new IndexWriter(index, config); addDoc(w, "Lucene in Action", "193398817"); addDoc(w, "Lucene for Dummies", "55320055Z"); addDoc(w, "Managing Gigabytes", "55063554A"); addDoc(w, "The Art of Computer Science", "9900333X"); w.close();//from w w w.j a va 2s . c o m // 2. query String querystr = args.length > 0 ? args[0] : "lucene"; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query q = new QueryParser(Version.LATEST, "title", analyzer).parse(querystr); // 3. search int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. display results System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title")); } // reader can only be closed when there // is no need to access the documents any more. reader.close(); }
From source file:ie.cmrc.smtx.skos.index.lucene.LuceneSKOSIndex.java
License:Apache License
/**
 * Executes a Lucene query and wraps every hit in a scored semantic entity.
 *
 * @param query Lucene query to process
 * @param filter Additional filter for the query; when {@code null} the query runs unfiltered
 * @param offset Number of results to skip before returning any results; negative values
 *        are treated as {@code 0}. Skipped results do not count against {@code limit}.
 * @param limit Maximum number of results the query will return; values {@code <= 0} are
 *        replaced by {@code 200}
 * @return List of semantic entities matching the provided query, or an empty list when the
 *         index has not been initialised
 * @throws RuntimeException wrapping any {@link IOException} raised while searching
 */
private List<Scored<SemanticEntity>> processQuery(Query query, Filter filter, int offset, int limit) {
    if (!this.initialised) {
        return new ArrayList<>(0);
    }

    final int skip = Math.max(offset, 0);
    final int pageSize = (limit > 0) ? limit : 200;

    final List<Scored<SemanticEntity>> matches = new ArrayList<>(pageSize);
    // Collect enough hits to cover the skipped prefix plus one full page.
    final TopScoreDocCollector collector = TopScoreDocCollector.create(skip + pageSize, true);

    try {
        if (filter == null) {
            this.indexSearcher.search(query, collector);
        } else {
            this.indexSearcher.search(query, filter, collector);
        }

        final ScoreDoc[] page = collector.topDocs(skip).scoreDocs;
        if (page != null) {
            for (int i = 0; i < page.length; i++) {
                final Document stored = this.indexSearcher.doc(page[i].doc);
                final double score = page[i].score;
                matches.add(new Scored<SemanticEntity>(new DocSemanticEntityWrapper(stored), score));
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    return matches;
}
From source file:in.cdac.medinfo.csnotk.csnolib.agents.LuceneSearchAgent.java
License:Apache License
/** * @param query/*from w ww. j ava 2s . c om*/ * @param hitsPerPage * @param inOrder * @return scoreDocs * @throws IOException */ private ScoreDoc[] search(Query query, int hitsPerPage, boolean inOrder) throws IOException { Properties properties = null; int maxRecords = 10; try { properties = PropertyReader.loadSystemProperties(); maxRecords = Integer.parseInt(properties.getProperty("max.records")); } catch (IOException e) { //Get the Stack trace and form the exception message. StackTraceElement arrStackTraceElement[]; arrStackTraceElement = e.getStackTrace(); String strMessage = e.getClass() + Constants.NEW_LINE + Constants.CLASS_NAME + arrStackTraceElement[0].getClassName() + Constants.NEW_LINE + Constants.METHOD_NAME + arrStackTraceElement[0].getMethodName() + Constants.NEW_LINE + Constants.LINE_NUMBER + arrStackTraceElement[0].getLineNumber() + Constants.NEW_LINE + Constants.MESSAGE_DESCRIPTION + e.getMessage(); //Log the Exception CSNOLogger.logException(strMessage); } TopScoreDocCollector topScoreDocCollector = null; if (hitsPerPage == 0) { hitsPerPage = hitsPerPage + maxRecords; topScoreDocCollector = TopScoreDocCollector.create(hitsPerPage, inOrder); topScoreDocCollector = TopScoreDocCollector.create(Constants.DESCRIPTION_All_RECORDS, inOrder); } else { topScoreDocCollector = TopScoreDocCollector.create(hitsPerPage, inOrder); } this.getIndexSearcher(this.getIndexReader()).search(query, topScoreDocCollector); TopDocs topDocs = topScoreDocCollector.topDocs(); //System.out.println("TOP DOC Size: " + topDocs.scoreDocs.length); return topDocs.scoreDocs; }
From source file:indexer.IndexHtmlToText.java
/**
 * Looks up the document whose {@code TrecDocIndexer.FIELD_ID} term equals {@code docId},
 * decompresses its stored {@code WTDOC_FIELD_HTML} binary field, prints the result to
 * standard output and returns it.
 *
 * @param indexDirPath file-system path of the Lucene index directory
 * @param docId identifier of the document to fetch
 * @return the decompressed HTML of the matching document
 * @throws IllegalArgumentException if no document with {@code docId} is indexed
 * @throws Exception if the index cannot be read or the content cannot be decompressed
 */
static String getHTMLFromDocId(String indexDirPath, String docId) throws Exception {
    File indexDir = new File(indexDirPath);
    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexDir));
    // try/finally: the reader is released even when the lookup or decompression fails.
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = new TermQuery(new Term(TrecDocIndexer.FIELD_ID, docId));
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        searcher.search(query, collector);
        TopDocs topDocs = collector.topDocs();

        // Fail with a meaningful message instead of an ArrayIndexOutOfBoundsException
        // when the id is unknown.
        if (topDocs.scoreDocs.length == 0) {
            throw new IllegalArgumentException("No indexed document found for id: " + docId);
        }

        ScoreDoc sd = topDocs.scoreDocs[0];
        Document doc = reader.document(sd.doc);
        // NOTE(review): this passes the BytesRef's whole backing array and ignores its
        // offset/length -- confirm that decompress tolerates that.
        String htmlDecompressed = decompress(doc.getBinaryValue(WTDOC_FIELD_HTML).bytes);
        System.out.println(htmlDecompressed);
        return htmlDecompressed;
    } finally {
        reader.close();
    }
}
From source file:invertedindex.LineNumberSearcher.java
public ArrayList<String> search(String keyword, String filePath) throws IOException { String indexLocation = getLineIndexLocation(); // System.out.println("Inside LINE search method"); try {//from w ww. j a va 2 s. c o m IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String queryContent = keyword; queryContent = "\"" + queryContent + "\""; // queryContent = "*" + queryContent + "*"; String queryFilePath = filePath; // System.out.println("FIELPATH "+queryFilePath); queryFilePath = "\"" + queryFilePath + "\""; // queryFilePath = "*" + queryFilePath + "*"; QueryParser queryParserContent = new QueryParser(Version.LUCENE_47, "contents", analyzer); QueryParser queryParserFilePath = new QueryParser(Version.LUCENE_47, "path", analyzer); queryParserContent.setAllowLeadingWildcard(true); //queryParserFileName.setAllowLeadingWildcard(true); // Query q = queryParser.parse(query); Query qContent = queryParserContent.parse(queryContent); Query qFileName = queryParserFilePath.parse(queryFilePath); // System.out.println("FIELPATH "+qFileName); BooleanQuery q = new BooleanQuery(); q.add(qContent, Occur.MUST); // MUST implies that the keyword must occur. q.add(qFileName, Occur.MUST); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // System.out.println("Found " + hits.length + " hits."); lineNumbersList = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); // System.out.println((i + 1) + ". 
" + d.get("filename") // + " score=" + hits[i].score); // System.out.println("Line Number is "+d.get("lineNumber")); // System.out.println("Content is "+d.get("contents")); // String filePath = d.get("path"); lineNumbersList.add(d.get("lineNumber")); } reader.close(); return lineNumbersList; } catch (Exception e) { System.out.println("Error searching in line number search " + indexLocation + " : " + e.getMessage()); } return lineNumbersList; }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> search(String keyword) throws IOException { String indexLocation = this.getIndexLocation(); // System.out.println("Inside search method"); // indexLocation = ""; // BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); // while (true) { try {/*from www .j ava 2 s. c o m*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query = keyword; query = "\"" + query + "\""; Query q = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query); SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); // System.out.println(q); // searcher.search(q, collector); // searcher.search(q, null,topDocs); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. 
display results System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(q); // System.out.println("fq "+fq); String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 50, 10); //String[] lineFragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 18,10); Document d = searcher.doc(docId); String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { // System.out.println("FRAGMENT iS "+fragments[j]); // int k=0; // for(k=0;k<lineFragments.length;k++){ // fragments[j].getSc String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumberArrayList = new ArrayList<>(); lineNumber = "null"; boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if(lineNumbersList.get(0).isEmpty()){ // lineNumber = "Not Found"; // }else { if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } // } } //here is the tried code for enter space /* else{ System.out.println("YES G"); String lines[] = fragments[j].split("\\r?\\n"); // ArrayList<String> newLines = new ArrayList<>(); ArrayList<String> newLines = new ArrayList<>(Arrays.asList(lines)); System.out.println("Here 3"); int special = 0; for(String line : newLines){ if(Pattern.compile("^$").matcher(line).find()){ newLines.remove(line); special++; } } System.out.println("Here 4"); // List<String> list = Arrays.asList(lines); // if(list.contains(temp)){ // // } // for(String line: newLines){ // System.out.println("LINE IS "+line); // } if(newLines.size()==1){ // System.out.println("Yes G but 
NOT G"); lineNumbersList = lns.search(temp,filePath); if(!lineNumberArrayList.isEmpty()){ lineNumber = lineNumbersList.get(0); } System.out.println("Here 1"); }else{ System.out.println("Here 2"); ArrayList<String> a0 = lns.search(Jsoup.parse(newLines.get(0)).text(),filePath); ArrayList<String> a1 = lns.search(Jsoup.parse(newLines.get(1)).text(),filePath); int k,l; outerloop: for(k=0;k<a0.size();k++){ for(l=0;l<a1.size();l++){ int secondline = Integer.parseInt(a1.get(l)); // System.out.println("second line is"+ secondline); int firstline = Integer.parseInt(a0.get(k)); // System.out.println("first line is"+firstline); int diff = secondline - firstline; // System.out.println("DIFFERENCE IS "+diff); // System.out.println("Special IS "+special); if(diff == special+1){ insideLoopFlag = true; // System.out.println("K IS "+k); // System.out.println("IN BREAK "); break outerloop; } } // System.out.println("K IS "+k); } // System.out.println("OUT OF FOR LOOP"); // System.out.println("K IS "+k); if(insideLoopFlag==true){ lineNumber = String.valueOf(a0.get(k)); } // System.out.println("LINE NUMBER IS "+lineNumber); } } */ // } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + "...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } //Setting Results SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); // sr.setLineNumber(lineNumber); searchResulsAL.add(sr); // } // writer.close(); reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); // break; } // } return searchResulsAL; }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> multipleSearch(String keyword1, String keyword2, String radio) throws IOException { String indexLocation = this.getIndexLocation(); try {//from w w w . j a va 2 s.com IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query1 = keyword1; String query2 = keyword2; query1 = "\"" + query1 + "\""; query2 = "\"" + query2 + "\""; Query q1 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query1); Query q2 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query2); BooleanQuery apiQuery = new BooleanQuery(); if (radio.equalsIgnoreCase("and")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST); } else if (radio.equalsIgnoreCase("or")) { apiQuery.add(q1, BooleanClause.Occur.SHOULD); apiQuery.add(q2, BooleanClause.Occur.SHOULD); } else if (radio.equalsIgnoreCase("not")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST_NOT); } SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); searcher.search(apiQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(apiQuery); // String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), 
docId, "contents", 50, 10); Document d = searcher.doc(docId); // String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumber = "null"; lineNumberArrayList = new ArrayList<>(); boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G g"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + " ...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); searchResulsAL.add(sr); } reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); } return searchResulsAL; }
From source file:io.datalayer.lucene.search.LuceneQueryTest.java
License:Apache License
private static void query(String indexDir, Query q) throws IOException, ParseException { int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher indexSearcher = new IndexSearcher(reader); TopDocsCollector collector = TopScoreDocCollector.create(hitsPerPage, false); indexSearcher.search(q, collector);/*w w w . ja v a2s . c o m*/ ScoreDoc[] hits = collector.topDocs().scoreDocs; LOGGER.info("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = indexSearcher.doc(docId); // LOGGER.info((i + 1) + ". " + d.get("title")); } // searcher can only be closed when there // is no need to access the documents any more. // indexSearcher.close(); }
From source file:io.datalayer.lucene.search.LuceneSearchTest.java
License:Apache License
private void queryIndex(Query query, String fieldname) throws CorruptIndexException, IOException { LOGGER.info("-------------------------------------"); long start = java.util.Calendar.getInstance().getTimeInMillis(); int hitsPerPage = 100; IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); TopDocsCollector collector = TopScoreDocCollector.create(hitsPerPage, false); indexSearcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; long end = java.util.Calendar.getInstance().getTimeInMillis(); // float duration = (end - start) / 1000; LOGGER.info("Found " + hits.length + " hits in " + (end - start) + " milliseconds"); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document document = indexSearcher.doc(docId); LOGGER.info((i + 1) + ". " + document.get(fieldname)); }/*from w w w . j av a2s . com*/ }