List of usage examples for org.apache.lucene.search TopScoreDocCollector create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)

Note: this two-int signature is the Lucene 8+ overload. The examples below were written against earlier Lucene versions and mostly call the older overloads create(int numHits, boolean docsScoredInOrder) (Lucene 3.x-4.x) or create(int numHits, ScoreDoc after) (Lucene 5.x-6.x).
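In the Lucene 8+ overload, numHits caps how many top documents are kept, while totalHitsThreshold caps how many hits are counted exactly; beyond the threshold the reported total hit count becomes a lower bound. Since none of the examples below use this signature, here is a minimal sketch of calling it, assuming a Lucene 8.x classpath; the index path and the "title" field are illustrative assumptions, not taken from any of the sources below:

    import java.nio.file.Paths;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.TopScoreDocCollector;
    import org.apache.lucene.store.FSDirectory;

    public class CreateExample {
        public static void main(String[] args) throws Exception {
            // "/tmp/index" and the "title" field are illustrative assumptions.
            try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new TermQuery(new Term("title", "lucene"));
                // Keep the top 10 hits; count total hits exactly only up to 1000,
                // after which topDocs.totalHits is reported as a lower bound.
                TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
                searcher.search(query, collector);
                TopDocs topDocs = collector.topDocs();
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    System.out.println(searcher.doc(hit.doc).get("title") + "\t" + hit.score);
                }
            }
        }
    }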
From source file:com.yahoo.bard.webservice.data.dimension.impl.TimeLimitingCollectorManager.java
License:Apache License
@Override
public AccessibleTimeLimitingCollector newCollector() throws IOException {
    return new AccessibleTimeLimitingCollector(TopScoreDocCollector.create(perPage, lastEntry),
            Counter.newCounter(false), searchTimeoutMs);
}
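For context on how such a manager is consumed: IndexSearcher.search(Query, CollectorManager) calls newCollector() once per index slice and then merges the per-slice results in reduce(). Below is a hedged sketch of a plain (non-time-limited) manager built on the same create(int, ScoreDoc) overload used above; the class name and constructor arguments are illustrative, not part of the original source:

    import java.io.IOException;
    import java.util.Collection;
    import org.apache.lucene.search.CollectorManager;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.TopScoreDocCollector;

    // Sketch: each slice gets its own collector; reduce() merges the slice results.
    class TopDocsManager implements CollectorManager<TopScoreDocCollector, TopDocs> {
        private final int numHits;
        private final ScoreDoc after; // null for the first page

        TopDocsManager(int numHits, ScoreDoc after) {
            this.numHits = numHits;
            this.after = after;
        }

        @Override
        public TopScoreDocCollector newCollector() throws IOException {
            return TopScoreDocCollector.create(numHits, after); // Lucene 5.x-6.x overload
        }

        @Override
        public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
            TopDocs[] perSlice = new TopDocs[collectors.size()];
            int i = 0;
            for (TopScoreDocCollector c : collectors) {
                perSlice[i++] = c.topDocs();
            }
            return TopDocs.merge(numHits, perSlice);
        }
    }

    // Usage: TopDocs top = searcher.search(query, new TopDocsManager(10, null));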
From source file:cz.muni.fi.japanesedictionary.engine.CharacterLoader.java
License:Open Source License
/**
 * Searches for Japanese characters in KanjiDict2.
 *
 * @param params the string containing the characters to look up
 * @return a Map<String, JapaneseCharacter> if some characters were found, else null
 */
@SuppressWarnings("MalformedRegex")
@Override
protected Map<String, JapaneseCharacter> doInBackground(String... params) {
    String characterList = params[0];
    if (characterList == null || characterList.length() < 1) {
        return null;
    }
    SharedPreferences settings = mContext.getSharedPreferences(ParserService.DICTIONARY_PREFERENCES, 0);
    String pathToDictionary = settings.getString(Const.PREF_KANJIDIC_PATH, null);
    if (pathToDictionary == null) {
        Log.e(LOG_TAG, "No path to kanjidict2 dictionary");
        return null;
    }
    File file = new File(pathToDictionary);
    if (!file.exists() || !file.canRead()) {
        Log.e(LOG_TAG, "Can't read dictionary directory");
        return null;
    }

    // Build the search string
    StringBuilder searchBuilder = new StringBuilder();
    final int characterListSize = characterList.length();
    for (int i = 0; i < characterListSize; i++) {
        String character = String.valueOf(characterList.charAt(i));
        if (Pattern.matches("\\p{Han}", character)) {
            if (searchBuilder.length() > 0) {
                searchBuilder.append(' '); // in Lucene, a space acts as OR
            }
            searchBuilder.append('"').append(character).append('"');
        }
    }
    String search = searchBuilder.toString();
    if (search.length() == 0) {
        return null;
    }

    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_36);
    try {
        QueryParser query = new QueryParser(Version.LUCENE_36, "literal", analyzer);
        query.setPhraseSlop(0);
        Query q = query.parse(search);
        if (mSearcher == null) {
            Directory dir = FSDirectory.open(file);
            IndexReader reader = IndexReader.open(dir);
            mSearcher = new IndexSearcher(reader);
        }
        TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
        mSearcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        Map<String, JapaneseCharacter> result = new HashMap<>();
        for (ScoreDoc document : hits) {
            int docId = document.doc;
            Document d = mSearcher.doc(docId);
            JapaneseCharacter japanCharacter = new JapaneseCharacter();

            String literal = d.get("literal");
            if (literal != null && literal.length() > 0) {
                japanCharacter.setLiteral(literal);
            }
            String radicalClassic = d.get("radicalClassic");
            if (radicalClassic != null && radicalClassic.length() > 0) {
                try {
                    int radicalClassicInt = Integer.parseInt(radicalClassic);
                    if (radicalClassicInt > 0) {
                        japanCharacter.setRadicalClassic(radicalClassicInt);
                    }
                } catch (NumberFormatException ex) {
                    Log.w(LOG_TAG, "Couldn't parse radical-classical: " + radicalClassic);
                }
            }
            String grade = d.get("grade");
            if (grade != null && grade.length() > 0) {
                try {
                    int gradeInt = Integer.parseInt(grade);
                    if (gradeInt > 0) {
                        japanCharacter.setGrade(gradeInt);
                    }
                } catch (NumberFormatException ex) {
                    Log.w(LOG_TAG, "Couldn't parse grade: " + grade);
                }
            }
            String strokeCount = d.get("strokeCount");
            if (strokeCount != null && strokeCount.length() > 0) {
                try {
                    int strokeCountInt = Integer.parseInt(strokeCount);
                    if (strokeCountInt > 0) {
                        japanCharacter.setStrokeCount(strokeCountInt);
                    }
                } catch (NumberFormatException ex) {
                    Log.w(LOG_TAG, "Couldn't parse strokeCount: " + strokeCount);
                }
            }
            String skip = d.get("queryCodeSkip");
            if (skip != null && skip.length() > 0) {
                japanCharacter.setSkip(skip);
            }
            String dicRef = d.get("dicRef");
            if (dicRef != null && dicRef.length() > 0) {
                japanCharacter.parseDicRef(dicRef);
            }
            String rmGroupJaOn = d.get("rmGroupJaOn");
            if (rmGroupJaOn != null && rmGroupJaOn.length() > 0) {
                japanCharacter.parseRmGroupJaOn(rmGroupJaOn);
            }
            String rmGroupJaKun = d.get("rmGroupJaKun");
            if (rmGroupJaKun != null && rmGroupJaKun.length() > 0) {
                japanCharacter.parseRmGroupJaKun(rmGroupJaKun);
            }
            String meaningEnglish = d.get("meaningEnglish");
            if (meaningEnglish != null && meaningEnglish.length() > 0) {
                japanCharacter.parseMeaningEnglish(meaningEnglish);
            }
            String meaningFrench = d.get("meaningFrench");
            if (meaningFrench != null && meaningFrench.length() > 0) {
                japanCharacter.parseMeaningFrench(meaningFrench);
            }
            String meaningDutch = d.get("meaningDutch");
            if (meaningDutch != null && meaningDutch.length() > 0) {
                japanCharacter.parseMeaningDutch(meaningDutch);
            }
            String meaningGerman = d.get("meaningGerman");
            if (meaningGerman != null && meaningGerman.length() > 0) {
                japanCharacter.parseMeaningGerman(meaningGerman);
            }
            String meaningRussian = d.get("meaningRussian");
            if (meaningRussian != null && meaningRussian.length() > 0) {
                japanCharacter.parseMeaningRussian(meaningRussian);
            }
            String nanori = d.get("nanori");
            if (nanori != null && nanori.length() > 0) {
                japanCharacter.parseNanori(nanori);
            }
            if (japanCharacter.getLiteral() != null && japanCharacter.getLiteral().length() > 0) {
                result.put(japanCharacter.getLiteral(), japanCharacter);
            }
        }
        return result.size() > 0 ? result : null;
    } catch (ParseException ex) {
        Log.e(LOG_TAG, "Searching for characters: ParseException caught: " + ex);
    } catch (IOException ex) {
        Log.e(LOG_TAG, "Searching for characters: IOException caught: " + ex);
    } catch (Exception ex) {
        Log.e(LOG_TAG, "Searching for characters: Exception caught: " + ex);
    }
    return null;
}
From source file:de.anycook.db.lucene.FulltextIndex.java
License:Open Source License
public Set<String> search(String q) throws IOException {
    Set<String> recipes = new LinkedHashSet<>();
    String[] fields = new String[] { "description", "steps" };
    logger.debug(String.format("searching for %s", q));
    try (IndexReader reader = DirectoryReader.open(index)) {
        int hitsPerPage = 1000;
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = new MultiFieldQueryParser(fields, analyzer).parse(q);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, null);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            recipes.add(d.get("title"));
        }
    } catch (CorruptIndexException | ParseException e) {
        logger.error(e);
    }
    logger.debug(String.format("found %d results", recipes.size()));
    return recipes;
}
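The null second argument above is the `after` parameter of the Lucene 5.x/6.x overload create(int numHits, ScoreDoc after), which supports deep paging: pass null for the first page and the last ScoreDoc of the previous page for the next one. A minimal sketch, assuming a searcher and query like those in the example above:

    // Hedged sketch of paging with the create(int, ScoreDoc) overload (Lucene 5.x-6.x);
    // searcher and query are assumed to exist as in the example above.
    int hitsPerPage = 10;

    // First page: no anchor document yet.
    TopScoreDocCollector first = TopScoreDocCollector.create(hitsPerPage, null);
    searcher.search(query, first);
    ScoreDoc[] page1 = first.topDocs().scoreDocs;

    if (page1.length == hitsPerPage) {
        // Second page: anchor at the last hit of the previous page.
        ScoreDoc last = page1[page1.length - 1];
        TopScoreDocCollector second = TopScoreDocCollector.create(hitsPerPage, last);
        searcher.search(query, second);
        ScoreDoc[] page2 = second.topDocs().scoreDocs;
    }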
From source file:de.csw.linkgenerator.plugin.lucene.LucenePlugin.java
License:Open Source License
/**
 * Creates and submits a query to the Lucene engine.
 *
 * @param query The base query, using the query engine supported by Lucene.
 * @param sort A Lucene sort object; can contain one or more sort criteria. If <tt>null</tt>,
 *            sort by hit score.
 * @param virtualWikiNames Comma-separated list of virtual wiki names to search in; may be
 *            <tt>null</tt> to search all virtual wikis.
 * @param languages Comma-separated list of language codes to search in; may be <tt>null</tt>
 *            or empty to search all languages.
 * @param indexes List of Lucene indexes (searchers) to search.
 * @param context The context of the request.
 * @return The list of search results.
 * @throws IOException If the Lucene searchers encounter a problem reading the indexes.
 * @throws ParseException If the query is not valid.
 */
private SearchResults search(String query, Sort sort, String virtualWikiNames, String languages,
        IndexSearcher[] indexes, XWikiContext context)
        throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    // MultiSearcher searcher = new MultiSearcher(indexes);
    IndexReader[] readers = new IndexReader[indexes.length];
    for (int i = 0; i < readers.length; i++) {
        readers[i] = indexes[i].getIndexReader();
    }
    IndexSearcher searcher = new IndexSearcher(new MultiReader(readers));

    // Enhance the base query with wiki names and languages.
    Query q = buildQuery(query, virtualWikiNames, languages);

    TopDocsCollector<? extends ScoreDoc> topDocs;
    if (sort != null) {
        topDocs = TopFieldCollector.create(sort, MAX_RESULTS, true, true, false, false);
    } else {
        topDocs = TopScoreDocCollector.create(MAX_RESULTS, false);
    }

    // Perform the actual search
    searcher.search(q, topDocs);

    // Transform the raw Lucene search results into XWiki-aware results
    return new SearchResults(topDocs, searcher,
            new com.xpn.xwiki.api.XWiki(context.getWiki(), context), context);
}
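On Lucene 8.x, where the create(int numHits, int totalHitsThreshold) overload at the top of this page lives, the same sorted-vs-relevance branch would look roughly like the sketch below. This is an assumed modernization for illustration, not code from the LucenePlugin source; both collectors there take a totalHitsThreshold instead of the older boolean flags.

    // Hedged sketch (assumed Lucene 8.x); q, sort, searcher, MAX_RESULTS as above.
    TopDocsCollector<? extends ScoreDoc> collector;
    if (sort != null) {
        // Integer.MAX_VALUE threshold requests exact total hit counts.
        collector = TopFieldCollector.create(sort, MAX_RESULTS, Integer.MAX_VALUE);
    } else {
        collector = TopScoreDocCollector.create(MAX_RESULTS, Integer.MAX_VALUE);
    }
    searcher.search(q, collector);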
From source file:ead.editor.model.ModelIndex.java
License:Open Source License
/**
 * Get a (sorted) list of nodes that match a query.
 */
public List<DependencyNode> searchAll(String queryText, Map<Integer, DependencyNode> nodesById) {
    ArrayList<DependencyNode> nodes = new ArrayList<DependencyNode>();
    try {
        IndexReader reader = IndexReader.open(searchIndex);
        Query query = getQueryAllParser().parse(queryText);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            String nodeId = searcher.doc(hit.doc).get(editorIdFieldName);
            nodes.add(nodesById.get(Integer.parseInt(nodeId)));
        }
        searcher.close();
    } catch (Exception e) {
        logger.error("Error parsing or looking up query '{}' in index", queryText, e);
    }
    return nodes;
}
From source file:ead.editor.model.ModelIndex.java
License:Open Source License
/**
 * Get a (sorted) list of nodes that match a query on a single field.
 */
public List<DependencyNode> search(String field, String queryText,
        Map<Integer, DependencyNode> nodesById) {
    ArrayList<DependencyNode> nodes = new ArrayList<DependencyNode>();
    try {
        IndexReader reader = IndexReader.open(searchIndex);
        Query query = new QueryParser(Version.LUCENE_35, field, searchAnalyzer).parse(queryText);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            String nodeId = searcher.doc(hit.doc).get(editorIdFieldName);
            nodes.add(nodesById.get(Integer.parseInt(nodeId)));
        }
        searcher.close();
    } catch (Exception e) {
        logger.error("Error parsing or looking up query '{}' in index", queryText, e);
    }
    return nodes;
}
From source file:edu.cuhk.hccl.cmd.AppSearchEngine.java
License:Apache License
public static void main(String[] args) throws IOException {
    // Get parameters
    CommandLineParser parser = new BasicParser();
    Options options = createOptions();

    File dataFolder = null;
    String queryStr = null;
    int topK = 0;
    File resultFile = null;
    String queryType = null;
    File similarityFile = null;

    try {
        CommandLine line = parser.parse(options, args);
        dataFolder = new File(line.getOptionValue('d'));
        queryStr = line.getOptionValue('q');
        queryType = line.getOptionValue('t');
        topK = Integer.parseInt(line.getOptionValue('k'));
        resultFile = new File(line.getOptionValue('f'));
        similarityFile = new File(line.getOptionValue('s'));
        if (line.hasOption('m')) {
            String modelPath = line.getOptionValue('m');
            if (queryType.equalsIgnoreCase("WordVector")) {
                expander = new WordVectorExpander(modelPath);
            } else if (queryType.equalsIgnoreCase("WordNet")) {
                expander = new WordNetExpander(modelPath);
            } else {
                System.out.println("Please choose a correct expander: WordNet or WordVector!");
                System.exit(-1);
            }
        }
    } catch (ParseException exp) {
        System.out.println("Error in parameters: \n" + exp.getMessage());
        System.exit(-1);
    }

    // Create index
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Directory index = createIndex(dataFolder, analyzer);

    // Build query
    Query query = buildQuery(analyzer, queryStr, queryType);

    // Search index for topK hits
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(topK, true);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    // Show search results
    System.out.println("\n[INFO] " + hits.length + " hits were returned:");
    List<String> hitLines = new ArrayList<String>();
    for (int i = 0; i < hits.length; i++) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        String line = (i + 1) + "\t" + d.get(PATH_FIELD) + "\t" + hits[i].score;
        System.out.println(line);
        hitLines.add(line);
    }

    // Compute cosine similarity between documents
    List<String> simLines = new ArrayList<String>();
    for (int m = 0; m < hits.length; m++) {
        int doc1 = hits[m].doc;
        Terms terms1 = reader.getTermVector(doc1, CONTENT_FIELD);
        for (int n = m + 1; n < hits.length; n++) {
            int doc2 = hits[n].doc;
            Terms terms2 = reader.getTermVector(doc2, CONTENT_FIELD);
            CosineDocumentSimilarity cosine = new CosineDocumentSimilarity(terms1, terms2);
            double similarity = cosine.getCosineSimilarity();
            String line = searcher.doc(doc1).get(PATH_FIELD) + "\t"
                    + searcher.doc(doc2).get(PATH_FIELD) + "\t" + similarity;
            simLines.add(line);
        }
    }

    // Release resources
    reader.close();
    if (expander != null) {
        expander.close();
    }

    // Save search results
    System.out.println("\n[INFO] Search results are saved in file: " + resultFile.getPath());
    FileUtils.writeLines(resultFile, hitLines, false);
    System.out.println("\n[INFO] Cosine similarities are saved in file: " + similarityFile.getPath());
    FileUtils.writeLines(similarityFile, simLines, false);
}
From source file:edu.cuhk.hccl.SearchController.java
License:Apache License
/**
 * Run the search engine against the given keywords.
 *
 * @param queryStr
 * @param model
 * @return
 * @throws IOException
 */
private String runSearch(String queryStr, Model model) throws IOException {
    // Find synonymous words of queryStr
    List<String> synWords = expander.expandQuery(queryStr, EXPAND_K);
    List<String> wordList = new ArrayList<String>();
    for (int i = 0; i < synWords.size(); i++) {
        wordList.add(synWords.get(i).replace('_', ' '));
    }
    model.addAttribute("wordList", wordList);

    // Build query
    Query query = buildQuery(synWords);
    TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_N, true);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    // Get search results
    List<String> hitLines = new ArrayList<String>();
    List<SearchResult> resultList = new ArrayList<SearchResult>();
    for (int i = 0; i < hits.length; i++) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        String line = (i + 1) + "\t" + d.get(Indexer.PATH_FIELD) + "\t" + hits[i].score;
        hitLines.add(line);

        SearchResult result = new SearchResult();
        result.setId(i + 1);
        String url = d.get(Indexer.PATH_FIELD);
        String[] urlSplit = url.split("/");
        String title = urlSplit[urlSplit.length - 2] + "-" + urlSplit[urlSplit.length - 1];
        result.setTitle(title);
        result.setUrl(url);
        result.setScore(hits[i].score);
        resultList.add(result);
    }

    // Return search results for page "result.html"
    model.addAttribute("resultList", resultList);
    return "result";
}
From source file:edu.isi.pfindr.learn.search.LuceneDictionaryAugmenter.java
License:Apache License
public String expandWithDictionaryFromTopLuceneIndexTerms(String data) {
    StringBuilder dictionaryDataBuilder = new StringBuilder();
    data = data.replaceAll("\\s+", " ");
    dictionaryDataBuilder.append(data);
    try {
        // Construct the query
        //Query q = new QueryParser(Version.LUCENE_30, "id_content", analyzer).parse(data);
        //Query q = new QueryParser(Version.LUCENE_30, "content", analyzer).parse(data);
        //IndexReader indexReader = IndexReader.open(indexDir);
        IndexSearcher indexSearcher = new IndexSearcher(indexDir);
        QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content",
                new StandardAnalyzer(Version.LUCENE_30, new File(
                        ServletContextInfo.getContextPath() + stopWordsDirectory + "stopwords.txt")));
        //queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.parse(data);

        // Get the top hits
        TopScoreDocCollector collector = TopScoreDocCollector.create(HIT_COUNT, true);

        // Search the dictionary index
        indexSearcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // Walk through the top hits (capped by HIT_COUNT above) and collect
        // the tf-idf weight of each term in a map
        Map<String, Double> termFreqMap = new HashMap<String, Double>();
        double value;
        for (int i = 0; i < hits.length; ++i) {
            TermPositionVector v = (TermPositionVector) indexSearcher.getIndexReader()
                    .getTermFreqVector(hits[i].doc, "content");
                    //.getTermFreqVector(hits[i].doc, "id_content");
            String[] terms = v.getTerms();
            int[] freq = v.getTermFrequencies();
            double[] tfidf = new double[v.getTerms().length];
            double termTotal = 0.0;
            int docTotal = indexSearcher.getIndexReader().numDocs();
            for (int t = 0; t < terms.length; t++) {
                termTotal += freq[t];
            }
            for (int j = 0; j < terms.length; ++j) {
                tfidf[j] = (double) (freq[j] / termTotal) * (1 + Math.log(docTotal
                        / (1 + (indexSearcher.getIndexReader().docFreq(new Term("content", terms[j]))))));
                        //(indexSearcher.getIndexReader().docFreq(new Term("id_contents", terms[j]))))));
                if (!termFreqMap.containsKey(terms[j])) { // the map does not already contain the term
                    termFreqMap.put(terms[j], tfidf[j]);
                } else { // else keep the larger of the two weights
                    value = termFreqMap.get(terms[j]).doubleValue() > tfidf[j]
                            ? termFreqMap.get(terms[j]).doubleValue()
                            : tfidf[j];
                    //value = ((Double) termFreqMap.get(terms[j])).doubleValue() + tfidf[j];
                    termFreqMap.put(terms[j], value);
                }
            }
        }

        // Append the highest-weighted terms (capped by MAX_DICTIONARY_TERMS) to the original query
        if (hits.length > 0) {
            value = 0; // reusing the variable as an index now
            Map<String, String> sortedMap = SortMap.sortByComparator(termFreqMap);
            // Include the top matches from the dictionary definition
            for (Map.Entry entry : sortedMap.entrySet()) {
                dictionaryDataBuilder.append(" ")
                        .append((((String) entry.getKey()).replaceAll("\\t", " ")).replaceAll("\\s+", " "));
                if (value++ > MAX_DICTIONARY_TERMS) // keep only the top terms
                    break;
            }
        }

        // Close the searcher; no need to access the documents any more.
        indexSearcher.close();
    } catch (CorruptIndexException ce) {
        ce.printStackTrace();
    } catch (IOException io) {
        io.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return dictionaryDataBuilder.toString();
}
From source file:edu.isi.pfindr.learn.search.LuceneSearchEngine.java
License:Apache License
public static Map<String, Double> search(String queryString, String descriptionExpandedNotStemmed,
        String descriptionExpandedStemmed) {
    Map<String, Double> searchResultMap = new HashMap<String, Double>();

    // Escape characters that are special to Lucene
    String originalDefinitionEscaped = LUCENE_PATTERN.matcher(queryString)
            .replaceAll(REPLACEMENT_STRING_ESCAPE).toLowerCase();
    descriptionExpandedNotStemmed = LUCENE_PATTERN.matcher(descriptionExpandedNotStemmed)
            .replaceAll(REPLACEMENT_STRING_ESCAPE).toLowerCase();
    descriptionExpandedStemmed = LUCENE_PATTERN.matcher(descriptionExpandedStemmed)
            .replaceAll(REPLACEMENT_STRING_ESCAPE).toLowerCase();

    try {
        String originalDefinitionStemmedQuery = CleanDataUtil
                .preprocessStemAndTokenize(queryString.toLowerCase()).trim();
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        Query query;

        // Get the top hits
        TopScoreDocCollector collector = TopScoreDocCollector.create(100000, true);
        indexSearcher = getIndexSearcher();
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

        // Query syntax examples:
        //   +(+contents:hello +contents:world) +priority:high
        //   "jakarta apache"^4 "Apache Lucene"
        if (!originalDefinitionStemmedQuery.equals("")) {
            originalDefinitionStemmedQuery = LUCENE_PATTERN.matcher(originalDefinitionStemmedQuery)
                    .replaceAll(REPLACEMENT_STRING_ESCAPE);
            String[] fields = new String[] { "content", "contentStemmed", "contentExpanded",
                    "contentExpandedStemmed" };
            String[] queries = new String[] {
                    "\"" + originalDefinitionEscaped.trim().toLowerCase() + "\"^8 "
                            + originalDefinitionEscaped.trim().toLowerCase(),
                    originalDefinitionStemmedQuery + "^3", descriptionExpandedNotStemmed,
                    descriptionExpandedStemmed };
            query = MultiFieldQueryParser.parse(Version.LUCENE_30, queries, fields, analyzer);
        } else {
            QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
            query = queryParser.parse("\"" + originalDefinitionEscaped.trim().toLowerCase() + "\"^8 "
                    + originalDefinitionEscaped.trim().toLowerCase());
        }

        indexSearcher.search(query, collector);
        ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
        int hitCount = collector.getTotalHits();
        if (hitCount > 0) {
            // Iterate over the collected documents. Note: the loop is bounded by
            // scoreDocs.length rather than hitCount, since getTotalHits() can exceed
            // the number of documents actually collected (numHits = 100000).
            ScoreDoc scoreDoc;
            for (int i = 0; i < scoreDocs.length; i++) {
                scoreDoc = scoreDocs[i];
                Document doc = indexSearcher.doc(scoreDoc.doc);
                if (!searchResultMap.containsKey((String) doc.get("orgContent")))
                    searchResultMap.put(((String) doc.get("orgContent")), new Double(scoreDoc.score));
            }
        }
        analyzer = null;
    } catch (org.apache.lucene.queryParser.ParseException pe) {
        pe.printStackTrace();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
    return searchResultMap;
}