List of usage examples for the org.apache.lucene.queryparser.classic.MultiFieldQueryParser constructor
public MultiFieldQueryParser(String[] fields, Analyzer analyzer)
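Before the examples from individual projects, here is a minimal, self-contained sketch of this constructor in use. It is not taken from any of the source files below; the index path and the field names "title" and "body" are assumptions for illustration, and the API calls follow the same Lucene style (FSDirectory, DirectoryReader, IndexSearcher) used by the examples that follow.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.FSDirectory;

public class MultiFieldSearchSketch {
    public static void main(String[] args) throws Exception {
        // the same query text is parsed against every listed field and the
        // per-field queries are combined into one boolean query
        String[] fields = { "title", "body" }; // assumed field names
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
        Query query = parser.parse("lucene search");

        // open an existing index (the path is a placeholder) and print the top hits
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            for (ScoreDoc hit : searcher.search(query, 10).scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("title") + " (score " + hit.score + ")");
            }
        }
    }
}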
From source file:lab_mri.SearchEngine.java
public List<SearchResult> search(String query, String[] fields, int n)
        throws IOException, ParseException, Exception {
    String eq = QueryParser.escape(query);
    QueryParser parser = new MultiFieldQueryParser(fields, new CustomAnalyzer());
    Query q = parser.parse(eq);
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(this.dir)));
    List<SearchResult> results = new ArrayList<>();
    TopDocs top = searcher.search(q, n);
    for (ScoreDoc doc : top.scoreDocs) {
        results.add(new SearchResult(searcher.doc(doc.doc).get("id"), doc.score));
    }
    return results;
}
From source file:lucene.lab.ReaderClass.java
public List<Album> search(String consulta) throws ParseException, IOException {
    Path indexPath = Paths.get("C:\\index\\");
    System.out.println("Entré! fuck");
    //File indexPath = new File("C:\\index\\");
    Directory dir = FSDirectory.open(indexPath);
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    PerFieldAnalyzerWrapper analyzerWrapper = this.createAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    String[] fields = { "title", "titleMin", "artist", "artistMin", "text", "summary", "tags", "tagsMin" };
    //QueryParser parser = new QueryParser(field, analyzerWrapper);
    MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzerWrapper);
    Query query = parser.parse(consulta);
    int hitsPerPage = 100;
    //IndexReader reader = IndexReader.open(index);
    //IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int max_cant_comentarios = 0;
    List<Integer> lista_cant_comentarios = new ArrayList<>();
    List<Album> lista_album = new ArrayList<>();
    System.out.println("Encontré " + hits.length + " hits.");
    if (hits.length > 0) {
        float maxLuceneScore = hits[0].score;
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            Album album = new Album();
            album.d = d;
            album.luceneScore = hits[i].score;
            lista_cant_comentarios.add(album.cant_comentarios());
            lista_album.add(album);
        }
        Collections.sort(lista_cant_comentarios);
        max_cant_comentarios = lista_cant_comentarios.get(lista_cant_comentarios.size() - 1);
        for (Album album : lista_album) {
            album.formulaRanking(maxLuceneScore, max_cant_comentarios);
            album.title = album.d.get("title");
            album.artist = album.d.get("artist");
            //System.out.println(album.score);
        }
        Collections.sort(lista_album);
        int i = 0;
        for (Album album : lista_album) {
            System.out.println((i + 1) + ". Título: " + album.d.get("title") + "\t" + "Artista: "
                    + album.d.get("artist") + "\t" + "Score" + album.score);
            i++;
        }
    }
    return lista_album;
}
From source file:luceneGazateer.EntryData.java
License:Apache License
public ArrayList<EntryData> searchDocuments(String indexerPath, String inputRecord, DocType recordType)
        throws IOException {
    File indexfile = new File(indexerPath);
    indexDir = FSDirectory.open(indexfile.toPath());
    //inputRecord.replace(","," ");
    if (!DirectoryReader.indexExists(indexDir)) {
        LOG.log(Level.SEVERE, "No Lucene Index Directory Found, Invoke indexBuild() First !");
        System.out.println("No Lucene Index Directory Found, Invoke indexBuild() First !");
        System.exit(1);
    }
    IndexReader reader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query q = null;
    HashMap<String, ArrayList<ArrayList<String>>> allCandidates = new HashMap<String, ArrayList<ArrayList<String>>>();
    if (!allCandidates.containsKey(inputRecord)) {
        try {
            ArrayList<ArrayList<String>> topHits = new ArrayList<ArrayList<String>>();
            //System.out.println("query is : " + inputRecord);
            q = new MultiFieldQueryParser(new String[] { "DATA" }, analyzer).parse(inputRecord);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            for (int i = 0; i < hits.length; ++i) {
                ArrayList<String> tmp1 = new ArrayList<String>();
                int docId = hits[i].doc;
                Document d;
                try {
                    d = searcher.doc(docId);
                    tmp1.add(d.get("ID"));
                    tmp1.add(d.get("DATA"));
                    tmp1.add(((Float) hits[i].score).toString());
                } catch (IOException e) {
                    e.printStackTrace();
                }
                topHits.add(tmp1);
            }
            allCandidates.put(inputRecord, topHits);
        } catch (org.apache.lucene.queryparser.classic.ParseException e) {
            e.printStackTrace();
        }
    }
    ArrayList<EntryData> resolvedEntities = new ArrayList<EntryData>();
    pickBestCandidates(resolvedEntities, allCandidates);
    reader.close();
    return resolvedEntities;
}
From source file:net.simpleframework.ado.lucene.AbstractLuceneManager.java
License:Apache License
protected Query getQuery(final String domain, final String[] queryFields, final String queryString) {
    Query query = null;
    QueryParser qp;
    if (StringUtils.hasText(queryString) && indexExists()
            && (qp = new MultiFieldQueryParser(queryFields, getDefaultAnalyzer())) != null) {
        try {
            // qp.setSplitOnWhitespace(true);
            qp.setAutoGeneratePhraseQueries(true);
            query = qp.parse(queryString.trim());
        } catch (final ParseException e) {
            getLog().warn(e);
        }
    }
    return query;
}
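A related variant, sketched here as an assumption rather than taken from AbstractLuceneManager: the classic query parser also offers an overloaded constructor MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map<String, Float> boosts) that weights individual fields, so that, for example, matches in a title field outrank matches in a body field. The field names and weights below are hypothetical.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;

public class BoostedMultiFieldSketch {
    static Query buildBoostedQuery(String userText) throws ParseException {
        // per-field boosts: title matches count three times as much as body matches (illustrative values)
        Map<String, Float> boosts = new HashMap<>();
        boosts.put("title", 3.0f);
        boosts.put("body", 1.0f);
        MultiFieldQueryParser parser =
                new MultiFieldQueryParser(new String[] { "title", "body" }, new StandardAnalyzer(), boosts);
        return parser.parse(userText);
    }
}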
From source file:net.tourbook.search.FTSearchManager.java
License:Open Source License
/**
 * @param searchText
 * @param searchFrom
 * @param searchTo
 * @param searchResult
 * @return
 */
private static void search(final String searchText, final int searchFrom, final int searchTo,
        final SearchResult searchResult) {
    try {
        setupIndexReader();
        final int maxDoc = _indexReader.maxDoc();
        if (maxDoc == 0) {
            // there are 0 documents
            searchResult.totalHits = 0;
            return;
        }
        final String[] queryFields = { //
                SEARCH_FIELD_TITLE, SEARCH_FIELD_DESCRIPTION, //
        };
        final int maxPassages[] = new int[queryFields.length];
        Arrays.fill(maxPassages, 1);
        final Analyzer analyzer = getAnalyzer();
        final MultiFieldQueryParser queryParser = new MultiFieldQueryParser(queryFields, analyzer);
        queryParser.setAllowLeadingWildcard(true);
        final Query query = queryParser.parse(searchText);
        if (_topDocsSearchText == null || _topDocsSearchText.equals(searchText) == false || true) {
            // this is a new search
            /*
             * Set sorting
             */
            final SortField sortByTime = new SortField(SEARCH_FIELD_TIME, Type.LONG, _isSortDateAscending == false);
            final Sort sort = new Sort(sortByTime);
            if (_isShowContentAll) {
                // no filtering
                _topDocs = _indexSearcher.search(query, maxDoc, sort);
            } else {
                // filter by content
                final BooleanFilter searchFilter = new BooleanFilter();
                if (_isShowContentMarker) {
                    final NumericRangeFilter<Integer> filter = NumericRangeFilter.newIntRange(
                            SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_TOUR_MARKER, DOC_SOURCE_TOUR_MARKER, true, true);
                    searchFilter.add(new FilterClause(filter, Occur.SHOULD));
                }
                if (_isShowContentTour) {
                    final NumericRangeFilter<Integer> filter = NumericRangeFilter
                            .newIntRange(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_TOUR, DOC_SOURCE_TOUR, true, true);
                    searchFilter.add(new FilterClause(filter, Occur.SHOULD));
                }
                if (_isShowContentWaypoint) {
                    final NumericRangeFilter<Integer> filter = NumericRangeFilter.newIntRange(
                            SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_WAY_POINT, DOC_SOURCE_WAY_POINT, true, true);
                    searchFilter.add(new FilterClause(filter, Occur.SHOULD));
                }
                _topDocs = _indexSearcher.search(query, searchFilter, maxDoc, sort);
            }
            _topDocsSearchText = searchText;
        }
        searchResult.totalHits = _topDocs.totalHits;
        /**
         * Get doc id's only for the current page.
         * <p>
         * It is very cheap to query the doc id's but very expensive to retrieve the documents.
         */
        final int docStartIndex = searchFrom;
        int docEndIndex = searchTo;
        final ScoreDoc[] scoreDocs = _topDocs.scoreDocs;
        final int scoreSize = scoreDocs.length;
        if (docEndIndex >= scoreSize) {
            docEndIndex = scoreSize - 1;
        }
        final int resultSize = docEndIndex - docStartIndex + 1;
        final int docids[] = new int[resultSize];
        for (int docIndex = 0; docIndex < resultSize; docIndex++) {
            docids[docIndex] = scoreDocs[docStartIndex + docIndex].doc;
        }
        // this can occur: field 'description' was indexed without offsets, cannot highlight
        final MyPostingsHighlighter highlighter = new MyPostingsHighlighter();
        final Map<String, String[]> highlights = highlighter.highlightFields(queryFields, query, _indexSearcher,
                docids, maxPassages);
        search_CreateResult(highlights, _indexReader, searchResult, docids, docStartIndex);
    } catch (final Exception e) {
        StatusUtil.showStatus(e);
        searchResult.error = e.getMessage();
    }
}
From source file:NewsIR_search.NewsIRSearcher.java
public NewsIRSearcher() throws IOException, Exception {
    GetProjetBaseDirAndSetProjectPropFile setPropFile = new GetProjetBaseDirAndSetProjectPropFile();
    prop = setPropFile.prop;
    propFileName = setPropFile.propFileName;
    tweet_starts_from_date = Integer.parseInt(prop.getProperty("tweet.starts.from.date", "20"));
    tweet_ends_from_date = Integer.parseInt(prop.getProperty("tweet.ends.from.date", "29"));
    /* max number of files to return for each query */
    num_ret = Integer.parseInt(setPropFile.prop.getProperty("retrieve.num_wanted"));
    /* Set retrieval model */
    setSimilarityFlag = setPropFile.prop.getProperty("retrieval.model");
    String indexDirectoryPath = prop.getProperty("indexPath");
    if (!indexDirectoryPath.endsWith("/")) {
        indexDirectoryPath = indexDirectoryPath.concat("/");
    }
    // reader = new IndexReader[tweet_ends_from_date - tweet_starts_from_date + 1];
    // searcher = new IndexSearcher[tweet_ends_from_date - tweet_starts_from_date + 1];
    for (int i = 0; i < tweet_ends_from_date - tweet_starts_from_date + 1; i++) {
        reader = DirectoryReader.open(FSDirectory.open(new File(indexDirectoryPath)));
        searcher = new IndexSearcher(reader);
        /* setting the similarity function */
        /* 1-BM25, 2-LM-JM, 3-LM-D, 4-DefaultLuceneSimilarity */
        setSimilarityFn_ResFileName(i);
    }
    /* setting the fields in which the searching will be performed */
    String[] fields = setQueryFieldsToSearch();
    // String[] fields = new String[] { "abstract", "title", "contexts" };
    /* using the same analyzer which is used for indexing */
    Analyzer engAnalyzer = getAnalyzer();
    mFQueryParser = new MultiFieldQueryParser(fields, engAnalyzer);
    // num_wanted = Integer.parseInt(prop.getProperty("retrieve.num_wanted", "100"));
    num_wanted = num_ret;
    /* setting the query list */
    queries = constructQuery();
    /* queries has all the RAW data read from the query file like: query_num, paper_title, paper_abstract, context etc. */
    String qfield = "qfield-";
    for (int i = 0; i < fields.length; i++) {
        if (i == 0) {
            qfield = qfield.concat(fields[i]);
        } else {
            qfield = qfield.concat("-").concat(fields[i]);
        }
    }
    DateFormat dateFormat = new SimpleDateFormat("yyyyMMdd-HHmmss");
    // get current date time with Date()
    Date date = new Date();
    resultsFile = dateFormat.format(date).concat("-" + resultsFile).concat("-").concat(qfield).concat("-topres")
            .concat(new Integer(num_wanted).toString()).concat(".res");
    String path = prop.getProperty("resPath");
    if (!path.endsWith("/")) {
        path = path.concat("/");
    }
    resultsFile = path.concat(resultsFile);
    System.out.println("Result will be saved in: " + resultsFile);
}
From source file:org.apache.carbondata.datamap.lucene.LuceneCoarseGrainDataMap.java
License:Apache License
/**
 * Prune the datamap with filter expression. It returns the list of
 * blocklets where these filters can exist.
 */
@Override
public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
        List<PartitionSpec> partitions) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();
    // only for test, query all data
    String strQuery = "*:*";
    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);
    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }
    // use MultiFieldQueryParser to parse the query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery,
                e.getMessage());
        LOGGER.error(errorMessage);
        return null;
    }
    // execute index search
    TopDocs result;
    try {
        result = indexSearcher.search(query, MAX_RESULT_NUMBER);
    } catch (IOException e) {
        String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
        LOGGER.error(errorMessage);
        throw new IOException(errorMessage);
    }
    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Set<Number>> mapBlocks = new HashMap<String, Set<Number>>();
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        // get a document
        Document doc = indexSearcher.doc(scoreDoc.doc);
        // get all fields
        List<IndexableField> fieldsInDoc = doc.getFields();
        // get this block id Map<BlockId, Set<BlockletId>>
        String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
        Set<Number> setBlocklets = mapBlocks.get(blockId);
        if (setBlocklets == null) {
            setBlocklets = new HashSet<Number>();
            mapBlocks.put(blockId, setBlocklets);
        }
        // get the blocklet id Set<BlockletId>
        Number blockletId = fieldsInDoc.get(BLOCKLETID_ID).numericValue();
        if (!setBlocklets.contains(blockletId.intValue())) {
            setBlocklets.add(blockletId.intValue());
        }
    }
    // result blocklets
    List<Blocklet> blocklets = new ArrayList<Blocklet>();
    // transform all blocks into result type blocklets Map<BlockId, Set<BlockletId>>
    for (Map.Entry<String, Set<Number>> mapBlock : mapBlocks.entrySet()) {
        String blockId = mapBlock.getKey();
        Set<Number> setBlocklets = mapBlock.getValue();
        // for blocklets in this block Set<BlockletId>
        for (Number blockletId : setBlocklets) {
            // add a CoarseGrainBlocklet
            blocklets.add(new Blocklet(blockId, blockletId.toString()));
        }
    }
    return blocklets;
}
From source file:org.apache.carbondata.datamap.lucene.LuceneFineGrainDataMap.java
License:Apache License
/**
 * Prune the datamap with filter expression. It returns the list of
 * blocklets where these filters can exist.
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
        List<PartitionSpec> partitions) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();
    // only for test, query all data
    String strQuery = getQueryString(filterExp.getFilterExpression());
    int maxDocs;
    try {
        maxDocs = getMaxDoc(filterExp.getFilterExpression());
    } catch (NumberFormatException e) {
        maxDocs = Integer.MAX_VALUE;
    }
    if (null == strQuery) {
        return null;
    }
    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);
    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }
    // use MultiFieldQueryParser to parse the query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    queryParser.setAllowLeadingWildcard(true);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery,
                e.getMessage());
        LOGGER.error(errorMessage);
        return null;
    }
    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Map<Integer, List<Short>>> mapBlocks = new HashMap<>();
    long luceneSearchStartTime = System.currentTimeMillis();
    for (Map.Entry<String, IndexSearcher> searcherEntry : indexSearcherMap.entrySet()) {
        IndexSearcher indexSearcher = searcherEntry.getValue();
        // take the min of total documents available in the reader and limit if set by the user
        maxDocs = Math.min(maxDocs, indexSearcher.getIndexReader().maxDoc());
        // execute index search
        TopDocs result = null;
        // the number of documents to be queried in one search. It will always be minimum of
        // search result and maxDocs
        int numberOfDocumentsToBeQueried = 0;
        // counter for maintaining the total number of documents finished querying
        int documentHitCounter = 0;
        try {
            numberOfDocumentsToBeQueried = Math.min(maxDocs, SEARCH_LIMIT);
            result = indexSearcher.search(query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        } catch (IOException e) {
            String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
            LOGGER.error(errorMessage);
            throw new IOException(errorMessage);
        }
        ByteBuffer intBuffer = ByteBuffer.allocate(4);
        // last scoreDoc in a result to be used in searchAfter API
        ScoreDoc lastScoreDoc = null;
        while (true) {
            for (ScoreDoc scoreDoc : result.scoreDocs) {
                // get a document
                Document doc = indexSearcher.doc(scoreDoc.doc);
                // get all fields
                List<IndexableField> fieldsInDoc = doc.getFields();
                if (writeCacheSize > 0) {
                    // It fills rowids to the map, its value is combined with multiple rows.
                    fillMapForCombineRows(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                } else {
                    // Fill rowids to the map
                    fillMap(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                }
                lastScoreDoc = scoreDoc;
            }
            // result will have the total number of hits therefore we always need to query on the
            // left over documents
            int remainingHits = result.totalHits - documentHitCounter;
            // break the loop if count reaches maxDocs to be searched or remaining hits become <= 0
            if (remainingHits <= 0 || documentHitCounter >= maxDocs) {
                break;
            }
            numberOfDocumentsToBeQueried = Math.min(remainingHits, SEARCH_LIMIT);
            result = indexSearcher.searchAfter(lastScoreDoc, query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        }
    }
    LOGGER.info("Time taken for lucene search: " + (System.currentTimeMillis() - luceneSearchStartTime) + " ms");
    // result blocklets
    List<FineGrainBlocklet> blocklets = new ArrayList<>();
    // transform all blocks into result type blocklets
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    for (Map.Entry<String, Map<Integer, List<Short>>> mapBlocklet : mapBlocks.entrySet()) {
        String blockletId = mapBlocklet.getKey();
        Map<Integer, List<Short>> mapPageIds = mapBlocklet.getValue();
        List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();
        // for pages in this blocklet Map<PageId, Set<RowId>>
        for (Map.Entry<Integer, List<Short>> mapPageId : mapPageIds.entrySet()) {
            // construct array rowid
            int[] rowIds = new int[mapPageId.getValue().size()];
            int i = 0;
            // for rowids in this page Set<RowId>
            for (Short rowid : mapPageId.getValue()) {
                rowIds[i++] = rowid;
            }
            // construct one page
            FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
            page.setPageId(mapPageId.getKey());
            page.setRowId(rowIds);
            // add this page into list pages
            pages.add(page);
        }
        // add a FineGrainBlocklet
        blocklets.add(new FineGrainBlocklet(filePath, blockletId, pages));
    }
    return blocklets;
}
From source file:org.apache.maven.indexer.examples.indexing.RepositoryIndexer.java
License:Apache License
public Set<ArtifactInfo> search(final String queryText) throws ParseException, IOException {
    final Query query = new MultiFieldQueryParser(LUCENE_FIELDS, LUCENE_ANALYZER).parse(queryText);
    LOGGER.debug("Executing search query: {}; ctx id: {}; idx dir: {}", new String[] { query.toString(),
            indexingContext.getId(), indexingContext.getIndexDirectory().toString() });
    final FlatSearchResponse response = getIndexer().searchFlat(new FlatSearchRequest(query, indexingContext));
    final Set<ArtifactInfo> results = response.getResults();
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Hit count: {}", response.getReturnedHitsCount());
        for (final ArtifactInfo result : results) {
            LOGGER.debug("Found artifact: {}; uinfo: {}", result.toString(), result.getUinfo());
        }
    }
    return results;
}
From source file:org.apache.tika.parser.geo.topic.GeoNameResolver.java
License:Apache License
/**
 * Search corresponding GeoName for each location entity
 *
 * @param querystr
 *            it's the NER actually
 * @return HashMap each name has a list of resolved entities
 * @throws IOException
 * @throws RuntimeException
 */
public HashMap<String, ArrayList<String>> searchGeoName(ArrayList<String> locationNameEntities)
        throws IOException {
    if (locationNameEntities.size() == 0 || locationNameEntities.get(0).length() == 0)
        return new HashMap<String, ArrayList<String>>();
    Logger logger = Logger.getLogger(this.getClass().getName());
    if (!DirectoryReader.indexExists(indexDir)) {
        logger.log(Level.SEVERE, "No Lucene Index Directory Found, Invoke indexBuild() First !");
        System.exit(1);
    }
    IndexReader reader = DirectoryReader.open(indexDir);
    if (locationNameEntities.size() >= 200)
        hitsPerPage = 5; // avoid heavy computation
    IndexSearcher searcher = new IndexSearcher(reader);
    Query q = null;
    HashMap<String, ArrayList<ArrayList<String>>> allCandidates = new HashMap<String, ArrayList<ArrayList<String>>>();
    for (String name : locationNameEntities) {
        if (!allCandidates.containsKey(name)) {
            try {
                // q = new QueryParser("name", analyzer).parse(name);
                q = new MultiFieldQueryParser(new String[] { "name", "alternatenames" }, analyzer).parse(name);
                TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
                searcher.search(q, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                ArrayList<ArrayList<String>> topHits = new ArrayList<ArrayList<String>>();
                for (int i = 0; i < hits.length; ++i) {
                    ArrayList<String> tmp1 = new ArrayList<String>();
                    ArrayList<String> tmp2 = new ArrayList<String>();
                    int docId = hits[i].doc;
                    Document d;
                    try {
                        d = searcher.doc(docId);
                        tmp1.add(d.get("name"));
                        tmp1.add(d.get("longitude"));
                        tmp1.add(d.get("latitude"));
                        if (!d.get("alternatenames").equalsIgnoreCase(d.get("name"))) {
                            tmp2.add(d.get("alternatenames"));
                            tmp2.add(d.get("longitude"));
                            tmp2.add(d.get("latitude"));
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    topHits.add(tmp1);
                    if (tmp2.size() != 0)
                        topHits.add(tmp2);
                }
                allCandidates.put(name, topHits);
            } catch (org.apache.lucene.queryparser.classic.ParseException e) {
                e.printStackTrace();
            }
        }
    }
    HashMap<String, ArrayList<String>> resolvedEntities = new HashMap<String, ArrayList<String>>();
    pickBestCandidates(resolvedEntities, allCandidates);
    reader.close();
    return resolvedEntities;
}
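One more hedged sketch to close the list (an assumption, not part of GeoNameResolver or any file above): when the fields should not all be searched the same way, the static MultiFieldQueryParser.parse(String query, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer) overload attaches a MUST/SHOULD/MUST_NOT flag to each field instead of treating them uniformly. The field names below mirror the GeoNames example purely for illustration.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;

public class PerFieldOccurSketch {
    static Query parseWithFlags(String text) throws ParseException {
        String[] fields = { "name", "alternatenames" };
        // a hit in either field is enough; swap SHOULD for MUST to require both
        BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
        return MultiFieldQueryParser.parse(text, fields, flags, new StandardAnalyzer());
    }
}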