List of usage examples for the constructor of org.apache.lucene.queryparser.classic.MultiFieldQueryParser
public MultiFieldQueryParser(String[] fields, Analyzer analyzer)
From source file:de.anycook.db.lucene.FulltextIndex.java
License:Open Source License
/**
 * Runs a full-text query against the "description" and "steps" fields and
 * collects the "title" value of every matching document.
 *
 * <p>At most 1000 hits are collected. A {@link CorruptIndexException} or
 * {@link ParseException} is logged and the titles gathered up to that point
 * are returned.
 *
 * @param q query string handed to the multi-field query parser
 * @return titles of the matching documents, in the order the hits were visited
 * @throws IOException for any other I/O failure while opening or reading the index
 */
public Set<String> search(String q) throws IOException {
    final String[] searchFields = { "description", "steps" };
    final int maxHits = 1000;
    final Set<String> titles = new LinkedHashSet<>();

    logger.debug(String.format("searching for %s", q));
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        MultiFieldQueryParser fieldParser = new MultiFieldQueryParser(searchFields, analyzer);
        Query parsed = fieldParser.parse(q);

        TopScoreDocCollector topCollector = TopScoreDocCollector.create(maxHits, null);
        indexSearcher.search(parsed, topCollector);

        for (ScoreDoc match : topCollector.topDocs().scoreDocs) {
            Document matchedDoc = indexSearcher.doc(match.doc);
            titles.add(matchedDoc.get("title"));
        }
    } catch (CorruptIndexException | ParseException e) {
        logger.error(e);
    }

    logger.debug(String.format("found %d results", titles.size()));
    return titles;
}
From source file:de.elbe5.cms.search.SearchBean.java
License:Open Source License
public void searchContent(ContentSearchResultData result) { result.getResults().clear();//from w w w . ja v a 2 s . c om String[] fieldNames = result.getFieldNames(); ScoreDoc[] hits = null; float maxScore = 0f; try { String indexPath = ApplicationPath.getAppPath() + "contentindex"; ensureDirectory(indexPath); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer); String pattern = result.getPattern(); pattern = pattern.trim(); Query query = null; if (pattern.length() != 0) { query = parser.parse(pattern); //Log.log("Searching for: " + query.toString()); TopDocs topDocs = searcher.search(query, result.getMaxSearchResults()); hits = topDocs.scoreDocs; maxScore = topDocs.getMaxScore(); } if (hits != null) { for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); ContentSearchData data = null; String type = doc.get("type"); switch (type) { case SiteSearchData.TYPE: data = new SiteSearchData(); break; case PageSearchData.TYPE: data = new PageSearchData(); break; case FileSearchData.TYPE: data = new FileSearchData(); break; } assert (data != null); data.setDoc(doc); data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore); data.evaluateDoc(); data.setContexts(query, analyzer); result.getResults().add(data); } } reader.close(); } catch (Exception ignore) { } }
From source file:de.elbe5.cms.search.SearchBean.java
License:Open Source License
public void searchUsers(UserSearchResultData result) { result.getResults().clear();/*from www.j a va2 s .co m*/ String[] fieldNames = result.getFieldNames(); ScoreDoc[] hits = null; float maxScore = 0f; try { String indexPath = ApplicationPath.getAppPath() + "userindex"; ensureDirectory(indexPath); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer); String pattern = result.getPattern(); pattern = pattern.trim(); Query query = null; if (pattern.length() != 0) { query = parser.parse(pattern); //Log.log("Searching for: " + query.toString()); TopDocs topDocs = searcher.search(query, result.getMaxSearchResults()); hits = topDocs.scoreDocs; maxScore = topDocs.getMaxScore(); } if (hits != null) { for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); UserSearchData data = new UserSearchData(); data.setDoc(doc); data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore); data.evaluateDoc(); data.setContexts(query, analyzer); result.getResults().add(data); } } reader.close(); } catch (Exception ignore) { } }
From source file:de.unihildesheim.iw.lucene.query.TryExactTermsQuery.java
License:Open Source License
/** * New instance using the supplied query. * * @param analyzer Query analyzer/*from w w w . ja v a 2 s . c o m*/ * @param queryStr Query string * @param fields Fields to query * @throws ParseException Thrown, if the query could not be parsed */ public TryExactTermsQuery(@NotNull final Analyzer analyzer, @NotNull final String queryStr, @NotNull final String... fields) throws ParseException { if (fields.length == 0) { throw new IllegalArgumentException("Empty fields list."); } if (StringUtils.isStrippedEmpty(queryStr)) { throw new IllegalArgumentException("Empty query."); } this.queryTerms = QueryUtils.tokenizeQueryString(queryStr, analyzer); final QueryParser qParser = new MultiFieldQueryParser(fields, analyzer); this.query = new BooleanQuery(); this.uniqueQueryTerms = new HashSet<>(this.queryTerms); for (final String term : this.uniqueQueryTerms) { @SuppressWarnings("ObjectAllocationInLoop") final BooleanClause bc = new BooleanClause(qParser.parse(QueryParserBase.escape(term)), Occur.SHOULD); this.query.add(bc); } this.query.setMinimumNumberShouldMatch(this.uniqueQueryTerms.size()); if (LOG.isDebugEnabled()) { LOG.debug("TEQ {} uQt={}", this.query, this.uniqueQueryTerms); } }
From source file:edu.cmu.lti.oaqa.baseqa.concept.rerank.scorers.LuceneConceptScorer.java
License:Apache License
@Override public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams) throws ResourceInitializationException { super.initialize(aSpecifier, aAdditionalParams); hits = Integer.class.cast(getParameterValue("hits")); // query constructor String stoplistPath = String.class.cast(getParameterValue("stoplist-path")); try {/* www.j av a 2 s . c o m*/ stoplist = Resources.readLines(getClass().getResource(stoplistPath), UTF_8).stream().map(String::trim) .collect(toSet()); } catch (IOException e) { throw new ResourceInitializationException(e); } // load index parameters idFieldName = String.class.cast(getParameterValue("id-field")); sourceFieldName = String.class.cast(getParameterValue("source-field")); //noinspection unchecked fields = Iterables.toArray((Iterable<String>) getParameterValue("fields"), String.class); String uriPrefixPath = String.class.cast(getParameterValue("uri-prefix")); try { uriPrefix = Resources.readLines(getClass().getResource(uriPrefixPath), UTF_8).stream() .map(line -> line.split("\t")).collect(toMap(segs -> segs[0], segs -> segs[1])); } catch (IOException e) { throw new ResourceInitializationException(e); } String index = String.class.cast(getParameterValue("index")); // create lucene Analyzer analyzer = new StandardAnalyzer(); parser = new MultiFieldQueryParser(fields, analyzer); try { reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); } catch (IOException e) { throw new ResourceInitializationException(e); } searcher = new IndexSearcher(reader); return true; }
From source file:edu.cmu.lti.oaqa.baseqa.concept.retrieval.LuceneConceptRetrievalExecutor.java
License:Apache License
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 100); // query constructor constructor = UimaContextHelper.createObjectFromConfigParameter(context, "query-string-constructor", "query-string-constructor-params", BooleanBagOfPhraseQueryStringConstructor.class, QueryStringConstructor.class); // lucene//w w w .j a v a 2s.com Analyzer analyzer = UimaContextHelper.createObjectFromConfigParameter(context, "query-analyzer", "query-analyzer-params", StandardAnalyzer.class, Analyzer.class); String[] fields = UimaContextHelper.getConfigParameterStringArrayValue(context, "fields"); parser = new MultiFieldQueryParser(fields, analyzer); String index = UimaContextHelper.getConfigParameterStringValue(context, "index"); try { reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); } catch (IOException e) { throw new ResourceInitializationException(e); } searcher = new IndexSearcher(reader); idFieldName = UimaContextHelper.getConfigParameterStringValue(context, "id-field", null); nameFieldName = UimaContextHelper.getConfigParameterStringValue(context, "name-field", null); sourceFieldName = UimaContextHelper.getConfigParameterStringValue(context, "source-field", null); String uriPrefixPath = UimaContextHelper.getConfigParameterStringValue(context, "uri-prefix"); try { uriPrefix = Resources.readLines(getClass().getResource(uriPrefixPath), UTF_8).stream() .map(line -> line.split("\t")).collect(toMap(segs -> segs[0], segs -> segs[1])); } catch (IOException e) { throw new ResourceInitializationException(e); } }
From source file:edu.cmu.lti.oaqa.baseqa.document.rerank.scorers.LuceneDocumentScorer.java
License:Apache License
/**
 * Runs {@code queryString} against the index and records, per document URI
 * and configuration name, the best (lowest) rank and the highest score seen.
 *
 * <p>A blank query string is ignored.
 *
 * @param queryString query string handed to the multi-field query parser
 * @param conf configuration name used as the second key of the rank and score tables
 * @throws RuntimeException wrapping any {@link ParseException} or {@link IOException}
 */
private void search(String queryString, String conf) throws RuntimeException {
    if (queryString.trim().isEmpty()) {
        return;
    }
    try {
        QueryParser multiFieldParser = new MultiFieldQueryParser(fields, analyzer);
        Query parsedQuery = multiFieldParser.parse(queryString);
        ScoreDoc[] ranked = searcher.search(parsedQuery, hits).scoreDocs;

        for (int rank = 0; rank < ranked.length; rank++) {
            ScoreDoc hit = ranked[rank];
            String uri = uriPrefix + reader.document(hit.doc).get(idFieldName);

            // Keep the smallest rank observed for this (uri, conf) pair.
            boolean betterRank = !uri2conf2rank.contains(uri, conf) || uri2conf2rank.get(uri, conf) > rank;
            if (betterRank) {
                synchronizedPut(uri2conf2rank, uri, conf, rank);
            }

            // Keep the largest score observed for this (uri, conf) pair.
            double score = hit.score;
            boolean betterScore = !uri2conf2score.contains(uri, conf)
                    || uri2conf2score.get(uri, conf) < score;
            if (betterScore) {
                synchronizedPut(uri2conf2score, uri, conf, score);
            }
        }
    } catch (ParseException | IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.document.retrieval.LuceneDocumentRetrievalExecutor.java
License:Apache License
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 100); // query constructor constructor = UimaContextHelper.createObjectFromConfigParameter(context, "query-string-constructor", "query-string-constructor-params", BooleanBagOfPhraseQueryStringConstructor.class, QueryStringConstructor.class); // lucene//from w w w .j av a 2s . c o m Analyzer analyzer = UimaContextHelper.createObjectFromConfigParameter(context, "query-analyzer", "query-analyzer-params", StandardAnalyzer.class, Analyzer.class); String[] fields = UimaContextHelper.getConfigParameterStringArrayValue(context, "fields"); parser = new MultiFieldQueryParser(fields, analyzer); String index = UimaContextHelper.getConfigParameterStringValue(context, "index"); try { reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); } catch (IOException e) { throw new ResourceInitializationException(e); } searcher = new IndexSearcher(reader); idFieldName = UimaContextHelper.getConfigParameterStringValue(context, "id-field", null); titleFieldName = UimaContextHelper.getConfigParameterStringValue(context, "title-field", null); textFieldName = UimaContextHelper.getConfigParameterStringValue(context, "text-field", null); uriPrefix = UimaContextHelper.getConfigParameterStringValue(context, "uri-prefix", null); }
From source file:edu.usc.ir.geo.gazetteer.GeoNameResolver.java
License:Apache License
private HashMap<String, List<Location>> resolveEntities(List<String> locationNames, int count, IndexReader reader) throws IOException { if (locationNames.size() >= 200) hitsPerPage = 5; // avoid heavy computation IndexSearcher searcher = new IndexSearcher(reader); Query q = null;//from w ww . j a v a2s . com HashMap<String, List<Location>> allCandidates = new HashMap<String, List<Location>>(); for (String name : locationNames) { if (!allCandidates.containsKey(name)) { try { //query is wrapped in additional quotes (") to avoid query tokenization on space q = new MultiFieldQueryParser(new String[] { FIELD_NAME_NAME, FIELD_NAME_ALTERNATE_NAMES }, analyzer).parse(String.format("\"%s\"", name)); //sort descending on population SortField populationSort = new SortedNumericSortField(FIELD_NAME_POPULATION, SortField.Type.LONG, true); Sort sort = new Sort(populationSort); //Fetch 3 times desired values, these will be sorted on code and only desired number will be kept ScoreDoc[] hits = searcher.search(q, hitsPerPage * 3, sort).scoreDocs; List<Location> topHits = new ArrayList<Location>(); for (int i = 0; i < hits.length; ++i) { Location tmpLocObj = new Location(); int docId = hits[i].doc; Document d; try { d = searcher.doc(docId); tmpLocObj.setName(d.get(FIELD_NAME_NAME)); tmpLocObj.setLongitude(d.get(FIELD_NAME_LONGITUDE)); tmpLocObj.setLatitude(d.get(FIELD_NAME_LATITUDE)); //If alternate names are empty put name as actual name //This covers missing data and equals weight for later computation if (d.get(FIELD_NAME_ALTERNATE_NAMES).isEmpty()) { tmpLocObj.setAlternateNames(d.get(FIELD_NAME_NAME)); } else { tmpLocObj.setAlternateNames(d.get(FIELD_NAME_ALTERNATE_NAMES)); } tmpLocObj.setCountryCode(d.get(FIELD_NAME_COUNTRY_CODE)); tmpLocObj.setAdmin1Code(d.get(FIELD_NAME_ADMIN1_CODE)); tmpLocObj.setAdmin2Code(d.get(FIELD_NAME_ADMIN2_CODE)); tmpLocObj.setFeatureCode(d.get(FIELD_NAME_FEATURE_CODE)); } catch (IOException e) { e.printStackTrace(); } topHits.add(tmpLocObj); } 
//Picking hitsPerPage number of locations from feature code sorted list allCandidates.put(name, pickTopSortedByCode(topHits, hitsPerPage)); } catch (org.apache.lucene.queryparser.classic.ParseException e) { e.printStackTrace(); } } } HashMap<String, List<Location>> resolvedEntities = new HashMap<String, List<Location>>(); pickBestCandidates(resolvedEntities, allCandidates, count); return resolvedEntities; }
From source file:eu.eexcess.wikipedialocal.recommender.PartnerConnector.java
License:Apache License
@Override public ResultList queryPartnerNative(PartnerConfiguration partnerConfiguration, SecureUserProfile userProfile, PartnerdataLogger dataLogger) throws IOException { partnerConfig = partnerConfiguration; ResultList resultList = new ResultList(); Analyzer analyzer = new EnglishAnalyzer(); File directoryPath = new File(PartnerConfigurationEnum.CONFIG.getPartnerConfiguration().searchEndpoint); Directory directory = FSDirectory.open(directoryPath); IndexReader indexReader = IndexReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); QueryParser queryParser = new MultiFieldQueryParser(FIELD_CONTENTS, analyzer); queryParser.setDefaultOperator(Operator.AND); String queryString = PartnerConfigurationEnum.CONFIG.getQueryGenerator().toQuery(userProfile); Query query = null;//from w ww . ja v a 2s. c o m try { query = queryParser.parse(queryString); } catch (ParseException e) { //logger.log(Level.SEVERE, "could not parse input query", e); } if (userProfile.numResults == null) userProfile.numResults = 10; TopDocs topDocs = indexSearcher.search(query, userProfile.numResults); for (ScoreDoc sDocs : topDocs.scoreDocs) { Result result = new Result(); org.apache.lucene.document.Document doc = indexSearcher.doc(sDocs.doc); if (doc != null) { IndexableField title = doc.getField("title"); IndexableField sectionTitle = doc.getField("sectionTitle"); IndexableField category = doc.getField("category"); IndexableField sectionText = doc.getField("sectionText"); if (sectionText != null) result.description = sectionText.stringValue(); if (category != null) { result.facets.type = category.stringValue(); result.facets.provider = "Category"; } if (title != null && sectionTitle != null) result.title = title.stringValue() + " - " + sectionTitle.stringValue(); resultList.results.add(result); } } resultList.totalResults = topDocs.totalHits; return resultList; }