List of usage examples for the org.apache.lucene.queryparser.classic.MultiFieldQueryParser constructor
public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map<String, Float> boosts)
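Before the project examples below, here is a minimal self-contained sketch of this boosts-taking constructor. It assumes Lucene 5.x or later, where the Version argument still visible in several of the 4.x examples below was removed; the field names and boost values are placeholders, not taken from any project listed here:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;

public class BoostedParserSketch {
    public static void main(String[] args) throws ParseException {
        // Placeholder field names; any indexed text fields would work here.
        String[] fields = { "title", "body" };

        // Per-field boosts: matches in "title" score twice as high as matches in "body".
        Map<String, Float> boosts = new HashMap<>();
        boosts.put("title", 2.0f);
        boosts.put("body", 1.0f);

        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer(), boosts);

        // The parser expands the term across all fields, applying the boosts.
        Query query = parser.parse("lucene");
        System.out.println(query); // prints something like: (title:lucene)^2.0 body:lucene
    }
}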
From source file:com.orientechnologies.lucene.OLuceneIndexType.java
License:Apache License
protected static Query getQueryParser(OIndexDefinition index, String key, Analyzer analyzer, Version version)
        throws ParseException {
    QueryParser queryParser;
    if (key.startsWith("(")) {
        queryParser = new QueryParser(version, "", analyzer);
    } else {
        String[] fields;
        if (index.isAutomatic()) {
            fields = index.getFields().toArray(new String[index.getFields().size()]);
        } else {
            int length = index.getTypes().length;
            fields = new String[length];
            for (int i = 0; i < length; i++) {
                fields[i] = "k" + i;
            }
        }
        queryParser = new MultiFieldQueryParser(version, fields, analyzer);
    }
    return queryParser.parse(key);
}
From source file:com.orientechnologies.lucene.test.LuceneVsLuceneTest.java
License:Apache License
@Test
public void testLuceneVsLucene() throws IOException, ParseException {
    InputStream stream = ClassLoader.getSystemResourceAsStream("testLuceneIndex.sql");
    databaseDocumentTx.command(new OCommandScript("sql", getScriptFromStream(stream))).execute();

    for (ODocument oDocument : databaseDocumentTx.browseClass("Song")) {
        String title = oDocument.field("title");
        if (title != null) {
            Document d = new Document();
            d.add(new Field("title", title, Field.Store.NO, Field.Index.ANALYZED));
            indexWriter.addDocument(d);
        }
    }
    indexWriter.close();

    IndexReader reader = DirectoryReader.open(getDirectory());
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new MultiFieldQueryParser(OLuceneIndexManagerAbstract.LUCENE_VERSION,
            new String[] { "title" }, new StandardAnalyzer(OLuceneIndexManagerAbstract.LUCENE_VERSION))
                    .parse("down the");
    final TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
    ScoreDoc[] hits = docs.scoreDocs;

    List<ODocument> oDocs = databaseDocumentTx
            .query(new OSQLSynchQuery<ODocument>("select *,$score from Song where title LUCENE \"down the\""));
    Assert.assertEquals(oDocs.size(), hits.length);

    int i = 0;
    for (ScoreDoc hit : hits) {
        Assert.assertEquals(oDocs.get(i).field("$score"), hit.score);
        i++;
    }
    reader.close();
}
From source file:com.qahit.jbug.LuceneManager.java
public static ArrayList<String> search(String terms, int n, QueryParser.Operator defaultOperator) {
    IndexSearcher indexSearcher = null;
    try {
        indexSearcher = searcherManager.acquire();
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_45,
                new String[] { "easiness", "targetmilestone", "version", "component", "reporter", "product",
                        "description", "comments", "title", "status", "assignedto", "bug_id", "priority" },
                analyzer);
        queryParser.setDefaultOperator(defaultOperator);
        Query query = queryParser.parse(terms);
        ScoreDoc[] scoreDocs = indexSearcher.search(query, interval).scoreDocs;
        ArrayList<String> result = new ArrayList<>();
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            result.add(doc.get("bug_id"));
        }
        return result;
    } catch (IOException | ParseException ex) {
        log.error("Error while searching", ex);
    } finally {
        try {
            // Guard against a failed acquire(): releasing null would throw.
            if (indexSearcher != null) {
                searcherManager.release(indexSearcher);
            }
        } catch (IOException ex) {
            log.error("Error while releasing indexSearcher", ex);
        }
    }
    return null;
}
From source file:com.qahit.jbug.MySearchWarmer.java
@Override
public IndexSearcher newSearcher(IndexReader reader) throws IOException {
    log.info("Creating a new searcher");
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_45,
            new String[] { "easiness", "targetmilestone", "version", "component", "reporter", "product",
                    "description", "comments", "title", "status", "assignedto", "bug_id" },
            analyzer);

    int counter = 0;
    try (final SQL sql = new SQL(); final ResultSet resultSet = sql.query("select * from bugs")) {
        // Warm up by searching for all the top categories
        while (resultSet.next()) {
            counter++;
            if (counter >= 100) {
                break;
            }
            String queryString = resultSet.getString("title");
            StringBuilder cleanedString = new StringBuilder(queryString.length());
            for (int i = 0; i != queryString.length(); i++) {
                char c = queryString.charAt(i);
                if (Character.isAlphabetic(c) || Character.isSpaceChar(c)) {
                    cleanedString.append(c);
                }
            }
            Query query = queryParser.parse(cleanedString.toString());
            indexSearcher.search(query, 1000);
        }
    } catch (SQLException | ParseException ex) {
        log.error("Error while warming up the index searcher", ex);
    }
    return indexSearcher;
}
From source file:coreservlets.consolesearch.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n]"
            + " [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\n"
            + "See http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;
    String[] fields = { "title", "description", "keywords", "contents" };

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new ICTCLASAnalyzer();
    MultiFieldQueryParser mp = new MultiFieldQueryParser(Version.LUCENE_44, fields, analyzer);

    BufferedReader in;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }

    while (true) {
        if (queries == null && queryString == null) { // prompt the user
            System.out.println("Enter query: ");
        }
        String line = queryString != null ? queryString : in.readLine();
        if (line == null) { // readLine() returns null at end of input
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = mp.parse(line);
        System.out.println("Searching for: " + query.toString());

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, null, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:de.minecrawler.search.AbstractSearchEngine.java
License:Open Source License
/**
 * Starts a search on the parsed documents using a search query.
 *
 * @param queryString
 *            The query string, see the <a href=
 *            "http://lucene.apache.org/core/4_1_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html"
 *            >query format</a>
 * @param limit
 *            The maximum number of results
 * @return List of results
 */
public List<CrawledWebsiteResult> search(String queryString, int limit) {
    try {
        DirectoryReader ireader = DirectoryReader.open(this.dir);
        IndexSearcher isearcher = new IndexSearcher(ireader);
        QueryParser parser = new MultiFieldQueryParser(LUCENE_VERSION, FIELDS, ANALYZER);
        Query query = parser.parse(queryString);
        ScoreDoc[] hits = isearcher.search(query, null, limit).scoreDocs;
        List<CrawledWebsiteResult> result = new ArrayList<CrawledWebsiteResult>();
        for (int i = 0; i < hits.length; ++i) {
            Document hitDoc = isearcher.doc(hits[i].doc);
            CrawledWebsite website = extractWebsite(hitDoc);
            result.add(new CrawledWebsiteResult(website, i + 1, hits[i].score));
        }
        ireader.close();
        return result;
    } catch (IOException e) {
        e.printStackTrace();
        return Collections.<CrawledWebsiteResult>emptyList();
    } catch (ParseException e) {
        System.out.println("Wrong query! Check your query format!");
        System.out.println(e.getMessage());
        return Collections.<CrawledWebsiteResult>emptyList();
    }
}
From source file:de.unioninvestment.eai.portal.portlet.crud.domain.model.CompoundSearch.java
License:Apache License
private Query parseQuery(String queryString) {
    if (Strings.isNullOrEmpty(queryString)) {
        return null;
    }
    Collection<String> defaultFields = getSearchableColumns().getDefaultSearchablePrefixes().values();
    String[] defaultFieldsArray = defaultFields.toArray(new String[defaultFields.size()]);
    QueryParser luceneParser = new MultiFieldQueryParser(Version.LUCENE_46, defaultFieldsArray,
            new AsIsAnalyzer());
    try {
        return luceneParser.parse(queryString);
    } catch (org.apache.lucene.queryparser.classic.ParseException e) {
        throw new BusinessException("portlet.crud.error.compoundsearch.invalidQuery", queryString);
    }
}
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
/**
 * sets up indexer just for reading... if needed for writing only, call
 * setupForWrite. if need both read & write, call both.
 */
synchronized void setupForRead() {
    log.info("setting up index for read only access");
    long startTime = System.currentTimeMillis();

    try {
        setupDirectory();

        String[] defaultSearchFields, defaultSearchFieldsOriginal;
        String[] defaultSearchFieldSubject = new String[] { "title" }; // for subject only search
        String[] defaultSearchFieldCorrespondents;

        // body field should be there, as the content of the attachment lies in this field; should also include meta field?
        // why the search over en-names and en-names-original when body/body_original is included in the search fields?
        defaultSearchFields = new String[] { "body", "title", "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        // we want to leave title there because we want to always hit the title -- discussed with Peter June 27 2015
        defaultSearchFieldsOriginal = new String[] { "body_original", "title" };
        defaultSearchFieldCorrespondents = new String[] { "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        // names field added above after email discussion with Sit 6/11/2013. problem is that we're not using the
        // Lucene EnglishPossessiveFilter, so NER will extract the name Stanford University in a sentence like:
        // "This is Stanford University's website."
        // but when the user clicks on the name "Stanford University" in say monthly cards, we
        // will not match the message with this sentence because of the apostrophe.

        // for searching an attachment with fileName
        String[] metaSearchFields = new String[] { "fileName" };

        // Parse a simple query that searches for "text":
        if (parser == null) {
            parser = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFields, analyzer);
            parserOriginal = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldsOriginal, analyzer);
            parserSubject = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldSubject, analyzer);
            parserCorrespondents = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldCorrespondents,
                    analyzer);
            parserMeta = new MultiFieldQueryParser(LUCENE_VERSION, metaSearchFields, new KeywordAnalyzer());
        }

        /*
         * Bunch of gotchas here.
         * It's a bad idea to store lucene internal docIds, as no assumptions about the internal docIds should
         * be made; not even that they are serial. When searching, lucene may ignore logically deleted docs.
         * Lucene does not handle deleted docs, and having these docs in search may bring down the search
         * performance by 50%. Deleted docs are cleaned only during merging of indices.
         */
        int numContentDocs = 0, numContentDeletedDocs = 0, numAttachmentDocs = 0, numAttachmentDeletedDocs = 0;

        if (DirectoryReader.indexExists(directory)) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            if (ireader.numDeletedDocs() > 0)
                log.warn("!!!!!!!\nIndex reader has " + ireader.numDocs() + " doc(s) of which "
                        + ireader.numDeletedDocs() + " are deleted)\n!!!!!!!!!!");
            isearcher = new IndexSearcher(ireader);
            contentDocIds = new LinkedHashMap<>();
            numContentDocs = ireader.numDocs();
            numContentDeletedDocs = ireader.numDeletedDocs();
            Bits liveDocs = MultiFields.getLiveDocs(ireader);
            Set<String> fieldsToLoad = new HashSet<>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;
                if (doc == null || doc.get("docId") == null)
                    continue;
                contentDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + contentDocIds.size() + " content docs");
        }

        if (DirectoryReader.indexExists(directory_blob)) {
            IndexReader ireader_blob = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader_blob); // read-only=true
            blobDocIds = new LinkedHashMap<Integer, String>();
            numAttachmentDocs = ireader_blob.numDocs();
            numAttachmentDeletedDocs = ireader_blob.numDeletedDocs();
            Bits liveDocs = MultiFields.getLiveDocs(ireader_blob);
            Set<String> fieldsToLoad = new HashSet<String>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader_blob.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader_blob.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;
                if (doc == null || doc.get("docId") == null)
                    continue;
                blobDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + blobDocIds.size() + " attachment docs");
        }

        log.warn("Number of content docs: " + numContentDocs + ", number deleted: " + numContentDeletedDocs);
        log.warn("Number of attachment docs: " + numAttachmentDocs + ", number deleted: "
                + numAttachmentDeletedDocs);

        if (dirNameToDocIdMap == null)
            dirNameToDocIdMap = new LinkedHashMap<String, Map<Integer, String>>();
    } catch (Exception e) {
        Util.print_exception(e, log);
    }
    log.info("Setting up index for read took " + (System.currentTimeMillis() - startTime) + " ms");
}
From source file:edu.ucdenver.ccp.nlp.index.Search.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String index = "index";
    String queries = null;
    String queryString = null;
    int hitsPerPage = 100;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    EnglishAnalyzer analyzer = new EnglishAnalyzer(Version.LUCENE_40);
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));

    // query building starts here
    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_40,
            new String[] { "title", "abs", "mentions" }, analyzer);

    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user, e.g. "c" for cisplatin
            System.out.println("Enter query: ");
        }
        String line = queryString != null ? queryString : in.readLine();
        if (line == null) { // readLine() returns null at end of input
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = parser.parse(line);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // display results
        System.out.println("Found " + hits.length + " hits.");
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println((i + 1) + ". " + d.get("pmid") + "\t" + d.get("title"));
        }
        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:evalita.q4faq.baseline.Search.java
License:Open Source License
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        if (args.length > 2) {
            IndexSearcher searcher = new IndexSearcher(
                    DirectoryReader.open(FSDirectory.open(new File(args[0]))));
            BufferedReader reader = new BufferedReader(new FileReader(args[1]));
            BufferedWriter writer = new BufferedWriter(new FileWriter(args[2]));
            String[] fields = new String[] { "question", "answer", "tag" };
            Map<String, Float> boosts = new HashMap<>();
            boosts.put("question", 4f);
            boosts.put("answer", 2f);
            boosts.put("tag", 1f);
            QueryParser parser = new MultiFieldQueryParser(fields, new ItalianAnalyzer(), boosts);
            while (reader.ready()) {
                String[] split = reader.readLine().split("\t");
                Query q = parser.parse(split[1].replace("?", " ").replace("!", " ").replace("/", " "));
                TopDocs topdocs = searcher.search(q, 25);
                for (ScoreDoc res : topdocs.scoreDocs) {
                    writer.append(split[0]).append("\t");
                    writer.append(searcher.doc(res.doc).get("id")).append("\t");
                    writer.append(String.valueOf(res.score));
                    writer.newLine();
                }
            }
            reader.close();
            writer.close();
        } else {
            throw new IllegalArgumentException("Number of arguments not valid");
        }
    } catch (IOException | IllegalArgumentException | ParseException ex) {
        Logger.getLogger(Search.class.getName()).log(Level.SEVERE, null, ex);
    }
}