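List of usage examples for org.apache.lucene.search.TopScoreDocCollector#create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
Note: the examples below reference Version.LUCENE_30 / Version.LUCENE_47 and pass a boolean as the second argument, i.e. they call the older overload create(int numHits, boolean docsScoredInOrder) rather than the signature shown above.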
From source file:edu.ku.brc.specify.dbsupport.cleanuptools.AgentCleanupIndexer.java
License:Open Source License
/** * // w w w . ja v a 2 s. com */ public FindItemInfo getNextAgent() { if (parser == null) { initLuceneforReading("full"); } if (!hasMoreAgents()) { return null; } FindItemInfo fii = null; boolean cont = true; while (cont) { Integer prevAgentId = currAgentID; // Do 20 at a time String sql = String.format("SELECT LastName, FirstName, MiddleInitial, AgentID FROM agent " + "WHERE SpecifyUserID IS NULL AND LastName IS NOT NULL AND AgentID >= %d AND DivisionID = DIVID LIMIT 0, 20", currAgentID); sql = QueryAdjusterForDomain.getInstance().adjustSQL(sql); System.out.println(sql); Vector<Object[]> rows = query(sql); if (rows == null || rows.size() < 2) { isQuitting = true; return null; } for (Object[] row : rows) { String lastNm = (String) row[0]; String firstNm = (String) row[1]; String midNm = (String) row[2]; currAgentID = (Integer) row[3]; //System.out.println("last["+lastNm+"] first["+firstNm+"] mid["+midNm+"] full["+fullName+"]"); String[] nms = FirstLastVerifier.parseName(lastNm.toString()); if (nms != null) { String last = nms[0]; String first = nms.length > 1 ? 
nms[1] : null; if (first == null && isNotEmpty(firstNm)) { first = firstNm; } //System.out.println("last["+lastNm+"] first["+firstNm+"] mid["+midNm+"]"); fii = new FindItemInfo(currAgentID, scriptlet.buildNameString(firstNm, lastNm, midNm)); StringBuilder sb = new StringBuilder(); if (isNotEmpty(last)) { sb.append("last:"); sb.append(last); sb.append("^4"); // Boost 4 times } if (isNotEmpty(first)) { sb.append(" "); boolean ok = first.length() > 1; if (ok) sb.append("AND (first:"); sb.append(first); sb.append("~0.6"); if (ok) { sb.append(" OR first:"); sb.append(first.charAt(0)); sb.append("~0.4)"); } } System.out.println(sb.toString()); String queryString = sb.toString(); Vector<Pair<Document, Float>> docList = new Vector<Pair<Document, Float>>(); try { Query query = parser.parse(queryString); log.debug("Searching for: " + query.toString()); Document doc = null; int hitsPerPage = 10; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("Hits: " + (hits != null ? 
hits.length : 0)); docList.clear(); for (int i = 0; i < hits.length; ++i) { System.out.println("doc: " + i + " scrore: " + hits[i].score + " " + searcher.doc(hits[i].doc).get("full")); //if (hits[i].score > 1.0) { int docId = hits[i].doc; doc = searcher.doc(docId); lastNm = doc.get("last"); int numCommas = countMatches(lastNm, ","); int numSemiColons = countMatches(lastNm, ";"); int recId = Integer.parseInt(doc.get("id")); if (numSemiColons > 0 || numCommas > 0 || recId == currAgentID) { System.out.println(numSemiColons + " " + numCommas + " " + (recId == currAgentID) + " " + recId); continue; } docList.add(new Pair<Document, Float>(doc, (Float) hits[i].score)); } } System.out.println("docList.size(): " + docList.size()); if (docList.size() > 0) { //System.out.println("\n--------------------------------------------------------------"); //System.out.println(String.format("[%s] ->[%s][%s]", lastNm.toString(), p.first, (p.second != null ? p.second : "null"))); int i = 0; for (Pair<Document, Float> pp : docList) { if (pp.second > 1.0) { String idStr = pp.first.get("id"); int dupId = Integer.parseInt(idStr); if (dupId != currAgentID) { fii.addDuplicate(dupId); System.out.println(String.format("%d - %5.3f - %s", i, pp.second, pp.first.get("full"))); i++; } } } if (i > 0) { int btn = chooseAgentsToMergeNew(fii); if (btn == CustomDialog.OK_BTN) { return fii; } if (btn == CustomDialog.CANCEL_BTN) { isQuitting = true; return null; } } } } catch (Exception e) { e.printStackTrace(); } } else { //System.out.println(String.format("Group [%s] ", name.toString())); } } // for loop if (prevAgentId == currAgentID) { int numRemaining = getCountAsInt("SELECT COUNT(*) FROM agent WHERE AgentID > " + currAgentID); if (numRemaining == 0) { return null; } currAgentID = getCountAsInt("SELECT AgentID FROM agent WHERE AgentID > " + currAgentID + " ORDER BY AgentID ASC LIMIT 0,1"); } } // while loop return fii; }
From source file:edu.ku.brc.specify.dbsupport.cleanuptools.FirstLastVerifier.java
License:Open Source License
/** * @param fieldName//from ww w.java 2 s .c om * @param searchText * @return */ private boolean search(final String fieldName, final String searchText) { if (parser == null) { initLuceneforReading(fieldName); } try { String srchTxt = StringUtils.replace(searchText, ")", ""); srchTxt = StringUtils.replace(srchTxt, "(", ""); srchTxt = StringUtils.replace(srchTxt, ":", ""); srchTxt = StringUtils.replace(srchTxt, "?", ""); Query query = parser.parse(fieldName + ":" + srchTxt.toUpperCase()); int hitsPerPage = 10; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; //System.out.println("Hits: "+(hits != null ? hits.length : 0)); return hits.length > 0; } catch (Exception e) { System.err.println(e.getMessage()); //e.printStackTrace(); } return false; }
From source file:edu.ku.brc.specify.dbsupport.cleanuptools.GeoCleanupFuzzySearch.java
License:Open Source License
public static void main(String[] args) throws IOException { //For Debug/*from ww w . java 2 s .co m*/ String connectStr = "jdbc:mysql://localhost/testfish"; String username = "root"; String password = "root"; DBConnection dbConn; // Debug dbConn = DBConnection.getInstance(); dbConn.setConnectionStr(connectStr); dbConn.setDatabaseName("stats"); dbConn.setUsernamePassword(username, password); dbConn.setDriver("com.mysql.jdbc.Driver"); boolean doBuildIndex = false; //String indexLocation = "/Users/rods/Downloads/lucene/geonames-index"; String indexLocation = "/Users/rods/Documents/Specify/geonames-index"; GeoCleanupFuzzySearch indexer = null; try { indexer = new GeoCleanupFuzzySearch(null); if (doBuildIndex) { indexer.startIndexingProcessSync(1, null); } } catch (Exception ex) { System.out.println("Cannot create index..." + ex.getMessage()); System.exit(-1); } // =================================================== // after adding, we always have to call the // closeIndex, otherwise the index is not created // =================================================== // indexer.closeIndex(); // ========================================================= // Now search // ========================================================= IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); boolean doFuzzy = false; boolean doTerm = false; boolean doParse = true; if (doFuzzy) { System.out.println("-------------------------- Fuzzy -----------------------"); String[] searchStrs = { "Comoro Islands", "Solomon", "united states iowa", "germany brandenburg", "bulgaria sofia", "costa rica alajuela", "costa rica cartago", "costa rica alajuela", "canada newfoundland", "mexico campeche", "australia ashmore and cartier islands", "fiji lau", "fiji lomaiviti", "guam agana", "germany Lower Saxony", "germany Saxony", "germany Sachsen Anhalt", "germany Sachsen-Anhalt", "germany Land Sachsen-Anhalt", "united states 
iowa,Fayette", "united states iowa Fayette County", "Argentina Buenos Aires", "buenos aires argentina ", }; for (String searchText : searchStrs) { try { Query query = new FuzzyQuery(new Term("name", searchText)); TopDocs docs = searcher.search(query, 10); ScoreDoc[] hits = docs.scoreDocs; System.out.println(searchText + " -> Hits " + hits.length + " hits [" + query + "]"); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("name") + " score=" + hits[i].score); } } catch (Exception e) { System.out.println("Error searching " + searchText + " : " + e.getMessage()); } } } if (doTerm) { System.out.println("-------------------------- Terms -----------------------"); String[] searchStrs = { "Comoro Islands", "Solomon", "united states,iowa", "germany,brandenburg", "bulgaria,sofia", "costa rica,alajuela", "costa rica,cartago", "costa rica,alajuela", "canada,newfoundland", "mexico,campeche", "australia,ashmore and cartier islands", "fiji,lau", "fiji,lomaiviti", "guam,agana", "germany,Lower Saxony", "germany,Saxony", "germany,Sachsen Anhalt", "germany,Sachsen-Anhalt", "germany,Land Sachsen-Anhalt", "united states,iowa,Fayette", "united states,iowa,Fayette County", "argentina,buenos aires", "Argentina,Buenos Aires", }; for (String searchText : searchStrs) { try { String[] tokens = StringUtils.split(searchText, ','); BooleanQuery query = new BooleanQuery(); TermQuery t1 = new TermQuery(new Term("country", tokens[0])); t1.setBoost(0.2f); query.add(t1, Occur.SHOULD); if (tokens.length > 1) { TermQuery t2 = new TermQuery(new Term("state", tokens[1])); t2.setBoost(0.4f); query.add(t2, Occur.SHOULD); } if (tokens.length > 2) { TermQuery t3 = new TermQuery(new Term("county", tokens[2])); t3.setBoost(0.8f); query.add(t3, Occur.MUST); } TopDocs docs = searcher.search(query, 20); ScoreDoc[] hits = docs.scoreDocs; System.out.println(searchText + " -> Hits " + hits.length + " hits [" + query + "]"); 
for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("name") + " score=" + hits[i].score); } } catch (Exception e) { System.out.println("Error searching " + searchText + " : " + e.getMessage()); } } } if (doParse) { System.out.println("-------------------------- Parsing -----------------------"); String[] searchStrs = { "Comoro Islands", "Bahamas Elbow Bank", // "Solomon", // "united states iowa", // "germany brandenburg", // "bulgaria sofia", // "costa rica alajuela", // "costa rica cartago", // "costa rica alajuela", // "canada newfoundland", // "mexico campeche", // "australia ashmore and cartier islands", // "fiji lau", // "fiji lomaiviti", // "guam agana", // "germany Lower Saxony", // "germany Saxony", // "germany Sachsen Anhalt", // "germany Sachsen-Anhalt", // "germany Land Sachsen-Anhalt", // "united states iowa,Fayette", // "united states iowa Fayette County", // "Argentina Buenos Aires", // "buenos aires argentina " }; for (String searchText : searchStrs) { try { TopScoreDocCollector collector = TopScoreDocCollector.create(5, true); Query q = new QueryParser(Version.LUCENE_47, "name", analyzer).parse(searchText); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; if (hits != null) { System.out.println(searchText + " -> Hits " + hits.length + " hits."); // System.out.println("For: ["+seatchText+"] Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); if (d != null) { System.out.println((i + 1) + ". " + d.get("name") + " score=" + hits[i].score); } else { System.err.println("Doc was null searching " + searchText); } } } else { System.err.println("Hits was null searching " + searchText); } } catch (Exception e) { e.printStackTrace(); System.err.println("Error searching " + searchText + " : " + e.getMessage()); } } } }
From source file:edu.ku.brc.specify.dbsupport.cleanuptools.GeographyAssignISOs.java
License:Open Source License
/**
 * Searches the geonames index for the place described by the first {@code level + 1} parent
 * names and collects candidates of the requested rank into {@code luceneResults}.
 *
 * <p>The names are joined with spaces, cleaned by
 * {@code GeoCleanupFuzzySearch.stripExtrasFromName} and parsed against the "name" field; up to
 * 10 hits are examined. Only hits whose "rankid" equals {@code rankId} are considered. If the
 * top hit's name equals the search string (or, for rank 200, its country does), it becomes
 * {@code selectedSearchItem} and the method returns at once. Otherwise each distinct "geonmid"
 * is added once, displayed as "country, state, county" when any of those fields is present.
 *
 * @param level index of the deepest entry of {@code parentNames} to include
 * @param rankId rank the hits must have (200 and 400 get special handling; presumably country
 *        and county ranks — confirm against the rank definitions)
 * @param parentNames names from the top of the tree down to the item being searched
 * @param parentRanks not used by this method
 * @param parentISOCodes not used by this method
 * @return {@code eMatch} for an exact top hit; {@code eFound} if candidates were collected;
 *         {@code eNotFound} if the search text is empty or unparsable, if nothing matched, or
 *         if {@code rankId} is 400 and {@code doInvCountry[2]} is false
 * @throws IOException if the index cannot be read
 */
private LuceneSearchResultsType searchLuceneWithFuzzy(final int level, final int rankId,
        final String[] parentNames, final int[] parentRanks, final String[] parentISOCodes)
        throws IOException {
    luceneResults.removeAllElements();

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < level + 1; i++) {
        if (i > 0) {
            sb.append(' ');
        }
        sb.append(parentNames[i]);
    }

    HashSet<Integer> usedIds = new HashSet<Integer>();
    TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
    String searchStr = "";
    try {
        searchStr = GeoCleanupFuzzySearch.stripExtrasFromName(sb.toString());
        if (isEmpty(searchStr)) {
            // Remember which parent had a child with a blank name.
            String parentName = level == 0 ? "Earth" : parentNames[level];
            blankGeoNameParents.add(parentName);
            return LuceneSearchResultsType.eNotFound;
        }

        System.out.println("searchStr[" + searchStr + "]");
        Query q = new QueryParser(Version.LUCENE_47, "name", GeoCleanupFuzzySearch.getAnalyzer())
                .parse(searchStr);
        luceneSearch.getSearcher().search(q, collector);

    } catch (ParseException e) {
        e.printStackTrace();
        return LuceneSearchResultsType.eNotFound;
    }

    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    for (int i = 0; i < hits.length; ++i) {
        Document doc = luceneSearch.getSearcher().doc(hits[i].doc);

        int docRankId = Integer.parseInt(doc.get("rankid"));
        if (rankId != docRankId) {
            continue;
        }

        int geoId = Integer.parseInt(doc.get("geonmid"));
        String fullName = doc.get("name");
        String isoCode = doc.get("code");
        String country = doc.get("country");

        // Only the best-scoring hit may short-circuit as an exact match.
        if (i == 0 && ((isNotEmpty(fullName) && fullName.equals(searchStr))
                || (rankId == 200 && isNotEmpty(country) && country.equals(searchStr)))) {
            selectedSearchItem = new GeoSearchResultsItem(fullName, geoId, isoCode);
            return LuceneSearchResultsType.eMatch;
        }

        // add() returns false for an id that was already recorded.
        if (usedIds.add(geoId)) {
            String state = doc.get("state");
            String county = doc.get("county");

            // Previously this tested 'country' three times, so a hit with only a state or a
            // county never got its display name rebuilt.
            if (isNotEmpty(country) || isNotEmpty(state) || isNotEmpty(county)) {
                StringBuilder display = new StringBuilder();
                String[] names = { country, state, county };
                for (String nm : names) {
                    // Skip empty parts as well as nulls to avoid dangling ", " separators.
                    if (isNotEmpty(nm)) {
                        if (display.length() > 0) {
                            display.append(", ");
                        }
                        display.append(nm);
                    }
                }
                fullName = display.toString();
            }
            luceneResults.add(new GeoSearchResultsItem(fullName, geoId, isoCode));
        }
    }

    if (rankId == 400 && !doInvCountry[2]) {
        return LuceneSearchResultsType.eNotFound;
    }

    boolean hasItems = luceneResults.size() > 0;
    if (hasItems) {
        selectedSearchItem = luceneResults.get(0);
    }
    return hasItems ? LuceneSearchResultsType.eFound : LuceneSearchResultsType.eNotFound;
}
From source file:edu.ku.brc.specify.dbsupport.cleanuptools.LocalityCleanupIndexer.java
License:Open Source License
/** * /*from w w w .j a v a2 s . c o m*/ */ public FindItemInfo getNextLocality() { foundNothing = true; // start by saying we haven't found any matches if (parser == null) { initLuceneforReading(); } if (!hasMoreLocalities()) { isQuitting = true; return null; } FindItemInfo fii = null; boolean cont = true; while (cont) { // Do 20 at a time String sql = String.format( "SELECT LocalityName, FullName, LocalityID FROM locality l LEFT JOIN geography g ON l.GeographyID = g.GeographyID " + "WHERE LocalityName IS NOT NULL AND LocalityID >= %d AND DisciplineID = DSPLNID LIMIT 0, 20", currLocId); sql = QueryAdjusterForDomain.getInstance().adjustSQL(sql); System.out.println(sql); Vector<Object[]> rows = BasicSQLUtils.query(sql); if (rows == null || rows.size() == 0) { isQuitting = true; return null; } for (Object[] row : rows) { String localityName = (String) row[0]; String geoName = fixGeo((String) row[1]); currLocId = (Integer) row[2]; fii = new FindItemInfo(currLocId, localityName); Pair<BigDecimal, BigDecimal> mainLatLon = getLatLon(currLocId); boolean isMainLatLon = (mainLatLon.first != null && mainLatLon.first.doubleValue() != 0.0 && mainLatLon.second != null && mainLatLon.second.doubleValue() != 0.0); StringBuilder sb = new StringBuilder(); sb.append("loc:"); sb.append(localityName); sb.append("^4 AND geo:"); // Boost 4 times sb.append(geoName); System.out.println(sb.toString()); String queryString = sb.toString(); Vector<Pair<Document, Float>> docList = new Vector<Pair<Document, Float>>(); try { Query query = parser.parse(queryString); log.debug("Searching for: " + query.toString()); Document doc = null; int hitsPerPage = 10; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; docList.clear(); for (int i = 0; i < hits.length; ++i) { if (hits[i].score > 1.0) { int docId = hits[i].doc; doc = searcher.doc(docId); int recId = Integer.parseInt(doc.get("id")); 
if (currLocId != recId) { docList.add(new Pair<Document, Float>(doc, (Float) hits[i].score)); } } } if (docList.size() > 0) { //System.out.println("\n--------------------------------------------------------------"); //System.out.println(String.format("[%s] ->[%s][%s]", lastNm.toString(), p.first, (p.second != null ? p.second : "null"))); int dupAddedCnt = 0; for (Pair<Document, Float> pp : docList) { if (pp.second > 1.0) { String idStr = pp.first.get("id"); int dupId = Integer.parseInt(idStr); if (dupId != currLocId) { Pair<BigDecimal, BigDecimal> dupLatLon = getLatLon(dupId); boolean isDupLatLon = (dupLatLon.first != null && dupLatLon.first.doubleValue() != 0.0 && dupLatLon.second != null && dupLatLon.second.doubleValue() != 0.0); boolean isOKToAdd = true; if (isMainLatLon && isDupLatLon) { LatLon mainLL = LatLon.fromDegrees(mainLatLon.first.doubleValue(), mainLatLon.second.doubleValue()); LatLon dupLL = LatLon.fromDegrees(dupLatLon.first.doubleValue(), dupLatLon.second.doubleValue()); double distInMeters = LatLon.ellipsoidalDistance(mainLL, dupLL, Earth.WGS84_EQUATORIAL_RADIUS, Earth.WGS84_POLAR_RADIUS); System.out.println(String.format("%8.5f, %8.5f", distInMeters, 0.0)); isOKToAdd = distInMeters < 50.0; } if (isOKToAdd) { fii.addDuplicate(dupId); System.out.println(String.format("%d - %5.3f - %s", dupAddedCnt, pp.second, pp.first.get("full"))); dupAddedCnt++; } } } } if (dupAddedCnt > 0) { foundNothing = false; int btn = chooseLocalitiesToMerge(fii); if (btn == CustomDialog.OK_BTN) { return fii; } if (btn == CustomDialog.CANCEL_BTN) { isQuitting = true; return null; } } } } catch (Exception e) { e.printStackTrace(); } } // for loop if (!hasMoreLocalities()) { isQuitting = true; return null; } } // while loop return fii; }
From source file:edu.ku.brc.specify.tools.LocalizerSearchHelper.java
License:Open Source License
public int[] doPropsSearch(final String searchText, final String fieldName) { try {//w w w.ja v a2 s . c o m QueryParser parser = new QueryParser(Version.LUCENE_47, fieldName, analyzer); Query query = parser.parse(searchText.toLowerCase()); //System.out.println("Searching for: " + query.toString(fieldName)); IndexSearcher memSearcher = new IndexSearcher(DirectoryReader.open(memIndexer)); TopScoreDocCollector collector = TopScoreDocCollector.create(50000, true); memSearcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int[] inxs = new int[hits.length]; int i = 0; for (ScoreDoc doc : hits) { Document d = memSearcher.doc(doc.doc); //System.out.println(doc.doc+" "+doc.score+" "+d.get("index")); inxs[i++] = Integer.parseInt(d.get("index")); } return inxs; } catch (Exception ex) { ex.printStackTrace(); } return null; }
From source file:edu.ku.brc.specify.tools.LocalizerSearchHelper.java
License:Open Source License
protected int getTotalHits(final Query query, int hitsPerPage) throws IOException { TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(query, collector);//from w w w .j a v a2 s. c o m @SuppressWarnings("unused") ScoreDoc[] hits = collector.topDocs().scoreDocs; int numTotalHits = collector.getTotalHits(); //log.debug(numTotalHits + " total matching documents"); return numTotalHits; }
From source file:edu.ku.brc.specify.tools.schemalocale.LocalizerApp.java
License:Open Source License
protected int getTotalHits(final Query query, int hitsPerPage) throws IOException { TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(query, collector);//from www . j av a 2 s . c om ScoreDoc[] hits = collector.topDocs().scoreDocs; int numTotalHits = collector.getTotalHits(); //System.out.println(numTotalHits + " total matching documents"); return numTotalHits; }
From source file:edu.ku.brc.specify.tools.schemalocale.LocalizerApp.java
License:Open Source License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits./* w w w .j ava 2 s.c om*/ * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int numTotalHits = collector.getTotalHits(); System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } collector = TopScoreDocCollector.create(numTotalHits, false); searcher.search(query, collector); hits = collector.topDocs().scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:edu.ku.brc.specify.tools.webportal.BuildSearchIndex.java
License:Open Source License
/** * // ww w . j a v a 2s. c o m */ /*public void testSearch() { Statement stmt = null; String querystr = "(Pengelly) OR (Castilleja AND applegatei)"; String term = "contents"; try { stmt = dbConn.createStatement(ResultSet.TYPE_FORWARD_ONLY,ResultSet.CONCUR_READ_ONLY); analyzers = new Analyzer[fileNames.length]; for (int i=0;i<analyzers.length;i++) { files[i] = new File(fileNames[i]); analyzers[i] = new StandardAnalyzer(Version.LUCENE_30); readers[i] = IndexReader.open(FSDirectory.open(files[i]), true); } HashMap<Integer, Integer> tblIdHash = new HashMap<Integer, Integer>(); for (int inx=0;inx<analyzers.length;inx++) { long startTime = System.currentTimeMillis(); Query query = new QueryParser(Version.LUCENE_30, term, analyzers[inx]).parse(querystr); int hitsPerPage = 10; searcher = new IndexSearcher(readers[inx]); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("\n------------- "+fileNames[inx] + " - Found: " + hits.length + " hits."); for (int i=0;i<hits.length;++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); //System.out.println((i + 1) + ". 
" + d.get("id") + " -> "+ d.get("xref")); tblIdHacssh.clear(); String pairStr = d.get("xref"); if (StringUtils.isNotEmpty(pairStr)) { String [] pairs = StringUtils.split(d.get("xref"), ','); for (String p : pairs) { String [] ids = StringUtils.split(p, '='); tblIdHash.put(Integer.parseInt(ids[0]), Integer.parseInt(ids[1])); } } if (inx == 0) { String id = d.get("id"); ResultSet rs = stmt.executeQuery("SELECT CatalogNumber FROM collectionobject WHERE CollectionObjectID = "+id); ResultSetMetaData rsmd = rs.getMetaData(); while (rs.next()) { for (int j=1;j<=rsmd.getColumnCount();j++) { System.out.print(rs.getObject(j) + "\t"); } System.out.println(); } rs.close(); Integer agentId = tblIdHash.get(5); if (agentId != null) { rs = stmt.executeQuery("SELECT LastName, FirstName, MiddleInitial FROM agent WHERE AgentID = "+agentId); rsmd = rs.getMetaData(); while (rs.next()) { for (int j=1;j<=rsmd.getColumnCount();j++) { if (rs.getObject(j) != null) System.out.print(rs.getObject(j) + "\t"); } System.out.println(); } rs.close(); } } else { Integer colObjId = tblIdHash.get(1); if (colObjId != null) { ResultSet rs = stmt.executeQuery("SELECT CatalogNumber FROM collectionobject WHERE CollectionObjectID = "+colObjId); ResultSetMetaData rsmd = rs.getMetaData(); while (rs.next()) { for (int j=1;j<=rsmd.getColumnCount();j++) { System.out.print(rs.getObject(j) + "\t"); } System.out.println(); } rs.close(); } } } System.out.println(String.format("Time: %8.2f", (System.currentTimeMillis() - startTime) / 1000.0)); searcher.close(); } for (int i=0;i<analyzers.length;i++) { readers[i].close(); analyzers[i].close(); } } catch (SQLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (stmt != null) { try { stmt.close(); } catch (SQLException e) { e.printStackTrace(); } } } }*/ public void testSearch() { Statement stmt = null; String querystr = "23033";//(Pengelly) OR (Castilleja AND applegatei)"; 
String term = "1";//"contents" try { //stmt = dbConn.createStatement(ResultSet.TYPE_FORWARD_ONLY,ResultSet.CONCUR_READ_ONLY); analyzers = new Analyzer[fileNames.length]; for (int i = 0; i < analyzers.length; i++) { files[i] = new File(fileNames[i]); analyzers[i] = new StandardAnalyzer(Version.LUCENE_30); readers[i] = IndexReader.open(FSDirectory.open(files[i]), true); } HashMap<Integer, Integer> tblIdHash = new HashMap<Integer, Integer>(); for (int inx = 0; inx < analyzers.length; inx++) { long startTime = System.currentTimeMillis(); QueryParser queryParser = new QueryParser(Version.LUCENE_30, term, analyzers[inx]); Query query = queryParser.parse(querystr); int hitsPerPage = 10; searcher = new IndexSearcher(readers[inx]); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("\n------------- " + fileNames[inx] + " - Found: " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("1")); //tblIdHacssh.clear(); } System.out.println(String.format("Time: %8.2f", (System.currentTimeMillis() - startTime) / 1000.0)); searcher.close(); } for (int i = 0; i < analyzers.length; i++) { readers[i].close(); analyzers[i].close(); } } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (stmt != null) { try { stmt.close(); } catch (SQLException e) { e.printStackTrace(); } } } }