// Java tutorial
package perf;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.range.LongRangeFacetCounts;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.grouping.BlockGroupingCollector;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.search.grouping.term.TermAllGroupsCollector;
import org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector;
import org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.util.BytesRef;

/**
 * One benchmark search task: a query plus optional sort, grouping, faceting,
 * highlighting and stored-field loading.  {@link #go} executes the task
 * against an {@code IndexState}; {@link #checksum} and {@link #printResults}
 * expose the results for verification and reporting.
 */
final class SearchTask extends Task {
  private final String category;
  private final Query q;
  private final Sort s;
  private final String group;
  private final int topN;
  private final boolean singlePassGroup;
  private final boolean doCountGroups;
  private final boolean doHilite;
  private final boolean doStoredLoads;
  private final boolean doDrillSideways;

  private TopDocs hits;
  private TopGroups<?> groupsResultBlock;
  private TopGroups<BytesRef> groupsResultTerms;
  private FieldQuery fieldQuery;
  private Highlighter highlighter;
  private List<FacetResult> facetResults;
  private double hiliteMsec;
  private double getFacetResultsMsec;
  private List<String> facetRequests;

  // Accumulates hash codes of all highlight fragments so runs can be compared.
  public int totHiliteHash;

  /**
   * @param category       label used to bucket timings for this task
   * @param q              the query to execute
   * @param s              optional sort (null = sort by relevance)
   * @param group          grouping field; the special prefix "groupblock"
   *                       selects block grouping ("groupblock1pass" = single pass)
   * @param topN           how many top hits to collect
   * @param doHilite       whether to highlight the top hits
   * @param doStoredLoads  whether to load stored fields for the top hits
   * @param facetRequests  facet dimensions to count ("range:field:0-10,..." for ranges)
   * @param doDrillSideways whether to run drill-sideways (currently unimplemented)
   */
  public SearchTask(String category, Query q, Sort s, String group, int topN,
                    boolean doHilite, boolean doStoredLoads, List<String> facetRequests,
                    boolean doDrillSideways) {
    this.category = category;
    this.q = q;
    this.s = s;
    if (group != null && group.startsWith("groupblock")) {
      this.group = "groupblock";
      this.singlePassGroup = group.equals("groupblock1pass");
      doCountGroups = true;
    } else {
      this.group = group;
      this.singlePassGroup = false;
      doCountGroups = false;
    }
    this.topN = topN;
    this.doHilite = doHilite;
    this.doStoredLoads = doStoredLoads;
    this.facetRequests = facetRequests;
    this.doDrillSideways = doDrillSideways;
  }

  @Override
  public Task clone() {
    if (singlePassGroup) {
      // Re-expand the collapsed group name so the clone takes the same ctor path:
      return new SearchTask(category, q, s, "groupblock1pass", topN, doHilite,
                            doStoredLoads, facetRequests, doDrillSideways);
    } else {
      return new SearchTask(category, q, s, group, topN, doHilite,
                            doStoredLoads, facetRequests, doDrillSideways);
    }
  }

  @Override
  public String getCategory() {
    return category;
  }

  /**
   * Runs the task: acquires a searcher, executes the grouped / faceted /
   * sorted / plain search variant, optionally highlights and loads stored
   * fields, then records {@code totalHitCount}.
   */
  @Override
  public void go(IndexState state) throws IOException {
    final IndexSearcher searcher = state.mgr.acquire();
    try {
      if (doHilite) {
        if (state.fastHighlighter != null) {
          fieldQuery = state.fastHighlighter.getFieldQuery(q, searcher.getIndexReader());
        } else if (state.useHighlighter) {
          highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
        } else {
          // no setup for postingshighlighter
        }
      }

      if (group != null) {
        if (singlePassGroup) {
          final BlockGroupingCollector c = new BlockGroupingCollector(
              Sort.RELEVANCE, 10, true,
              searcher.createNormalizedWeight(state.groupEndQuery, false));
          searcher.search(q, c);
          groupsResultBlock = c.getTopGroups(Sort.RELEVANCE, 0, 0, 10, true);
          if (doHilite) {
            hilite(groupsResultBlock, state, searcher);
          }
        } else {
          final TermFirstPassGroupingCollector c1 =
              new TermFirstPassGroupingCollector(group, Sort.RELEVANCE, 10);
          final Collector c;
          final TermAllGroupsCollector allGroupsCollector;
          // Turn off AllGroupsCollector for now -- it's very slow:
          if (false && doCountGroups) {
            allGroupsCollector = new TermAllGroupsCollector(group);
            //c = MultiCollector.wrap(allGroupsCollector, c1);
            c = c1;
          } else {
            allGroupsCollector = null;
            c = c1;
          }

          searcher.search(q, c);

          final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(0, true);
          if (topGroups != null) {
            final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(
                group, topGroups, Sort.RELEVANCE, Sort.RELEVANCE, 10, true, true, true);
            searcher.search(q, c2);
            groupsResultTerms = c2.getTopGroups(0);
            if (allGroupsCollector != null) {
              groupsResultTerms = new TopGroups<BytesRef>(groupsResultTerms,
                                                          allGroupsCollector.getGroupCount());
            }
            if (doHilite) {
              hilite(groupsResultTerms, state, searcher);
            }
          }
        }
      } else if (!facetRequests.isEmpty()) {
        // TODO: support sort, filter too!!
        // TODO: support other facet methods
        if (doDrillSideways) {
          // nocommit todo
          hits = null;
          facetResults = null;
        } else {
          facetResults = new ArrayList<FacetResult>();
          FacetsCollector fc = new FacetsCollector();
          hits = FacetsCollector.search(searcher, q, 10, fc);
          long t0 = System.nanoTime();
          for (String request : facetRequests) {
            if (request.startsWith("range:")) {
              // Parse "range:field:lo-hi,lo-hi,..." into inclusive long ranges:
              int i = request.indexOf(':', 6);
              if (i == -1) {
                throw new IllegalArgumentException("range facets request \"" + request
                    + "\" is missing field; should be range:field:0-10,10-20");
              }
              String field = request.substring(6, i);
              String[] rangeStrings = request.substring(i + 1, request.length()).split(",");
              LongRange[] ranges = new LongRange[rangeStrings.length];
              for (int rangeIDX = 0; rangeIDX < ranges.length; rangeIDX++) {
                String rangeString = rangeStrings[rangeIDX];
                int j = rangeString.indexOf('-');
                if (j == -1) {
                  throw new IllegalArgumentException(
                      "range facets request should be X-Y; got: " + rangeString);
                }
                long start = Long.parseLong(rangeString.substring(0, j));
                long end = Long.parseLong(rangeString.substring(j + 1));
                ranges[rangeIDX] = new LongRange(rangeString, start, true, end, true);
              }
              LongRangeFacetCounts facets = new LongRangeFacetCounts(field, fc, ranges);
              facetResults.add(facets.getTopChildren(ranges.length, field));
            } else {
              Facets facets = new FastTaxonomyFacetCounts(state.taxoReader,
                                                          state.facetsConfig, fc);
              facetResults.add(facets.getTopChildren(10, request));
            }
          }
          getFacetResultsMsec = (System.nanoTime() - t0) / 1000000.0;
        }
      } else if (s == null) {
        hits = searcher.search(q, topN);
        if (doHilite) {
          hilite(hits, state, searcher, q);
        }
      } else {
        hits = searcher.search(q, topN, s);
        if (doHilite) {
          hilite(hits, state, searcher, q);
        }
        /*
        final boolean fillFields = true;
        final boolean fieldSortDoTrackScores = true;
        final boolean fieldSortDoMaxScore = true;
        final TopFieldCollector c = TopFieldCollector.create(s, topN, fillFields,
                                                             fieldSortDoTrackScores,
                                                             fieldSortDoMaxScore,
                                                             false);
        searcher.search(q, c);
        hits = c.topDocs();
        */
      }

      if (hits != null) {
        totalHitCount = hits.totalHits;
        if (doStoredLoads) {
          for (int i = 0; i < hits.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = hits.scoreDocs[i];
            searcher.doc(scoreDoc.doc);
          }
        }
      } else if (groupsResultBlock != null) {
        totalHitCount = groupsResultBlock.totalHitCount;
      }
    } catch (Throwable t) {
      System.out.println("EXC: " + q);
      throw new RuntimeException(t);
      //System.out.println("TE: " + TermsEnum.getStats());
    } finally {
      state.mgr.release(searcher);
      // These are per-execution; clear so a stale searcher's state can't leak:
      fieldQuery = null;
      highlighter = null;
    }
  }

  /** Highlights every hit inside every group of a grouped result. */
  private void hilite(TopGroups<?> groups, IndexState indexState,
                      IndexSearcher searcher) throws IOException {
    for (GroupDocs<?> group : groups.groups) {
      for (ScoreDoc sd : group.scoreDocs) {
        hilite(sd.doc, indexState, searcher);
      }
    }
  }

  /** Highlights all top hits and records elapsed time in {@link #hiliteMsec}. */
  private void hilite(TopDocs hits, IndexState indexState, IndexSearcher searcher,
                      Query query) throws IOException {
    long t0 = System.nanoTime();
    if (indexState.fastHighlighter != null || indexState.useHighlighter) {
      for (ScoreDoc sd : hits.scoreDocs) {
        hilite(sd.doc, indexState, searcher);
      }
    } else {
      // TODO: why is this one finding 2 frags when the others find 1?
      String[] frags = indexState.postingsHighlighter.highlight(
          indexState.textFieldName, query, searcher, hits, 2);
      for (int hit = 0; hit < frags.length; hit++) {
        String frag = frags[hit];
        if (frag != null) {
          // It's fine for frag to be null: it's a
          // placeholder, meaning this hit had no hilite
          totHiliteHash += frag.hashCode();
        }
      }
    }
    long t1 = System.nanoTime();
    hiliteMsec = (t1 - t0) / 1000000.0;
  }

  /** Highlights a single document with whichever highlighter is configured. */
  private void hilite(int docID, IndexState indexState, IndexSearcher searcher)
      throws IOException {
    if (indexState.fastHighlighter != null) {
      for (String h : indexState.fastHighlighter.getBestFragments(
               fieldQuery, searcher.getIndexReader(), docID,
               indexState.textFieldName, 100, 2)) {
        totHiliteHash += h.hashCode();
      }
    } else {
      Document doc = searcher.doc(docID);
      String text = doc.get(indexState.textFieldName);
      // NOTE: passing null for analyzer: TermVectors must
      // be indexed!
      TokenStream tokenStream = TokenSources.getAnyTokenStream(
          searcher.getIndexReader(), docID, indexState.textFieldName, null);
      TextFragment[] frags;
      try {
        frags = highlighter.getBestTextFragments(tokenStream, text, false, 2);
      } catch (InvalidTokenOffsetsException ioe) {
        throw new RuntimeException(ioe);
      }
      for (int j = 0; j < frags.length; j++) {
        if (frags[j] != null && frags[j].getScore() > 0) {
          totHiliteHash += frags[j].toString().hashCode();
        }
      }
    }
  }

  /**
   * Tasks are equal when query, sort, topN and group all match.
   *
   * <p>Fixes two defects in the previous version: equal non-null groups
   * incorrectly compared unequal (so a task never equalled its clone), and a
   * null sort compared equal to any non-null sort.
   */
  @Override
  public boolean equals(Object other) {
    if (other instanceof SearchTask) {
      final SearchTask otherSearchTask = (SearchTask) other;
      return q.equals(otherSearchTask.q)
          && Objects.equals(s, otherSearchTask.s)
          && topN == otherSearchTask.topN
          && Objects.equals(group, otherSearchTask.group);
    } else {
      return false;
    }
  }

  @Override
  public int hashCode() {
    int hashCode = q.hashCode();
    if (s != null) {
      hashCode ^= s.hashCode();
    }
    if (group != null) {
      hashCode ^= group.hashCode();
    }
    hashCode *= topN;
    return hashCode;
  }

  /**
   * Deterministic checksum over the doc ids (and sort-field values) of the
   * results, used to verify that concurrent runs return identical hits.
   */
  @Override
  public long checksum() {
    final long PRIME = 641;
    long sum = 0;
    if (group != null) {
      if (singlePassGroup) {
        for (GroupDocs<?> groupDocs : groupsResultBlock.groups) {
          sum += groupDocs.totalHits;
          for (ScoreDoc hit : groupDocs.scoreDocs) {
            sum = sum * PRIME + hit.doc;
          }
        }
      } else {
        for (GroupDocs<BytesRef> groupDocs : groupsResultTerms.groups) {
          sum += groupDocs.totalHits;
          for (ScoreDoc hit : groupDocs.scoreDocs) {
            sum = sum * PRIME + hit.doc;
            if (hit instanceof FieldDoc) {
              final FieldDoc fd = (FieldDoc) hit;
              if (fd.fields != null) {
                for (Object o : fd.fields) {
                  sum = sum * PRIME + o.hashCode();
                }
              }
            }
          }
        }
      }
    } else {
      sum = hits.totalHits;
      for (ScoreDoc hit : hits.scoreDocs) {
        sum = sum * PRIME + hit.doc;
        if (hit instanceof FieldDoc) {
          final FieldDoc fd = (FieldDoc) hit;
          if (fd.fields != null) {
            for (Object o : fd.fields) {
              if (o != null) {
                sum = sum * PRIME + o.hashCode();
              }
            }
          }
        }
      }
    }
    return sum;
  }

  @Override
  public String toString() {
    return "cat=" + category + " q=" + q + " s=" + s + " group="
        + (group == null ? null : group.replace("\n", "\\n"))
        + (group == null
           ? " hits=" + (hits == null ? "null" : hits.totalHits)
           : " groups=" + (singlePassGroup
              ? (groupsResultBlock.groups.length
                 + " hits=" + groupsResultBlock.totalHitCount
                 + " groupTotHits=" + groupsResultBlock.totalGroupedHitCount
                 + " totGroupCount=" + groupsResultBlock.totalGroupCount)
              : (groupsResultTerms.groups.length
                 + " hits=" + groupsResultTerms.totalHitCount
                 + " groupTotHits=" + groupsResultTerms.totalGroupedHitCount
                 + " totGroupCount=" + groupsResultTerms.totalGroupCount)));
  }

  /** Prints hits (or groups), hilite/facet timings and facet counts to {@code out}. */
  @Override
  public void printResults(PrintStream out, IndexState state) throws IOException {
    IndexSearcher searcher = state.mgr.acquire();
    try {
      if (group != null) {
        if (singlePassGroup) {
          for (GroupDocs<?> groupDocs : groupsResultBlock.groups) {
            out.println("  group=null" + " totalHits=" + groupDocs.totalHits
                        + " groupRelevance=" + groupDocs.groupSortValues[0]);
            for (ScoreDoc hit : groupDocs.scoreDocs) {
              out.println("    doc=" + hit.doc + " score=" + hit.score);
            }
          }
        } else {
          for (GroupDocs<BytesRef> groupDocs : groupsResultTerms.groups) {
            out.println("  group="
                        + (groupDocs.groupValue == null
                           ? "null"
                           : groupDocs.groupValue.utf8ToString().replace("\n", "\\n"))
                        + " totalHits=" + groupDocs.totalHits
                        + " groupRelevance=" + groupDocs.groupSortValues[0]);
            for (ScoreDoc hit : groupDocs.scoreDocs) {
              out.println("    doc=" + hit.doc + " score=" + hit.score);
            }
          }
        }
      } else if (hits instanceof TopFieldDocs) {
        for (int idx = 0; idx < hits.scoreDocs.length; idx++) {
          FieldDoc hit = (FieldDoc) hits.scoreDocs[idx];
          final Object v = hit.fields[0];
          final String vs;
          if (v instanceof Long) {
            vs = v.toString();
          } else if (v == null) {
            vs = "null";
          } else {
            vs = ((BytesRef) v).utf8ToString();
          }
          out.println("  doc=" + LineFileDocs.idToInt(searcher.doc(hit.doc).get("id"))
                      + " " + s.getSort()[0].getField() + "=" + vs);
        }
      } else {
        for (ScoreDoc hit : hits.scoreDocs) {
          out.println("  doc=" + LineFileDocs.idToInt(searcher.doc(hit.doc).get("id"))
                      + " score=" + hit.score);
        }
      }

      if (hiliteMsec > 0) {
        out.println(String.format("  hilite time %.4f msec", hiliteMsec));
      }
      if (getFacetResultsMsec > 0) {
        out.println(String.format("  getFacetResults time %.4f msec", getFacetResultsMsec));
      }

      if (facetResults != null) {
        out.println("  facets:");
        for (FacetResult fr : facetResults) {
          out.println("    " + fr);
        }
      }
    } finally {
      state.mgr.release(searcher);
    }
  }
}