List of usage examples for org.apache.lucene.index.memory MemoryIndex createSearcher
public IndexSearcher createSearcher()
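For orientation, here is a minimal, self-contained sketch of the call before the collected examples. It is a sketch only: it assumes a Lucene 5.x-style API (no Version argument to StandardAnalyzer, MemoryIndex from the lucene-memory module), and the class name MemoryIndexExample and the field name "content" are illustrative, not from any of the sources below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

public class MemoryIndexExample {
    public static void main(String[] args) throws Exception {
        // Index a single transient document entirely in RAM.
        MemoryIndex index = new MemoryIndex();
        index.addField("content", "the quick brown fox", new StandardAnalyzer());

        // createSearcher() exposes the one-document index through the standard
        // IndexSearcher API, so any Lucene Query can be run against it.
        IndexSearcher searcher = index.createSearcher();
        TopDocs hits = searcher.search(new TermQuery(new Term("content", "fox")), 1);
        System.out.println("matched: " + (hits.totalHits > 0));
    }
}

Every example below follows this same pattern: build the in-memory document, obtain the searcher, then run one or more queries against it.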
From source file:ch.sentric.hbase.prospective.Percolator.java
License:Apache License
/**
 * Tries to find a set of queries that match the given document.
 *
 * @param doc
 *            the Lucene document
 * @param queries
 *            the candidate queries, keyed by caller-defined identifiers
 * @return the matching queries
 * @throws IOException
 *             if an I/O error occurs
 */
public Response<T> percolate(final Document doc, final Map<T, Query> queries) throws IOException {
    // First, parse the source doc into a MemoryIndex.
    final MemoryIndex memoryIndex = new MemoryIndex();
    for (final Fieldable field : doc.getFields()) {
        if (!field.isIndexed()) {
            continue;
        }
        final TokenStream tokenStream = field.tokenStreamValue();
        if (tokenStream != null) {
            memoryIndex.addField(field.name(), tokenStream, field.getBoost());
        } else {
            final Reader reader = field.readerValue();
            if (reader != null) {
                memoryIndex.addField(field.name(),
                        analyzer.reusableTokenStream(field.name(), reader), field.getBoost());
            } else {
                final String value = field.stringValue();
                if (value != null) {
                    memoryIndex.addField(field.name(),
                            analyzer.reusableTokenStream(field.name(), new CharSequenceReader(value)),
                            field.getBoost());
                }
            }
        }
    }
    // Then run every candidate query against the in-memory document.
    final IndexSearcher searcher = memoryIndex.createSearcher();
    final Map<T, Query> matches = new HashMap<T, Query>(0);
    if (queries != null && !queries.isEmpty()) {
        final ExistsCollector collector = new ExistsCollector();
        for (final Map.Entry<T, Query> entry : queries.entrySet()) {
            collector.reset();
            searcher.search(entry.getValue(), collector);
            if (collector.exists()) {
                matches.put(entry.getKey(), entry.getValue());
            }
        }
    }
    return new Response<T>(matches);
}
From source file:com.jaeksoft.searchlib.parser.HtmlParser.java
License:Open Source License
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum forcedLang)
        throws IOException, SearchLibException {
    titleBoost = getFloatProperty(ClassPropertyEnum.TITLE_BOOST);
    boostTagMap = new TreeMap<String, BoostTag>();
    boostTagMap.put("h1", new BoostTag(ClassPropertyEnum.H1_BOOST));
    boostTagMap.put("h2", new BoostTag(ClassPropertyEnum.H2_BOOST));
    boostTagMap.put("h3", new BoostTag(ClassPropertyEnum.H3_BOOST));
    boostTagMap.put("h4", new BoostTag(ClassPropertyEnum.H4_BOOST));
    boostTagMap.put("h5", new BoostTag(ClassPropertyEnum.H5_BOOST));
    boostTagMap.put("h6", new BoostTag(ClassPropertyEnum.H6_BOOST));
    ignoreMetaNoIndex = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOINDEX);
    ignoreMetaNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOFOLLOW);
    ignoreLinkNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_LINK_NOFOLLOW);
    ignoreUntitledDocuments = getBooleanProperty(ClassPropertyEnum.IGNORE_UNTITLED_DOCUMENTS);
    ignoreNonCanonical = getBooleanProperty(ClassPropertyEnum.IGNORE_NON_CANONICAL);
    String currentCharset = null;
    String headerCharset = null;
    String detectedCharset = null;
    IndexDocument sourceDocument = getSourceDocument();
    if (sourceDocument != null) {
        FieldValueItem fieldValueItem = sourceDocument
                .getFieldValue(UrlItemFieldEnum.INSTANCE.contentTypeCharset.getName(), 0);
        if (fieldValueItem != null)
            headerCharset = fieldValueItem.getValue();
        if (headerCharset == null) {
            fieldValueItem = sourceDocument.getFieldValue(UrlItemFieldEnum.INSTANCE.contentEncoding.getName(), 0);
            if (fieldValueItem != null)
                headerCharset = fieldValueItem.getValue();
        }
        currentCharset = headerCharset;
    }
    if (currentCharset == null) {
        detectedCharset = streamLimiter.getDetectedCharset();
        currentCharset = detectedCharset;
    }
    if (currentCharset == null) {
        currentCharset = getProperty(ClassPropertyEnum.DEFAULT_CHARSET).getValue();
    }
    String xPathExclusions = getProperty(ClassPropertyEnum.XPATH_EXCLUSION).getValue();
    Set<Object> xPathExclusionsSet = null;
    if (!StringUtils.isEmpty(xPathExclusions))
        xPathExclusionsSet = new HashSet<Object>();
    HtmlParserEnum htmlParserEnum = HtmlParserEnum.find(getProperty(ClassPropertyEnum.HTML_PARSER).getValue());
    HtmlDocumentProvider htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter,
            xPathExclusions, xPathExclusionsSet);
    if (htmlProvider == null)
        return;
    URL currentURL = htmlProvider.getBaseHref();
    IndexDocument srcDoc = getSourceDocument();
    String streamOriginalUrl = streamLimiter.getOriginURL();
    try {
        if (currentURL == null && !StringUtils.isEmpty(streamOriginalUrl))
            currentURL = LinkUtils.newEncodedURL(streamOriginalUrl);
        if (currentURL == null && srcDoc != null) {
            FieldValueItem fvi = srcDoc.getFieldValue(UrlItemFieldEnum.INSTANCE.url.getName(), 0);
            if (fvi != null)
                currentURL = LinkUtils.newEncodedURL(fvi.getValue());
        }
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    URL canonicalURL = htmlProvider.getCanonicalLink(currentURL);
    if (canonicalURL != null) {
        String canUrl = canonicalURL.toExternalForm();
        addDetectedLink(canUrl);
        if (ignoreNonCanonical) {
            String curUrl = currentURL.toExternalForm();
            if (!canUrl.equals(curUrl)) {
                isCanonical = false;
                return;
            }
        }
    }
    isCanonical = true;
    String title = htmlProvider.getTitle();
    if (ignoreUntitledDocuments)
        if (title == null || title.length() == 0)
            return;
    ParserResultItem result = getNewParserResultItem();
    addFieldTitle(result, title);
    result.addField(ParserFieldEnum.htmlProvider, htmlProvider.getName());
    // Check ContentType charset in meta http-equiv
    String metaCharset = htmlProvider.getMetaCharset();
    String selectedCharset = selectCharset(headerCharset, metaCharset, detectedCharset);
    if (selectedCharset != null) {
        if (!selectedCharset.equals(currentCharset)) {
            currentCharset = selectedCharset;
            htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter,
                    xPathExclusions, xPathExclusionsSet);
        }
    }
    StringWriter writer = new StringWriter();
    IOUtils.copy(streamLimiter.getNewInputStream(), writer, currentCharset);
    result.addField(ParserFieldEnum.htmlSource, writer.toString());
    writer.close();
    HtmlNodeAbstract<?> rootNode = htmlProvider.getRootNode();
    if (rootNode == null)
        return;
    for (HtmlNodeAbstract<?> metaNode : htmlProvider.getMetas()) {
        String metaName = metaNode.getAttributeText("name");
        if (metaName != null && metaName.startsWith(OPENSEARCHSERVER_FIELD)) {
            String field = metaName.substring(OPENSEARCHSERVER_FIELD_LENGTH);
            String[] fields = field.split("\\.");
            if (fields != null) {
                String content = metaNode.getAttributeText("content");
                result.addDirectFields(fields, content);
            }
        }
    }
    result.addField(ParserFieldEnum.charset, currentCharset);
    String metaRobots = null;
    String metaDcLanguage = null;
    String metaContentLanguage = null;
    for (HtmlNodeAbstract<?> node : htmlProvider.getMetas()) {
        String attr_name = node.getAttributeText("name");
        String attr_http_equiv = node.getAttributeText("http-equiv");
        if ("keywords".equalsIgnoreCase(attr_name))
            result.addField(ParserFieldEnum.meta_keywords, HtmlDocumentProvider.getMetaContent(node));
        else if ("description".equalsIgnoreCase(attr_name))
            result.addField(ParserFieldEnum.meta_description, HtmlDocumentProvider.getMetaContent(node));
        else if ("robots".equalsIgnoreCase(attr_name))
            metaRobots = HtmlDocumentProvider.getMetaContent(node);
        else if ("dc.language".equalsIgnoreCase(attr_name))
            metaDcLanguage = HtmlDocumentProvider.getMetaContent(node);
        else if ("content-language".equalsIgnoreCase(attr_http_equiv))
            metaContentLanguage = HtmlDocumentProvider.getMetaContent(node);
    }
    boolean metaRobotsFollow = true;
    boolean metaRobotsNoIndex = false;
    if (metaRobots != null) {
        metaRobots = metaRobots.toLowerCase();
        if (metaRobots.contains("noindex") && !ignoreMetaNoIndex) {
            metaRobotsNoIndex = true;
            result.addField(ParserFieldEnum.meta_robots, "noindex");
        }
        if (metaRobots.contains("nofollow") && !ignoreMetaNoFollow) {
            metaRobotsFollow = false;
            result.addField(ParserFieldEnum.meta_robots, "nofollow");
        }
    }
    UrlFilterItem[] urlFilterList = getUrlFilterList();
    boolean removeFragment = ClassPropertyEnum.KEEP_REMOVE_LIST[1]
            .equalsIgnoreCase(getProperty(ClassPropertyEnum.URL_FRAGMENT).getValue());
    List<HtmlNodeAbstract<?>> nodes = rootNode.getAllNodes("a", "frame", "img");
    if (srcDoc != null && nodes != null && metaRobotsFollow) {
        for (HtmlNodeAbstract<?> node : nodes) {
            String href = null;
            String rel = null;
            String nodeName = node.getNodeName();
            if ("a".equals(nodeName)) {
                href = node.getAttributeText("href");
                rel = node.getAttributeText("rel");
            } else if ("frame".equals(nodeName) || "img".equals(nodeName)) {
                href = node.getAttributeText("src");
            }
            boolean follow = true;
            if (rel != null)
                if (rel.contains("nofollow") && !ignoreLinkNoFollow)
                    follow = false;
            URL newUrl = null;
            if (href != null)
                if (!href.startsWith("javascript:"))
                    if (currentURL != null) {
                        href = StringEscapeUtils.unescapeXml(href);
                        newUrl = LinkUtils.getLink(currentURL, href, urlFilterList, removeFragment);
                    }
            if (newUrl != null) {
                ParserFieldEnum field = null;
                if (newUrl.getHost().equalsIgnoreCase(currentURL.getHost())) {
                    if (follow)
                        field = ParserFieldEnum.internal_link;
                    else
                        field = ParserFieldEnum.internal_link_nofollow;
                } else {
                    if (follow)
                        field = ParserFieldEnum.external_link;
                    else
                        field = ParserFieldEnum.external_link_nofollow;
                }
                String link = newUrl.toExternalForm();
                result.addField(field, link);
                if (follow)
                    addDetectedLink(link);
            }
        }
    }
    if (!metaRobotsNoIndex) {
        nodes = rootNode.getNodes("html", "body");
        if (nodes == null || nodes.size() == 0)
            nodes = rootNode.getNodes("html");
        if (nodes != null && nodes.size() > 0) {
            StringBuilder sb = new StringBuilder();
            getBodyTextContent(result, sb, nodes.get(0), true, null, 1024, xPathExclusionsSet);
            result.addField(ParserFieldEnum.body, sb);
        }
    }
    // Language identification:
    Locale lang = null;
    String langMethod = null;
    String[] pathHtml = { "html" };
    nodes = rootNode.getNodes(pathHtml);
    if (nodes != null && nodes.size() > 0) {
        langMethod = "html lang attribute";
        String l = nodes.get(0).getAttributeText("lang");
        if (l != null)
            lang = Lang.findLocaleISO639(l);
    }
    if (lang == null && metaContentLanguage != null) {
        langMethod = "meta http-equiv content-language";
        lang = Lang.findLocaleISO639(metaContentLanguage);
    }
    if (lang == null && metaDcLanguage != null) {
        langMethod = "meta dc.language";
        lang = Lang.findLocaleISO639(metaDcLanguage);
    }
    if (lang != null) {
        result.addField(ParserFieldEnum.lang, lang.getLanguage());
        result.addField(ParserFieldEnum.lang_method, langMethod);
    } else if (!metaRobotsNoIndex)
        lang = result.langDetection(10000, ParserFieldEnum.body);
    if (getFieldMap().isMapped(ParserFieldEnum.generated_title)) {
        StringBuilder sb = new StringBuilder();
        try {
            if (!StringUtils.isEmpty(streamOriginalUrl))
                sb.append(new URI(streamOriginalUrl).getHost());
        } catch (URISyntaxException e) {
            Logging.error(e);
        }
        String generatedTitle = null;
        for (Map.Entry<String, BoostTag> entry : boostTagMap.entrySet()) {
            BoostTag boostTag = entry.getValue();
            if (boostTag.firstContent != null) {
                generatedTitle = boostTag.firstContent;
                break;
            }
        }
        if (generatedTitle == null) {
            // No heading tag available: use MoreLikeThis over a one-document
            // MemoryIndex of the body text to pick a representative term.
            final String FIELD_TITLE = "contents";
            MemoryIndex bodyMemoryIndex = new MemoryIndex();
            Analyzer bodyAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
            String bodyText = result.getMergedBodyText(100000, " ", ParserFieldEnum.body);
            bodyMemoryIndex.addField(FIELD_TITLE, bodyText, bodyAnalyzer);
            IndexSearcher indexSearcher = bodyMemoryIndex.createSearcher();
            IndexReader indexReader = indexSearcher.getIndexReader();
            MoreLikeThis mlt = new MoreLikeThis(indexReader);
            mlt.setAnalyzer(bodyAnalyzer);
            mlt.setFieldNames(new String[] { FIELD_TITLE });
            mlt.setMinWordLen(3);
            mlt.setMinTermFreq(1);
            mlt.setMinDocFreq(1);
            String[] words = mlt.retrieveInterestingTerms(0);
            if (words != null && words.length > 0)
                generatedTitle = words[0];
        }
        if (generatedTitle != null) {
            if (sb.length() > 0)
                sb.append(" - ");
            sb.append(generatedTitle);
        }
        if (sb.length() > 67) {
            int pos = sb.indexOf(" ", 60);
            if (pos == -1)
                pos = 67;
            sb.setLength(pos);
            sb.append("...");
        }
        result.addField(ParserFieldEnum.generated_title, sb.toString());
    }
}
From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java
License:Apache License
private IndexReader getReaderForField(String field) throws IOException {
    if (wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
        tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
        cachedTokenStream = true;
    }
    IndexReader reader = readers.get(field);
    if (reader == null) {
        MemoryIndex indexer = new MemoryIndex();
        indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
        tokenStream.reset();
        IndexSearcher searcher = indexer.createSearcher();
        reader = searcher.getIndexReader();
        readers.put(field, reader);
    }
    return reader;
}
From source file:org.elasticsearch.index.percolator.ExtractQueryTermsServiceTests.java
License:Apache License
public void testCreateQueryMetadataQuery() throws Exception {
    MemoryIndex memoryIndex = new MemoryIndex(false);
    memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
    memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
    Query query = ExtractQueryTermsService.createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD,
            UNKNOWN_QUERY_FIELD);
    assertThat(query, instanceOf(TermsQuery.class));
    // There is no easy way to get at the terms of a TermsQuery, but if there are
    // fewer than 16 terms it gets rewritten to a BooleanQuery, whose clauses we can check.
    BooleanQuery booleanQuery = (BooleanQuery) ((ConstantScoreQuery) query.rewrite(indexReader)).getQuery();
    assertThat(booleanQuery.clauses().size(), equalTo(15));
    assertClause(booleanQuery, 0, QUERY_TERMS_FIELD, "_field3\u0000me");
    assertClause(booleanQuery, 1, QUERY_TERMS_FIELD, "_field3\u0000unhide");
    assertClause(booleanQuery, 2, QUERY_TERMS_FIELD, "field1\u0000brown");
    assertClause(booleanQuery, 3, QUERY_TERMS_FIELD, "field1\u0000dog");
    assertClause(booleanQuery, 4, QUERY_TERMS_FIELD, "field1\u0000fox");
    assertClause(booleanQuery, 5, QUERY_TERMS_FIELD, "field1\u0000jumps");
    assertClause(booleanQuery, 6, QUERY_TERMS_FIELD, "field1\u0000lazy");
    assertClause(booleanQuery, 7, QUERY_TERMS_FIELD, "field1\u0000over");
    assertClause(booleanQuery, 8, QUERY_TERMS_FIELD, "field1\u0000quick");
    assertClause(booleanQuery, 9, QUERY_TERMS_FIELD, "field1\u0000the");
    assertClause(booleanQuery, 10, QUERY_TERMS_FIELD, "field2\u0000more");
    assertClause(booleanQuery, 11, QUERY_TERMS_FIELD, "field2\u0000some");
    assertClause(booleanQuery, 12, QUERY_TERMS_FIELD, "field2\u0000text");
    assertClause(booleanQuery, 13, QUERY_TERMS_FIELD, "field4\u0000123");
    assertClause(booleanQuery, 14, UNKNOWN_QUERY_FIELD, "");
}
From source file:org.elasticsearch.index.query.MoreLikeThisQueryBuilderTests.java
License:Apache License
/**
 * Here we could go overboard and use a pre-generated indexed random document for a given Item,
 * but for now we'd prefer to simply return the id as the content of the document, and that for
 * every field.
 */
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
    MemoryIndex index = new MemoryIndex();
    for (String fieldName : fieldNames) {
        index.addField(fieldName, text, new WhitespaceAnalyzer());
    }
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
From source file:org.elasticsearch.index.query.PercolateQueryBuilder.java
License:Apache License
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    if (indexedDocumentIndex != null || indexedDocumentType != null || indexedDocumentId != null) {
        throw new IllegalStateException("query builder must be rewritten first");
    }
    if (document == null) {
        throw new IllegalStateException("nothing to percolator");
    }
    MapperService mapperService = context.getMapperService();
    DocumentMapperForType docMapperForType = mapperService.documentMapperWithAutoCreate(documentType);
    DocumentMapper docMapper = docMapperForType.getDocumentMapper();
    ParsedDocument doc = docMapper.parse(source(context.index().getName(), documentType, "_temp_id", document));
    FieldNameAnalyzer fieldNameAnalyzer = (FieldNameAnalyzer) docMapper.mappers().indexAnalyzer();
    // Need this custom impl because FieldNameAnalyzer is strict and the percolator sometimes isn't when
    // 'index.percolator.map_unmapped_fields_as_string' is enabled:
    Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            Analyzer analyzer = fieldNameAnalyzer.analyzers().get(fieldName);
            if (analyzer != null) {
                return analyzer;
            } else {
                return context.getAnalysisService().defaultIndexAnalyzer();
            }
        }
    };
    final IndexSearcher docSearcher;
    if (doc.docs().size() > 1) {
        assert docMapper.hasNestedObjects();
        docSearcher = createMultiDocumentSearcher(analyzer, doc);
    } else {
        MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc.rootDoc(), analyzer, true, false);
        docSearcher = memoryIndex.createSearcher();
        docSearcher.setQueryCache(null);
    }
    PercolatorQueryCache registry = context.getPercolatorQueryCache();
    if (registry == null) {
        throw new QueryShardException(context, "no percolator query registry");
    }
    PercolateQuery.Builder builder = new PercolateQuery.Builder(documentType, registry, document, docSearcher);
    Settings indexSettings = registry.getIndexSettings().getSettings();
    if (indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null)
            .onOrAfter(Version.V_5_0_0_alpha1)) {
        MappedFieldType fieldType = context.fieldMapper(field);
        if (fieldType == null) {
            throw new QueryShardException(context, "field [" + field + "] does not exist");
        }
        if (!(fieldType instanceof PercolatorFieldMapper.PercolatorFieldType)) {
            throw new QueryShardException(context, "expected field [" + field
                    + "] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
        }
        PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
        builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getUnknownQueryFieldName());
    } else {
        Query percolateTypeQuery = new TermQuery(
                new Term(TypeFieldMapper.NAME, PercolatorFieldMapper.LEGACY_TYPE_NAME));
        builder.setPercolateTypeQuery(percolateTypeQuery);
    }
    return builder.build();
}
From source file:org.elasticsearch.index.query.PercolateQueryBuilder.java
License:Apache License
private IndexSearcher createMultiDocumentSearcher(Analyzer analyzer, ParsedDocument doc) {
    IndexReader[] memoryIndices = new IndexReader[doc.docs().size()];
    List<ParseContext.Document> docs = doc.docs();
    int rootDocIndex = docs.size() - 1;
    assert rootDocIndex > 0;
    for (int i = 0; i < docs.size(); i++) {
        ParseContext.Document d = docs.get(i);
        MemoryIndex memoryIndex = MemoryIndex.fromDocument(d, analyzer, true, false);
        memoryIndices[i] = memoryIndex.createSearcher().getIndexReader();
    }
    try {
        MultiReader mReader = new MultiReader(memoryIndices, true);
        LeafReader slowReader = SlowCompositeReaderWrapper.wrap(mReader);
        final IndexSearcher slowSearcher = new IndexSearcher(slowReader) {
            @Override
            public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException {
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT);
                return super.createNormalizedWeight(bq.build(), needsScores);
            }
        };
        slowSearcher.setQueryCache(null);
        return slowSearcher;
    } catch (IOException e) {
        throw new ElasticsearchException("Failed to create index for percolator with nested document ", e);
    }
}
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
public void testVariousQueries() throws Exception {
    addPercolatorQuery("1", new TermQuery(new Term("field", "brown")));
    addPercolatorQuery("2", new TermQuery(new Term("field", "monkey")));
    addPercolatorQuery("3", new TermQuery(new Term("field", "fox")));
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD);
    bq1.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD);
    addPercolatorQuery("4", bq1.build());
    BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
    bq2.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    bq2.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST);
    addPercolatorQuery("5", bq2.build());
    BooleanQuery.Builder bq3 = new BooleanQuery.Builder();
    bq3.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    bq3.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT);
    addPercolatorQuery("6", bq3.build());
    BooleanQuery.Builder bq4 = new BooleanQuery.Builder();
    bq4.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT);
    bq4.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST);
    addPercolatorQuery("7", bq4.build());
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "lazy"));
    pq1.add(new Term("field", "dog"));
    addPercolatorQuery("8", pq1.build());
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    PercolateQuery.Builder builder = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"),
            percolateSearcher);
    builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    // no scoring, wrapping it in a constant score query:
    Query query = new ConstantScoreQuery(builder.build());
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(5));
    assertThat(topDocs.scoreDocs.length, equalTo(5));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
    Explanation explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(false));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    explanation = shardSearcher.explain(query, 4);
    assertThat(explanation.isMatch(), is(false));
    assertThat(topDocs.scoreDocs[3].doc, equalTo(5));
    explanation = shardSearcher.explain(query, 5);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[3].score));
    explanation = shardSearcher.explain(query, 6);
    assertThat(explanation.isMatch(), is(false));
    assertThat(topDocs.scoreDocs[4].doc, equalTo(7));
    explanation = shardSearcher.explain(query, 7);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[4].score));
}
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
public void testVariousQueries_withScoring() throws Exception {
    SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
    snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
    snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
    snp.addClause(new SpanTermQuery(new Term("field", "dog")));
    snp.setSlop(2);
    addPercolatorQuery("1", snp.build());
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "quick"));
    pq1.add(new Term("field", "brown"));
    pq1.add(new Term("field", "jumps"));
    pq1.setSlop(1);
    addPercolatorQuery("2", pq1.build());
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    addPercolatorQuery("3", bq1.build());
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    PercolateQuery.Builder builder = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"),
            percolateSearcher);
    builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    Query query = builder.build();
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(2));
    Explanation explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(0));
    explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
}
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    PercolateQuery.Builder builder1 = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"),
            percolateSearcher);
    // Enables the optimization that avoids evaluating queries that cannot match.
    builder1.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    TopDocs topDocs1 = shardSearcher.search(builder1.build(), 10);
    PercolateQuery.Builder builder2 = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"),
            percolateSearcher);
    builder2.setPercolateTypeQuery(new MatchAllDocsQuery());
    TopDocs topDocs2 = shardSearcher.search(builder2.build(), 10);
    assertThat(topDocs1.totalHits, equalTo(topDocs2.totalHits));
    assertThat(topDocs1.scoreDocs.length, equalTo(topDocs2.scoreDocs.length));
    for (int j = 0; j < topDocs1.scoreDocs.length; j++) {
        assertThat(topDocs1.scoreDocs[j].doc, equalTo(topDocs2.scoreDocs[j].doc));
        assertThat(topDocs1.scoreDocs[j].score, equalTo(topDocs2.scoreDocs[j].score));
        Explanation explain1 = shardSearcher.explain(builder1.build(), topDocs1.scoreDocs[j].doc);
        Explanation explain2 = shardSearcher.explain(builder2.build(), topDocs2.scoreDocs[j].doc);
        assertThat(explain1.toHtml(), equalTo(explain2.toHtml()));
    }
}